lexbase.pas 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. {
  2. This module collects the basic data types and operations used in the TP
  3. Lex program, and other basic stuff that does not belong anywhere else:
  4. - Lex input and output files and corresponding bookkeeping information
  5. used by the parser
  6. - symbolic character constants
  7. - dynamically allocated strings and character classes
  8. - integer sets
  9. - generic quicksort and hash table routines
  10. - utilities for list-generating
  11. - other tiny utilities
  12. Copyright (c) 1990-92 Albert Graef <[email protected]>
  13. Copyright (C) 1996 Berend de Boer <[email protected]>
  14. This program is free software; you can redistribute it and/or modify
  15. it under the terms of the GNU General Public License as published by
  16. the Free Software Foundation; either version 2 of the License, or
  17. (at your option) any later version.
  18. This program is distributed in the hope that it will be useful,
  19. but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. GNU General Public License for more details.
  22. You should have received a copy of the GNU General Public License
  23. along with this program; if not, write to the Free Software
  24. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25. $Revision: 1.2 $
  26. $Modtime: 96-08-01 10:21 $
  27. $History: LEXBASE.PAS $
  28. *
  29. * ***************** Version 2 *****************
  30. * User: Berend Date: 96-10-10 Time: 21:16
  31. * Updated in $/Lex and Yacc/tply
  32. * Updated for protected mode, windows and Delphi 1.X and 2.X.
  33. }
  34. unit LexBase;
  35. interface
  36. const
  37. (* symbolic character constants: *)
  38. bs = #8; (* backspace character *)
  39. tab = #9; (* tab character *)
  40. nl = #10; (* newline character *)
  41. cr = #13; (* carriage return *)
  42. ff = #12; (* form feed character *)
  43. var
  44. (* Filenames: *)
  45. lfilename : String;
  46. pasfilename : String;
  47. lstfilename : String;
  48. codfilename : String;
  49. codfilepath : String; { Under linux, binary and conf file
  50. are not in the same path}
  51. (* Lex input, output, list and code template file: *)
  52. yyin, yylst, yyout, yycod : Text;
  53. (* the following values are initialized and updated by the parser: *)
  54. line : String; (* current input line *)
  55. lno : Integer; (* current line number *)
  56. const
  57. max_elems = 1000; (* maximum size of integer sets *)
  58. type
  59. (* String and character class pointers: *)
  60. StrPtr = ^String;
  61. CClass = set of Char;
  62. CClassPtr = ^CClass;
  63. (* Sorted integer sets: *)
  64. IntSet = array [0..max_elems] of Integer;
  65. (* word 0 is size *)
  66. IntSetPtr = ^IntSet;
  67. (* Regular expressions: *)
  68. RegExpr = ^Node;
  69. NodeType = (mark_node, (* marker node *)
  70. char_node, (* character node *)
  71. str_node, (* string node *)
  72. cclass_node, (* character class node *)
  73. star_node, (* star node *)
  74. plus_node, (* plus node *)
  75. opt_node, (* option node *)
  76. cat_node, (* concatenation node *)
  77. alt_node); (* alternatives node (|) *)
  78. Node = record case node_type : NodeType of
  79. mark_node : (rule, pos : Integer);
  80. char_node : (c : Char);
  81. str_node : (str : StrPtr);
  82. cclass_node : (cc : CClassPtr);
  83. star_node, plus_node, opt_node : (r : RegExpr);
  84. cat_node, alt_node : (r1, r2 : RegExpr);
  85. end;
  86. (* Some standard character classes: *)
  87. const
  88. letters : CClass = ['A'..'Z','a'..'z','_'];
  89. digits : CClass = ['0'..'9'];
  90. alphanums : CClass = ['A'..'Z','a'..'z','_','0'..'9'];
  91. (* Operations: *)
  92. (* Strings and character classes: *)
  93. function newStr(str : String) : StrPtr;
  94. (* creates a string pointer (only the space actually needed for the given
  95. string is allocated) *)
  96. function newCClass(cc : CClass) : CClassPtr;
  97. (* creates a CClass pointer *)
  98. (* Integer sets (set arguments are passed by reference even if they are not
  99. modified, for greater efficiency): *)
  100. procedure empty(var M : IntSet);
  101. (* initializes M as empty *)
  102. procedure singleton(var M : IntSet; i : Integer);
  103. (* initializes M as a singleton set containing the element i *)
  104. procedure include(var M : IntSet; i : Integer);
  105. (* include i in M *)
  106. procedure exclude(var M : IntSet; i : Integer);
  107. (* exclude i from M *)
  108. procedure setunion(var M, N : IntSet);
  109. (* adds N to M *)
  110. procedure setminus(var M, N : IntSet);
  111. (* removes N from M *)
  112. procedure intersect(var M, N : IntSet);
  113. (* removes from M all elements NOT in N *)
  114. function size(var M : IntSet) : Integer;
  115. (* cardinality of set M *)
  116. function member(i : Integer; var M : IntSet) : Boolean;
  117. (* tests for membership of i in M *)
  118. function isempty(var M : IntSet) : Boolean;
  119. (* checks whether M is an empty set *)
  120. function equal(var M, N : IntSet) : Boolean;
  121. (* checks whether M and N are equal *)
  122. function subseteq(var M, N : IntSet) : Boolean;
  123. (* checks whether M is a subset of N *)
  124. function newIntSet : IntSetPtr;
  125. (* creates a pointer to an empty integer set *)
  126. (* Constructors for regular expressions: *)
  127. const epsExpr : RegExpr = nil;
  128. (* empty regular expression *)
  129. function markExpr(rule, pos : Integer) : RegExpr;
  130. (* markers are used to denote endmarkers of rules, as well as other
  131. special positions in rules, e.g. the position of the lookahead
  132. operator; they are considered nullable; by convention, we use
  133. the following pos numbers:
  134. - 0: endmarker position
  135. - 1: lookahead operator position *)
  136. function charExpr(c : Char) : RegExpr;
  137. (* character c *)
  138. function strExpr(str : StrPtr) : RegExpr;
  139. (* "str" *)
  140. function cclassExpr(cc : CClassPtr) : RegExpr;
  141. (* [str] where str are the literals in cc *)
  142. function starExpr(r : RegExpr) : RegExpr;
  143. (* r* *)
  144. function plusExpr(r : RegExpr) : RegExpr;
  145. (* r+ *)
  146. function optExpr(r : RegExpr) : RegExpr;
  147. (* r? *)
  148. function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
  149. (* constructor expanding expression r{m,n} to the corresponding
  150. alt expression r^m|...|r^n *)
  151. function catExpr(r1, r2 : RegExpr) : RegExpr;
  152. (* r1r2 *)
  153. function altExpr(r1, r2 : RegExpr) : RegExpr;
  154. (* r1|r2 *)
  155. (* Unifiers for regular expressions:
  156. The following predicates check whether the specified regular
  157. expression r is of the denoted type; if the predicate succeeds,
  158. the other arguments of the predicate are instantiated to the
  159. corresponding values. *)
  160. function is_epsExpr(r : RegExpr) : Boolean;
  161. (* empty regular expression *)
  162. function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
  163. (* marker expression *)
  164. function is_charExpr(r : RegExpr; var c : Char) : Boolean;
  165. (* character c *)
  166. function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
  167. (* "str" *)
  168. function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
  169. (* [str] where str are the literals in cc *)
  170. function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  171. (* r1* *)
  172. function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  173. (* r1+ *)
  174. function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  175. (* r1? *)
  176. function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  177. (* r1r2 *)
  178. function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  179. (* r1|r2 *)
  180. (* Quicksort: *)
  181. type
  182. OrderPredicate = function (i, j : Integer) : Boolean;
  183. SwapProc = procedure (i, j : Integer);
  184. procedure quicksort(lo, hi: Integer;
  185. less : OrderPredicate;
  186. swap : SwapProc);
  187. (* General inplace sorting procedure based on the quicksort algorithm.
  188. This procedure can be applied to any sequential data structure;
  189. only the corresponding routines less which compares, and swap which
  190. swaps two elements i,j of the target data structure, must be
  191. supplied as appropriate for the target data structure.
  192. - lo, hi: the lower and higher indices, indicating the elements to
  193. be sorted
  194. - less(i, j): should return true if element no. i `is less than'
  195. element no. j, and false otherwise; any total quasi-ordering may
  196. be supplied here (if neither less(i, j) nor less(j, i) then elements
  197. i and j are assumed to be `equal').
  198. - swap(i, j): should swap the elements with index i and j *)
  199. (* Generic hash table routines (based on quadratic rehashing; hence the
  200. table size must be a prime number): *)
  201. type
  202. TableLookupProc = function(k : Integer) : String;
  203. TableEntryProc = procedure(k : Integer; symbol : String);
  204. function key(symbol : String;
  205. table_size : Integer;
  206. lookup : TableLookupProc;
  207. entry : TableEntryProc) : Integer;
  208. (* returns a hash table key for symbol; inserts the symbol into the
  209. table if necessary
  210. - table_size is the symbol table size and must be a fixed prime number
  211. - lookup is the table lookup procedure which should return the string
  212. at key k in the table ('' if entry is empty)
  213. - entry is the table entry procedure which is assumed to store the
  214. given symbol at the given location *)
  215. function definedKey(symbol : String;
  216. table_size : Integer;
  217. lookup : TableLookupProc) : Boolean;
  218. (* checks the table to see if symbol is in the table *)
  219. (* Utility routines: *)
  220. function min(i, j : Integer) : Integer;
  221. function max(i, j : Integer) : Integer;
  222. (* minimum and maximum of two integers *)
  223. function nchars(cc : CClass) : Integer;
  224. (* returns the cardinality (number of characters) of a character class *)
  225. function upper(str : String) : String;
  226. (* returns str converted to uppercase *)
  227. function strip(str : String) : String;
  228. (* returns str with leading and trailing blanks stripped off *)
  229. function blankStr(str : String) : String;
  230. (* returns string of same length as str, with all non-whitespace characters
  231. replaced by blanks *)
  232. function intStr(i : Integer) : String;
  233. (* returns the string representation of i *)
  234. function isInt(str : String; var i : Integer) : Boolean;
  235. (* checks whether str represents an integer; if so, returns the
  236. value of it in i *)
  237. function path(filename : String) : String;
  238. (* returns the path in filename *)
  239. function root(filename : String) : String;
  240. (* returns root (i.e. extension stripped from filename) of
  241. filename *)
  242. function addExt(filename, ext : String) : String;
  243. (* if filename has no extension and last filename character is not '.',
  244. add extension ext to filename *)
  245. function file_size(filename : String) : LongInt;
  246. (* determines file size in bytes *)
  247. (* Utility functions for list generating routines: *)
  248. function charStr(c : char; reserved : CClass) : String;
  249. (* returns a print name for character c, using the standard escape
  250. conventions; reserved is the class of `reserved' special characters
  251. which should be quoted with \ (\ itself is always quoted) *)
  252. function singleQuoteStr(str : String) : String;
  253. (* returns print name of str enclosed in single quotes, using the
  254. standard escape conventions *)
  255. function doubleQuoteStr(str : String) : String;
  256. (* returns print name of str enclosed in double quotes, using the
  257. standard escape conventions *)
  258. function cclassStr(cc : CClass) : String;
  259. (* returns print name of character class cc, using the standard escape
  260. conventions; if cc contains more than 128 elements, the complement
  261. notation (^) is used; if cc is the class of all (non-null) characters
  262. except newline, the period notation is used *)
  263. function cclassOrCharStr(cc : CClass) : String;
  264. (* returns a print name for character class cc (either cclassStr, or,
  265. if cc contains only one element, character in single quotes) *)
  266. function regExprStr(r : RegExpr) : String;
  267. (* unparses a regular expression *)
  268. implementation
  269. uses LexMsgs;
  270. (* String and character class pointers: *)
  271. function newStr(str : String) : StrPtr;
  272. var strp : StrPtr;
  273. begin
  274. getmem(strp, succ(length(str)));
  275. move(str, strp^, succ(length(str)));
  276. newStr := strp;
  277. end(*newStr*);
  278. function newCClass(cc : CClass) : CClassPtr;
  279. var ccp : CClassPtr;
  280. begin
  281. new(ccp);
  282. ccp^ := cc;
  283. newCClass := ccp;
  284. end(*newCClass*);
  285. (* Integer sets: *)
  286. procedure empty(var M : IntSet);
  287. begin
  288. M[0] := 0;
  289. end(*empty*);
  290. procedure singleton(var M : IntSet; i : Integer);
  291. begin
  292. M[0] := 1; M[1] := i;
  293. end(*singleton*);
  294. procedure include(var M : IntSet; i : Integer);
  295. var l, r, k : Integer;
  296. begin
  297. (* binary search: *)
  298. l := 1; r := M[0];
  299. k := l + (r-l) div 2;
  300. while (l<r) and (M[k]<>i) do
  301. begin
  302. if M[k]<i then
  303. l := succ(k)
  304. else
  305. r := pred(k);
  306. k := l + (r-l) div 2;
  307. end;
  308. if (k>M[0]) or (M[k]<>i) then
  309. begin
  310. if M[0]>=max_elems then fatal(intset_overflow);
  311. if (k<=M[0]) and (M[k]<i) then
  312. begin
  313. move(M[k+1], M[k+2], (M[0]-k)*sizeOf(Integer));
  314. M[k+1] := i;
  315. end
  316. else
  317. begin
  318. move(M[k], M[k+1], (M[0]-k+1)*sizeOf(Integer));
  319. M[k] := i;
  320. end;
  321. inc(M[0]);
  322. end;
  323. end(*include*);
  324. procedure exclude(var M : IntSet; i : Integer);
  325. var l, r, k : Integer;
  326. begin
  327. (* binary search: *)
  328. l := 1; r := M[0];
  329. k := l + (r-l) div 2;
  330. while (l<r) and (M[k]<>i) do
  331. begin
  332. if M[k]<i then
  333. l := succ(k)
  334. else
  335. r := pred(k);
  336. k := l + (r-l) div 2;
  337. end;
  338. if (k<=M[0]) and (M[k]=i) then
  339. begin
  340. move(M[k+1], M[k], (M[0]-k)*sizeOf(Integer));
  341. dec(M[0]);
  342. end;
  343. end(*exclude*);
  344. procedure setunion(var M, N : IntSet);
  345. var
  346. K : IntSet;
  347. i, j, i_M, i_N : Integer;
  348. begin
  349. (* merge sort: *)
  350. i := 0; i_M := 1; i_N := 1;
  351. while (i_M<=M[0]) and (i_N<=N[0]) do
  352. begin
  353. inc(i);
  354. if i>max_elems then fatal(intset_overflow);
  355. if M[i_M]<N[i_N] then
  356. begin
  357. K[i] := M[i_M]; inc(i_M);
  358. end
  359. else if N[i_N]<M[i_M] then
  360. begin
  361. K[i] := N[i_N]; inc(i_N);
  362. end
  363. else
  364. begin
  365. K[i] := M[i_M]; inc(i_M); inc(i_N);
  366. end
  367. end;
  368. for j := i_M to M[0] do
  369. begin
  370. inc(i);
  371. if i>max_elems then fatal(intset_overflow);
  372. K[i] := M[j];
  373. end;
  374. for j := i_N to N[0] do
  375. begin
  376. inc(i);
  377. if i>max_elems then fatal(intset_overflow);
  378. K[i] := N[j];
  379. end;
  380. K[0] := i;
  381. move(K, M, succ(i)*sizeOf(Integer));
  382. end(*setunion*);
  383. procedure setminus(var M, N : IntSet);
  384. var
  385. K : IntSet;
  386. i, i_M, i_N : Integer;
  387. begin
  388. i := 0; i_N := 1;
  389. for i_M := 1 to M[0] do
  390. begin
  391. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  392. if (i_N>N[0]) or (N[i_N]>M[i_M]) then
  393. begin
  394. inc(i);
  395. K[i] := M[i_M];
  396. end
  397. else
  398. inc(i_N);
  399. end;
  400. K[0] := i;
  401. move(K, M, succ(i)*sizeOf(Integer));
  402. end(*setminus*);
  403. procedure intersect(var M, N : IntSet);
  404. var
  405. K : IntSet;
  406. i, i_M, i_N : Integer;
  407. begin
  408. i := 0; i_N := 1;
  409. for i_M := 1 to M[0] do
  410. begin
  411. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  412. if (i_N<=N[0]) and (N[i_N]=M[i_M]) then
  413. begin
  414. inc(i);
  415. K[i] := M[i_M];
  416. inc(i_N);
  417. end
  418. end;
  419. K[0] := i;
  420. move(K, M, succ(i)*sizeOf(Integer));
  421. end(*intersect*);
  422. function size(var M : IntSet) : Integer;
  423. begin
  424. size := M[0]
  425. end(*size*);
  426. function member(i : Integer; var M : IntSet) : Boolean;
  427. var l, r, k : Integer;
  428. begin
  429. (* binary search: *)
  430. l := 1; r := M[0];
  431. k := l + (r-l) div 2;
  432. while (l<r) and (M[k]<>i) do
  433. begin
  434. if M[k]<i then
  435. l := succ(k)
  436. else
  437. r := pred(k);
  438. k := l + (r-l) div 2;
  439. end;
  440. member := (k<=M[0]) and (M[k]=i);
  441. end(*member*);
  442. function isempty(var M : IntSet) : Boolean;
  443. begin
  444. isempty := M[0]=0
  445. end(*isempty*);
  446. function equal(var M, N : IntSet) : Boolean;
  447. var i : Integer;
  448. begin
  449. if M[0]<>N[0] then
  450. equal := false
  451. else
  452. begin
  453. for i := 1 to M[0] do
  454. if M[i]<>N[i] then
  455. begin
  456. equal := false;
  457. exit
  458. end;
  459. equal := true
  460. end
  461. end(*equal*);
  462. function subseteq(var M, N : IntSet) : Boolean;
  463. var
  464. i_M, i_N : Integer;
  465. begin
  466. if M[0]>N[0] then
  467. subseteq := false
  468. else
  469. begin
  470. i_N := 1;
  471. for i_M := 1 to M[0] do
  472. begin
  473. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  474. if (i_N>N[0]) or (N[i_N]>M[i_M]) then
  475. begin
  476. subseteq := false;
  477. exit
  478. end
  479. else
  480. inc(i_N);
  481. end;
  482. subseteq := true
  483. end;
  484. end(*subseteq*);
  485. function newIntSet : IntSetPtr;
  486. var
  487. MP : IntSetPtr;
  488. begin
  489. getmem(MP, (max_elems+1)*sizeOf(Integer));
  490. MP^[0] := 0;
  491. newIntSet := MP
  492. end(*newIntSet*);
  493. (* Constructors for regular expressions: *)
  494. function newExpr(node_type : NodeType; n : Integer) : RegExpr;
  495. (* returns new RegExpr node (n: number of bytes to allocate) *)
  496. var x : RegExpr;
  497. begin
  498. getmem(x, sizeOf(NodeType)+n);
  499. x^.node_type := node_type;
  500. newExpr := x
  501. end(*newExpr*);
  502. function markExpr(rule, pos : Integer) : RegExpr;
  503. var x : RegExpr;
  504. begin
  505. x := newExpr(mark_node, 2*sizeOf(Integer));
  506. x^.rule := rule;
  507. x^.pos := pos;
  508. markExpr := x
  509. end(*markExpr*);
  510. function charExpr(c : Char) : RegExpr;
  511. var x : RegExpr;
  512. begin
  513. x := newExpr(char_node, sizeOf(Char));
  514. x^.c := c;
  515. charExpr := x
  516. end(*charExpr*);
  517. function strExpr(str : StrPtr) : RegExpr;
  518. var x : RegExpr;
  519. begin
  520. x := newExpr(str_node, sizeOf(StrPtr));
  521. x^.str := str;
  522. strExpr := x
  523. end(*strExpr*);
  524. function cclassExpr(cc : CClassPtr) : RegExpr;
  525. var x : RegExpr;
  526. begin
  527. x := newExpr(cclass_node, sizeOf(CClassPtr));
  528. x^.cc := cc;
  529. cclassExpr := x
  530. end(*cclassExpr*);
  531. function starExpr(r : RegExpr) : RegExpr;
  532. var x : RegExpr;
  533. begin
  534. x := newExpr(star_node, sizeOf(RegExpr));
  535. x^.r := r;
  536. starExpr := x
  537. end(*starExpr*);
  538. function plusExpr(r : RegExpr) : RegExpr;
  539. var x : RegExpr;
  540. begin
  541. x := newExpr(plus_node, sizeOf(RegExpr));
  542. x^.r := r;
  543. plusExpr := x
  544. end(*plusExpr*);
  545. function optExpr(r : RegExpr) : RegExpr;
  546. var x : RegExpr;
  547. begin
  548. x := newExpr(opt_node, sizeOf(RegExpr));
  549. x^.r := r;
  550. optExpr := x
  551. end(*optExpr*);
  552. function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
  553. var
  554. ri, rmn : RegExpr;
  555. i : Integer;
  556. begin
  557. if (m>n) or (n=0) then
  558. mnExpr := epsExpr
  559. else
  560. begin
  561. (* construct r^m: *)
  562. if m=0 then
  563. ri := epsExpr
  564. else
  565. begin
  566. ri := r;
  567. for i := 2 to m do
  568. ri := catExpr(ri, r);
  569. end;
  570. (* construct r{m,n}: *)
  571. rmn := ri; (* r{m,n} := r^m *)
  572. for i := m+1 to n do
  573. begin
  574. if is_epsExpr(ri) then
  575. ri := r
  576. else
  577. ri := catExpr(ri, r);
  578. rmn := altExpr(rmn, ri) (* r{m,n} := r{m,n} | r^i,
  579. i=m+1,...,n *)
  580. end;
  581. mnExpr := rmn
  582. end
  583. end(*mnExpr*);
  584. function catExpr(r1, r2 : RegExpr) : RegExpr;
  585. var x : RegExpr;
  586. begin
  587. x := newExpr(cat_node, 2*sizeOf(RegExpr));
  588. x^.r1 := r1;
  589. x^.r2 := r2;
  590. catExpr := x
  591. end(*catExpr*);
  592. function altExpr(r1, r2 : RegExpr) : RegExpr;
  593. var x : RegExpr;
  594. begin
  595. x := newExpr(alt_node, 2*sizeOf(RegExpr));
  596. x^.r1 := r1;
  597. x^.r2 := r2;
  598. altExpr := x
  599. end(*altExpr*);
  600. (* Unifiers for regular expressions: *)
  601. function is_epsExpr(r : RegExpr) : Boolean;
  602. begin
  603. is_epsExpr := r=epsExpr
  604. end(*is_epsExpr*);
  605. function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
  606. begin
  607. if r=epsExpr then
  608. is_markExpr := false
  609. else if r^.node_type=mark_node then
  610. begin
  611. is_markExpr := true;
  612. rule := r^.rule;
  613. pos := r^.pos;
  614. end
  615. else
  616. is_markExpr := false
  617. end(*is_markExpr*);
  618. function is_charExpr(r : RegExpr; var c : Char) : Boolean;
  619. begin
  620. if r=epsExpr then
  621. is_charExpr := false
  622. else if r^.node_type=char_node then
  623. begin
  624. is_charExpr := true;
  625. c := r^.c
  626. end
  627. else
  628. is_charExpr := false
  629. end(*is_charExpr*);
  630. function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
  631. begin
  632. if r=epsExpr then
  633. is_strExpr := false
  634. else if r^.node_type=str_node then
  635. begin
  636. is_strExpr := true;
  637. str := r^.str;
  638. end
  639. else
  640. is_strExpr := false
  641. end(*is_strExpr*);
  642. function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
  643. begin
  644. if r=epsExpr then
  645. is_cclassExpr := false
  646. else if r^.node_type=cclass_node then
  647. begin
  648. is_cclassExpr := true;
  649. cc := r^.cc
  650. end
  651. else
  652. is_cclassExpr := false
  653. end(*is_cclassExpr*);
  654. function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  655. begin
  656. if r=epsExpr then
  657. is_starExpr := false
  658. else if r^.node_type=star_node then
  659. begin
  660. is_starExpr := true;
  661. r1 := r^.r
  662. end
  663. else
  664. is_starExpr := false
  665. end(*is_starExpr*);
  666. function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  667. begin
  668. if r=epsExpr then
  669. is_plusExpr := false
  670. else if r^.node_type=plus_node then
  671. begin
  672. is_plusExpr := true;
  673. r1 := r^.r
  674. end
  675. else
  676. is_plusExpr := false
  677. end(*is_plusExpr*);
  678. function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  679. begin
  680. if r=epsExpr then
  681. is_optExpr := false
  682. else if r^.node_type=opt_node then
  683. begin
  684. is_optExpr := true;
  685. r1 := r^.r
  686. end
  687. else
  688. is_optExpr := false
  689. end(*is_optExpr*);
  690. function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  691. begin
  692. if r=epsExpr then
  693. is_catExpr := false
  694. else if r^.node_type=cat_node then
  695. begin
  696. is_catExpr := true;
  697. r1 := r^.r1;
  698. r2 := r^.r2
  699. end
  700. else
  701. is_catExpr := false
  702. end(*is_catExpr*);
  703. function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  704. begin
  705. if r=epsExpr then
  706. is_altExpr := false
  707. else if r^.node_type=alt_node then
  708. begin
  709. is_altExpr := true;
  710. r1 := r^.r1;
  711. r2 := r^.r2
  712. end
  713. else
  714. is_altExpr := false
  715. end(*is_altExpr*);
  716. (* Quicksort: *)
  717. procedure quicksort(lo, hi: Integer;
  718. less : OrderPredicate;
  719. swap : SwapProc);
  720. (* derived from the quicksort routine in QSORT.PAS in the Turbo Pascal
  721. distribution *)
  722. procedure sort(l, r: Integer);
  723. var i, j, k : Integer;
  724. begin
  725. i := l; j := r; k := (l+r) DIV 2;
  726. repeat
  727. while less(i, k) do inc(i);
  728. while less(k, j) do dec(j);
  729. if i<=j then
  730. begin
  731. swap(i, j);
  732. if k=i then k := j (* pivot element swapped! *)
  733. else if k=j then k := i;
  734. inc(i); dec(j);
  735. end;
  736. until i>j;
  737. if l<j then sort(l,j);
  738. if i<r then sort(i,r);
  739. end(*sort*);
  740. begin
  741. if lo<hi then sort(lo,hi);
  742. end(*quicksort*);
  743. (* Generic hash table routines: *)
  744. function hash(str : String; table_size : Integer) : Integer;
  745. (* computes a hash key for str *)
  746. var i, key : Integer;
  747. begin
  748. key := 0;
  749. for i := 1 to length(str) do
  750. inc(key, ord(str[i]));
  751. hash := key mod table_size + 1;
  752. end(*hash*);
  753. procedure newPos(var pos, incr, count : Integer; table_size : Integer);
  754. (* computes a new position in the table (quadratic collision strategy)
  755. - pos: current position (+inc)
  756. - incr: current increment (+2)
  757. - count: current number of collisions (+1)
  758. quadratic collision formula for position of str after n collisions:
  759. pos(str, n) = (hash(str)+n^2) mod table_size +1
  760. note that n^2-(n-1)^2 = 2n-1 <=> n^2 = (n-1)^2 + (2n-1) for n>0,
  761. i.e. the increment inc=2n-1 increments by two in each collision *)
  762. begin
  763. inc(count);
  764. inc(pos, incr);
  765. if pos>table_size then pos := pos mod table_size + 1;
  766. inc(incr, 2)
  767. end(*newPos*);
  768. function key(symbol : String;
  769. table_size : Integer;
  770. lookup : TableLookupProc;
  771. entry : TableEntryProc) : Integer;
  772. var pos, incr, count : Integer;
  773. begin
  774. pos := hash(symbol, table_size);
  775. incr := 1;
  776. count := 0;
  777. while count<=table_size do
  778. if lookup(pos)='' then
  779. begin
  780. entry(pos, symbol);
  781. key := pos;
  782. exit
  783. end
  784. else if lookup(pos)=symbol then
  785. begin
  786. key := pos;
  787. exit
  788. end
  789. else
  790. newPos(pos, incr, count, table_size);
  791. fatal(sym_table_overflow)
  792. end(*key*);
  793. function definedKey(symbol : String;
  794. table_size : Integer;
  795. lookup : TableLookupProc) : Boolean;
  796. var pos, incr, count : Integer;
  797. begin
  798. pos := hash(symbol, table_size);
  799. incr := 1;
  800. count := 0;
  801. while count<=table_size do
  802. if lookup(pos)='' then
  803. begin
  804. definedKey := false;
  805. exit
  806. end
  807. else if lookup(pos)=symbol then
  808. begin
  809. definedKey := true;
  810. exit
  811. end
  812. else
  813. newPos(pos, incr, count, table_size);
  814. definedKey := false
  815. end(*definedKey*);
  816. (* Utility routines: *)
  817. function min(i, j : Integer) : Integer;
  818. begin
  819. if i<j then
  820. min := i
  821. else
  822. min := j
  823. end(*min*);
  824. function max(i, j : Integer) : Integer;
  825. begin
  826. if i>j then
  827. max := i
  828. else
  829. max := j
  830. end(*max*);
  831. function nchars(cc : CClass) : Integer;
  832. var
  833. c : Char;
  834. count : Integer;
  835. begin
  836. count := 0;
  837. for c := #0 to #255 do if c in cc then inc(count);
  838. nchars := count;
  839. end(*nchars*);
  840. function upper(str : String) : String;
  841. var i : Integer;
  842. begin
  843. for i := 1 to length(str) do
  844. str[i] := upCase(str[i]);
  845. upper := str
  846. end(*upper*);
  847. function strip(str : String) : String;
  848. begin
  849. while (length(str)>0) and ((str[1]=' ') or (str[1]=tab)) do
  850. delete(str, 1, 1);
  851. while (length(str)>0) and
  852. ((str[length(str)]= ' ') or
  853. (str[length(str)]=tab)) do
  854. delete(str, length(str), 1);
  855. strip := str;
  856. end(*strip*);
  857. function blankStr(str : String) : String;
  858. var i : Integer;
  859. begin
  860. for i := 1 to length(str) do
  861. if str[i]<>tab then str[i] := ' ';
  862. blankStr := str;
  863. end(*blankStr*);
  864. function intStr(i : Integer) : String;
  865. var s : String;
  866. begin
  867. str(i, s);
  868. intStr := s
  869. end(*intStr*);
  870. function isInt(str : String; var i : Integer) : Boolean;
  871. var res : Integer;
  872. begin
  873. val(str, i, res);
  874. isInt := res = 0;
  875. end(*isInt*);
  876. function path(filename : String) : String;
  877. var i : Integer;
  878. begin
  879. i := length(filename);
  880. while (i>0) and (filename[i]<>'\') and (filename[i]<>':') do
  881. dec(i);
  882. path := copy(filename, 1, i);
  883. end(*path*);
  884. function root(filename : String) : String;
  885. var
  886. i : Integer;
  887. begin
  888. root := filename;
  889. for i := length(filename) downto 1 do
  890. case filename[i] of
  891. '.' :
  892. begin
  893. root := copy(filename, 1, i-1);
  894. exit
  895. end;
  896. '\': exit;
  897. else
  898. end;
  899. end(*addExt*);
  900. function addExt(filename, ext : String) : String;
  901. (* implemented with goto for maximum efficiency *)
  902. label x;
  903. var
  904. i : Integer;
  905. begin
  906. addExt := filename;
  907. for i := length(filename) downto 1 do
  908. case filename[i] of
  909. '.' : exit;
  910. '\': goto x;
  911. else
  912. end;
  913. x : addExt := filename+'.'+ext
  914. end(*addExt*);
  915. function file_size(filename : String) : LongInt;
  916. var f : File;
  917. begin
  918. assign(f, filename);
  919. reset(f, 1);
  920. if ioresult=0 then
  921. file_size := fileSize(f)
  922. else
  923. file_size := 0;
  924. close(f);
  925. end(*file_size*);
  926. (* Utility functions for list generating routines: *)
  927. function charStr(c : char; reserved : CClass) : String;
  928. function octStr(c : char) : String;
  929. (* return octal string representation of character c *)
  930. begin
  931. octStr := intStr(ord(c) div 64)+intStr((ord(c) mod 64) div 8)+
  932. intStr(ord(c) mod 8);
  933. end(*octStr*);
  934. begin
  935. case c of
  936. #0..#7, (* nonprintable characters *)
  937. #11,#14..#31,
  938. #127..#255 : charStr := '\'+octStr(c);
  939. bs : charStr := '\b';
  940. tab : charStr := '\t';
  941. nl : charStr := '\n';
  942. cr : charStr := '\c';
  943. ff : charStr := '\f';
  944. '\' : charStr := '\\';
  945. else if c in reserved then
  946. charStr := '\'+c
  947. else
  948. charStr := c
  949. end
  950. end(*charStr*);
  951. function singleQuoteStr(str : String) : String;
  952. var
  953. i : Integer;
  954. str1 : String;
  955. begin
  956. str1 := '';
  957. for i := 1 to length(str) do
  958. str1 := str1+charStr(str[i], ['''']);
  959. singleQuoteStr := ''''+str1+''''
  960. end(*singleQuoteStr*);
  961. function doubleQuoteStr(str : String) : String;
  962. var
  963. i : Integer;
  964. str1 : String;
  965. begin
  966. str1 := '';
  967. for i := 1 to length(str) do
  968. str1 := str1+charStr(str[i], ['"']);
  969. doubleQuoteStr := '"'+str1+'"'
  970. end(*doubleQuoteStr*);
  971. function cclassStr(cc : CClass) : String;
  972. const
  973. reserved : CClass = ['^','-',']'];
  974. MaxChar = #255;
  975. var
  976. c1, c2 : Char;
  977. str : String;
  978. Quit: Boolean;
  979. begin
  980. if cc=[#1..#255]-[nl] then
  981. cclassStr := '.'
  982. else
  983. begin
  984. str := '';
  985. if nchars(cc)>128 then
  986. begin
  987. str := '^';
  988. cc := [#0..#255]-cc;
  989. end;
  990. c1 := chr(0);
  991. Quit := False;
  992. while not Quit do begin
  993. if c1 in cc then begin
  994. c2 := c1;
  995. while (c2<MaxChar) and (succ(c2) in cc) do
  996. c2 := succ(c2);
  997. if c1=c2
  998. then str := str+charStr(c1, reserved)
  999. else
  1000. if c2=succ(c1)
  1001. then str := str+charStr(c1, reserved)+charStr(c2, reserved)
  1002. else str := str+charStr(c1, reserved)+'-'+charStr(c2, reserved);
  1003. c1 := c2;
  1004. end;
  1005. Quit := c1 = MaxChar;
  1006. if not Quit then
  1007. c1 := Succ(c1);
  1008. end; { of while }
  1009. cclassStr := '['+str+']'
  1010. end
  1011. end(*cclassStr*);
  1012. function cclassOrCharStr(cc : CClass) : String;
  1013. var count : Integer;
  1014. c, c1 : Char;
  1015. begin
  1016. count := 0;
  1017. for c := #0 to #255 do
  1018. if c in cc then
  1019. begin
  1020. c1 := c;
  1021. inc(count);
  1022. if count>1 then
  1023. begin
  1024. cclassOrCharStr := cclassStr(cc);
  1025. exit;
  1026. end;
  1027. end;
  1028. if count=1 then
  1029. cclassOrCharStr := singleQuoteStr(c1)
  1030. else
  1031. cclassOrCharStr := '[]';
  1032. end(*cclassOrCharStr*);
  1033. function regExprStr(r : RegExpr) : String;
  1034. function unparseExpr(r : RegExpr) : String;
  1035. var rule_no, pos : Integer;
  1036. c : Char;
  1037. str : StrPtr;
  1038. cc : CClassPtr;
  1039. r1, r2 : RegExpr;
  1040. begin
  1041. if is_epsExpr(r) then
  1042. unparseExpr := ''
  1043. else if is_markExpr(r, rule_no, pos) then
  1044. unparseExpr := '#('+intStr(rule_no)+','+intStr(pos)+')'
  1045. else if is_charExpr(r, c) then
  1046. unparseExpr := charStr(c, [ '"','.','^','$','[',']','*','+','?',
  1047. '{','}','|','(',')','/','<','>'])
  1048. else if is_strExpr(r, str) then
  1049. unparseExpr := doubleQuoteStr(str^)
  1050. else if is_cclassExpr(r, cc) then
  1051. unparseExpr := cclassStr(cc^)
  1052. else if is_starExpr(r, r1) then
  1053. unparseExpr := unparseExpr(r1)+'*'
  1054. else if is_plusExpr(r, r1) then
  1055. unparseExpr := unparseExpr(r1)+'+'
  1056. else if is_optExpr(r, r1) then
  1057. unparseExpr := unparseExpr(r1)+'?'
  1058. else if is_catExpr(r, r1, r2) then
  1059. unparseExpr := '('+unparseExpr(r1)+unparseExpr(r2)+')'
  1060. else if is_altExpr(r, r1, r2) then
  1061. unparseExpr := '('+unparseExpr(r1)+'|'+unparseExpr(r2)+')'
  1062. else
  1063. fatal('invalid expression');
  1064. end(*unparseExpr*);
  1065. begin
  1066. regExprStr := unparseExpr(r);
  1067. end(*regExprStr*);
  1068. end(*LexBase*).