lexbase.pas 32 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168
  1. {
  2. This module collects the basic data types and operations used in the TP
  3. Lex program, and other basic stuff that does not belong anywhere else:
  4. - Lex input and output files and corresponding bookkeeping information
  5. used by the parser
  6. - symbolic character constants
  7. - dynamically allocated strings and character classes
  8. - integer sets
  9. - generic quicksort and hash table routines
  10. - utilities for list-generating
  11. - other tiny utilities
  12. Copyright (c) 1990-92 Albert Graef <[email protected]>
  13. Copyright (C) 1996 Berend de Boer <[email protected]>
  14. This program is free software; you can redistribute it and/or modify
  15. it under the terms of the GNU General Public License as published by
  16. the Free Software Foundation; either version 2 of the License, or
  17. (at your option) any later version.
  18. This program is distributed in the hope that it will be useful,
  19. but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. GNU General Public License for more details.
  22. You should have received a copy of the GNU General Public License
  23. along with this program; if not, write to the Free Software
  24. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25. $Revision: 1.2 $
  26. $Modtime: 96-08-01 10:21 $
  27. $History: LEXBASE.PAS $
  28. *
  29. * ***************** Version 2 *****************
  30. * User: Berend Date: 96-10-10 Time: 21:16
  31. * Updated in $/Lex and Yacc/tply
  32. * Updated for protected mode, windows and Delphi 1.X and 2.X.
  33. }
  34. unit LexBase;
  35. interface
  36. const
  37. (* symbolic character constants: *)
  38. bs = #8; (* backspace character *)
  39. tab = #9; (* tab character *)
  40. nl = #10; (* newline character *)
  41. cr = #13; (* carriage return *)
  42. ff = #12; (* form feed character *)
  43. var
  44. (* Filenames: *)
  45. lfilename : String;
  46. pasfilename : String;
  47. lstfilename : String;
  48. codfilename : String;
  49. codfilepath1,
  50. codfilepath2 : String; { Under linux, binary and conf file
  51. are not in the same path}
  52. (* Lex input, output, list and code template file: *)
  53. yyin, yylst, yyout, yycod : Text;
  54. (* the following values are initialized and updated by the parser: *)
  55. line : String; (* current input line *)
  56. lno : Integer; (* current line number *)
  57. const
  58. max_elems = 1000; (* maximum size of integer sets *)
  59. type
  60. (* String and character class pointers: *)
  61. StrPtr = ^String;
  62. CClass = set of Char;
  63. CClassPtr = ^CClass;
  64. (* Sorted integer sets: *)
  65. IntSet = array [0..max_elems] of Integer;
  66. (* word 0 is size *)
  67. IntSetPtr = ^IntSet;
  68. (* Regular expressions: *)
  69. RegExpr = ^Node;
  70. NodeType = (mark_node, (* marker node *)
  71. char_node, (* character node *)
  72. str_node, (* string node *)
  73. cclass_node, (* character class node *)
  74. star_node, (* star node *)
  75. plus_node, (* plus node *)
  76. opt_node, (* option node *)
  77. cat_node, (* concatenation node *)
  78. alt_node); (* alternatives node (|) *)
  79. Node = record case node_type : NodeType of
  80. mark_node : (rule, pos : Integer);
  81. char_node : (c : Char);
  82. str_node : (str : StrPtr);
  83. cclass_node : (cc : CClassPtr);
  84. star_node, plus_node, opt_node : (r : RegExpr);
  85. cat_node, alt_node : (r1, r2 : RegExpr);
  86. end;
  87. (* Some standard character classes: *)
  88. const
  89. letters : CClass = ['A'..'Z','a'..'z','_'];
  90. digits : CClass = ['0'..'9'];
  91. alphanums : CClass = ['A'..'Z','a'..'z','_','0'..'9'];
  92. (* Operations: *)
  93. (* Strings and character classes: *)
  94. function newStr(str : String) : StrPtr;
  95. (* creates a string pointer (only the space actually needed for the given
  96. string is allocated) *)
  97. function newCClass(cc : CClass) : CClassPtr;
  98. (* creates a CClass pointer *)
  99. (* Integer sets (set arguments are passed by reference even if they are not
  100. modified, for greater efficiency): *)
  101. procedure empty(var M : IntSet);
  102. (* initializes M as empty *)
  103. procedure singleton(var M : IntSet; i : Integer);
  104. (* initializes M as a singleton set containing the element i *)
  105. procedure include(var M : IntSet; i : Integer);
  106. (* include i in M *)
  107. procedure exclude(var M : IntSet; i : Integer);
  108. (* exclude i from M *)
  109. procedure setunion(var M, N : IntSet);
  110. (* adds N to M *)
  111. procedure setminus(var M, N : IntSet);
  112. (* removes N from M *)
  113. procedure intersect(var M, N : IntSet);
  114. (* removes from M all elements NOT in N *)
  115. function size(var M : IntSet) : Integer;
  116. (* cardinality of set M *)
  117. function member(i : Integer; var M : IntSet) : Boolean;
  118. (* tests for membership of i in M *)
  119. function isempty(var M : IntSet) : Boolean;
  120. (* checks whether M is an empty set *)
  121. function equal(var M, N : IntSet) : Boolean;
  122. (* checks whether M and N are equal *)
  123. function subseteq(var M, N : IntSet) : Boolean;
  124. (* checks whether M is a subset of N *)
  125. function newIntSet : IntSetPtr;
  126. (* creates a pointer to an empty integer set *)
  127. (* Constructors for regular expressions: *)
  128. const epsExpr : RegExpr = nil;
  129. (* empty regular expression *)
  130. function markExpr(rule, pos : Integer) : RegExpr;
  131. (* markers are used to denote endmarkers of rules, as well as other
  132. special positions in rules, e.g. the position of the lookahead
  133. operator; they are considered nullable; by convention, we use
  134. the following pos numbers:
  135. - 0: endmarker position
  136. - 1: lookahead operator position *)
  137. function charExpr(c : Char) : RegExpr;
  138. (* character c *)
  139. function strExpr(str : StrPtr) : RegExpr;
  140. (* "str" *)
  141. function cclassExpr(cc : CClassPtr) : RegExpr;
  142. (* [str] where str are the literals in cc *)
  143. function starExpr(r : RegExpr) : RegExpr;
  144. (* r* *)
  145. function plusExpr(r : RegExpr) : RegExpr;
  146. (* r+ *)
  147. function optExpr(r : RegExpr) : RegExpr;
  148. (* r? *)
  149. function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
  150. (* constructor expanding expression r{m,n} to the corresponding
  151. alt expression r^m|...|r^n *)
  152. function catExpr(r1, r2 : RegExpr) : RegExpr;
  153. (* r1r2 *)
  154. function altExpr(r1, r2 : RegExpr) : RegExpr;
  155. (* r1|r2 *)
  156. (* Unifiers for regular expressions:
  157. The following predicates check whether the specified regular
  158. expression r is of the denoted type; if the predicate succeeds,
  159. the other arguments of the predicate are instantiated to the
  160. corresponding values. *)
  161. function is_epsExpr(r : RegExpr) : Boolean;
  162. (* empty regular expression *)
  163. function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
  164. (* marker expression *)
  165. function is_charExpr(r : RegExpr; var c : Char) : Boolean;
  166. (* character c *)
  167. function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
  168. (* "str" *)
  169. function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
  170. (* [str] where str are the literals in cc *)
  171. function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  172. (* r1* *)
  173. function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  174. (* r1+ *)
  175. function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  176. (* r1? *)
  177. function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  178. (* r1r2 *)
  179. function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  180. (* r1|r2 *)
  181. (* Quicksort: *)
  182. type
  183. OrderPredicate = function (i, j : Integer) : Boolean;
  184. SwapProc = procedure (i, j : Integer);
  185. procedure quicksort(lo, hi: Integer;
  186. less : OrderPredicate;
  187. swap : SwapProc);
  188. (* General inplace sorting procedure based on the quicksort algorithm.
  189. This procedure can be applied to any sequential data structure;
  190. only the corresponding routines less which compares, and swap which
  191. swaps two elements i,j of the target data structure, must be
  192. supplied as appropriate for the target data structure.
  193. - lo, hi: the lower and higher indices, indicating the elements to
  194. be sorted
  195. - less(i, j): should return true if element no. i `is less than'
  196. element no. j, and false otherwise; any total quasi-ordering may
  197. be supplied here (if neither less(i, j) nor less(j, i) then elements
  198. i and j are assumed to be `equal').
  199. - swap(i, j): should swap the elements with index i and j *)
  200. (* Generic hash table routines (based on quadratic rehashing; hence the
  201. table size must be a prime number): *)
  202. type
  203. TableLookupProc = function(k : Integer) : String;
  204. TableEntryProc = procedure(k : Integer; symbol : String);
  205. function key(symbol : String;
  206. table_size : Integer;
  207. lookup : TableLookupProc;
  208. entry : TableEntryProc) : Integer;
  209. (* returns a hash table key for symbol; inserts the symbol into the
  210. table if necessary
  211. - table_size is the symbol table size and must be a fixed prime number
  212. - lookup is the table lookup procedure which should return the string
  213. at key k in the table ('' if entry is empty)
  214. - entry is the table entry procedure which is assumed to store the
  215. given symbol at the given location *)
  216. function definedKey(symbol : String;
  217. table_size : Integer;
  218. lookup : TableLookupProc) : Boolean;
  219. (* checks the table to see if symbol is in the table *)
  220. (* Utility routines: *)
  221. function min(i, j : Integer) : Integer;
  222. function max(i, j : Integer) : Integer;
  223. (* minimum and maximum of two integers *)
  224. function nchars(cc : CClass) : Integer;
  225. (* returns the cardinality (number of characters) of a character class *)
  226. function upper(str : String) : String;
  227. (* returns str converted to uppercase *)
  228. function strip(str : String) : String;
  229. (* returns str with leading and trailing blanks stripped off *)
  230. function blankStr(str : String) : String;
  231. (* returns string of same length as str, with all non-whitespace characters
  232. replaced by blanks *)
  233. function intStr(i : Integer) : String;
  234. (* returns the string representation of i *)
  235. function isInt(str : String; var i : Integer) : Boolean;
  236. (* checks whether str represents an integer; if so, returns the
  237. value of it in i *)
  238. function path(filename : String) : String;
  239. (* returns the path in filename *)
  240. function root(filename : String) : String;
  241. (* returns root (i.e. extension stripped from filename) of
  242. filename *)
  243. function addExt(filename, ext : String) : String;
  244. (* if filename has no extension and last filename character is not '.',
  245. add extension ext to filename *)
  246. function file_size(filename : String) : LongInt;
  247. (* determines file size in bytes *)
  248. (* Utility functions for list generating routines: *)
  249. function charStr(c : char; reserved : CClass) : String;
  250. (* returns a print name for character c, using the standard escape
  251. conventions; reserved is the class of `reserved' special characters
  252. which should be quoted with \ (\ itself is always quoted) *)
  253. function singleQuoteStr(str : String) : String;
  254. (* returns print name of str enclosed in single quotes, using the
  255. standard escape conventions *)
  256. function doubleQuoteStr(str : String) : String;
  257. (* returns print name of str enclosed in double quotes, using the
  258. standard escape conventions *)
  259. function cclassStr(cc : CClass) : String;
  260. (* returns print name of character class cc, using the standard escape
  261. conventions; if cc contains more than 128 elements, the complement
  262. notation (^) is used; if cc is the class of all (non-null) characters
  263. except newline, the period notation is used *)
  264. function cclassOrCharStr(cc : CClass) : String;
  265. (* returns a print name for character class cc (either cclassStr, or,
  266. if cc contains only one element, character in single quotes) *)
  267. function regExprStr(r : RegExpr) : String;
  268. (* unparses a regular expression *)
  269. implementation
  270. uses LexMsgs;
  271. (* String and character class pointers: *)
  272. function newStr(str : String) : StrPtr;
  273. var strp : StrPtr;
  274. begin
  275. getmem(strp, succ(length(str)));
  276. move(str, strp^, succ(length(str)));
  277. newStr := strp;
  278. end(*newStr*);
  279. function newCClass(cc : CClass) : CClassPtr;
  280. var ccp : CClassPtr;
  281. begin
  282. new(ccp);
  283. ccp^ := cc;
  284. newCClass := ccp;
  285. end(*newCClass*);
  286. (* Integer sets: *)
  287. procedure empty(var M : IntSet);
  288. begin
  289. M[0] := 0;
  290. end(*empty*);
  291. procedure singleton(var M : IntSet; i : Integer);
  292. begin
  293. M[0] := 1; M[1] := i;
  294. end(*singleton*);
  295. procedure include(var M : IntSet; i : Integer);
  296. var l, r, k : Integer;
  297. begin
  298. (* binary search: *)
  299. l := 1; r := M[0];
  300. k := l + (r-l) div 2;
  301. while (l<r) and (M[k]<>i) do
  302. begin
  303. if M[k]<i then
  304. l := succ(k)
  305. else
  306. r := pred(k);
  307. k := l + (r-l) div 2;
  308. end;
  309. if (k>M[0]) or (M[k]<>i) then
  310. begin
  311. if M[0]>=max_elems then fatal(intset_overflow);
  312. if (k<=M[0]) and (M[k]<i) then
  313. begin
  314. move(M[k+1], M[k+2], (M[0]-k)*sizeOf(Integer));
  315. M[k+1] := i;
  316. end
  317. else
  318. begin
  319. move(M[k], M[k+1], (M[0]-k+1)*sizeOf(Integer));
  320. M[k] := i;
  321. end;
  322. inc(M[0]);
  323. end;
  324. end(*include*);
  325. procedure exclude(var M : IntSet; i : Integer);
  326. var l, r, k : Integer;
  327. begin
  328. (* binary search: *)
  329. l := 1; r := M[0];
  330. k := l + (r-l) div 2;
  331. while (l<r) and (M[k]<>i) do
  332. begin
  333. if M[k]<i then
  334. l := succ(k)
  335. else
  336. r := pred(k);
  337. k := l + (r-l) div 2;
  338. end;
  339. if (k<=M[0]) and (M[k]=i) then
  340. begin
  341. move(M[k+1], M[k], (M[0]-k)*sizeOf(Integer));
  342. dec(M[0]);
  343. end;
  344. end(*exclude*);
  345. procedure setunion(var M, N : IntSet);
  346. var
  347. K : IntSet;
  348. i, j, i_M, i_N : Integer;
  349. begin
  350. (* merge sort: *)
  351. i := 0; i_M := 1; i_N := 1;
  352. while (i_M<=M[0]) and (i_N<=N[0]) do
  353. begin
  354. inc(i);
  355. if i>max_elems then fatal(intset_overflow);
  356. if M[i_M]<N[i_N] then
  357. begin
  358. K[i] := M[i_M]; inc(i_M);
  359. end
  360. else if N[i_N]<M[i_M] then
  361. begin
  362. K[i] := N[i_N]; inc(i_N);
  363. end
  364. else
  365. begin
  366. K[i] := M[i_M]; inc(i_M); inc(i_N);
  367. end
  368. end;
  369. for j := i_M to M[0] do
  370. begin
  371. inc(i);
  372. if i>max_elems then fatal(intset_overflow);
  373. K[i] := M[j];
  374. end;
  375. for j := i_N to N[0] do
  376. begin
  377. inc(i);
  378. if i>max_elems then fatal(intset_overflow);
  379. K[i] := N[j];
  380. end;
  381. K[0] := i;
  382. move(K, M, succ(i)*sizeOf(Integer));
  383. end(*setunion*);
  384. procedure setminus(var M, N : IntSet);
  385. var
  386. K : IntSet;
  387. i, i_M, i_N : Integer;
  388. begin
  389. i := 0; i_N := 1;
  390. for i_M := 1 to M[0] do
  391. begin
  392. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  393. if (i_N>N[0]) or (N[i_N]>M[i_M]) then
  394. begin
  395. inc(i);
  396. K[i] := M[i_M];
  397. end
  398. else
  399. inc(i_N);
  400. end;
  401. K[0] := i;
  402. move(K, M, succ(i)*sizeOf(Integer));
  403. end(*setminus*);
  404. procedure intersect(var M, N : IntSet);
  405. var
  406. K : IntSet;
  407. i, i_M, i_N : Integer;
  408. begin
  409. i := 0; i_N := 1;
  410. for i_M := 1 to M[0] do
  411. begin
  412. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  413. if (i_N<=N[0]) and (N[i_N]=M[i_M]) then
  414. begin
  415. inc(i);
  416. K[i] := M[i_M];
  417. inc(i_N);
  418. end
  419. end;
  420. K[0] := i;
  421. move(K, M, succ(i)*sizeOf(Integer));
  422. end(*intersect*);
  423. function size(var M : IntSet) : Integer;
  424. begin
  425. size := M[0]
  426. end(*size*);
  427. function member(i : Integer; var M : IntSet) : Boolean;
  428. var l, r, k : Integer;
  429. begin
  430. (* binary search: *)
  431. l := 1; r := M[0];
  432. k := l + (r-l) div 2;
  433. while (l<r) and (M[k]<>i) do
  434. begin
  435. if M[k]<i then
  436. l := succ(k)
  437. else
  438. r := pred(k);
  439. k := l + (r-l) div 2;
  440. end;
  441. member := (k<=M[0]) and (M[k]=i);
  442. end(*member*);
  443. function isempty(var M : IntSet) : Boolean;
  444. begin
  445. isempty := M[0]=0
  446. end(*isempty*);
  447. function equal(var M, N : IntSet) : Boolean;
  448. var i : Integer;
  449. begin
  450. if M[0]<>N[0] then
  451. equal := false
  452. else
  453. begin
  454. for i := 1 to M[0] do
  455. if M[i]<>N[i] then
  456. begin
  457. equal := false;
  458. exit
  459. end;
  460. equal := true
  461. end
  462. end(*equal*);
  463. function subseteq(var M, N : IntSet) : Boolean;
  464. var
  465. i_M, i_N : Integer;
  466. begin
  467. if M[0]>N[0] then
  468. subseteq := false
  469. else
  470. begin
  471. i_N := 1;
  472. for i_M := 1 to M[0] do
  473. begin
  474. while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
  475. if (i_N>N[0]) or (N[i_N]>M[i_M]) then
  476. begin
  477. subseteq := false;
  478. exit
  479. end
  480. else
  481. inc(i_N);
  482. end;
  483. subseteq := true
  484. end;
  485. end(*subseteq*);
  486. function newIntSet : IntSetPtr;
  487. var
  488. MP : IntSetPtr;
  489. begin
  490. getmem(MP, (max_elems+1)*sizeOf(Integer));
  491. MP^[0] := 0;
  492. newIntSet := MP
  493. end(*newIntSet*);
  494. (* Constructors for regular expressions: *)
  495. function newExpr(node_type : NodeType; n : Integer) : RegExpr;
  496. (* returns new RegExpr node (n: number of bytes to allocate) *)
  497. var x : RegExpr;
  498. begin
  499. getmem(x, sizeOf(NodeType)+n);
  500. x^.node_type := node_type;
  501. newExpr := x
  502. end(*newExpr*);
  503. function markExpr(rule, pos : Integer) : RegExpr;
  504. var x : RegExpr;
  505. begin
  506. x := newExpr(mark_node, 2*sizeOf(Integer));
  507. x^.rule := rule;
  508. x^.pos := pos;
  509. markExpr := x
  510. end(*markExpr*);
  511. function charExpr(c : Char) : RegExpr;
  512. var x : RegExpr;
  513. begin
  514. x := newExpr(char_node, sizeOf(Char));
  515. x^.c := c;
  516. charExpr := x
  517. end(*charExpr*);
  518. function strExpr(str : StrPtr) : RegExpr;
  519. var x : RegExpr;
  520. begin
  521. x := newExpr(str_node, sizeOf(StrPtr));
  522. x^.str := str;
  523. strExpr := x
  524. end(*strExpr*);
  525. function cclassExpr(cc : CClassPtr) : RegExpr;
  526. var x : RegExpr;
  527. begin
  528. x := newExpr(cclass_node, sizeOf(CClassPtr));
  529. x^.cc := cc;
  530. cclassExpr := x
  531. end(*cclassExpr*);
  532. function starExpr(r : RegExpr) : RegExpr;
  533. var x : RegExpr;
  534. begin
  535. x := newExpr(star_node, sizeOf(RegExpr));
  536. x^.r := r;
  537. starExpr := x
  538. end(*starExpr*);
  539. function plusExpr(r : RegExpr) : RegExpr;
  540. var x : RegExpr;
  541. begin
  542. x := newExpr(plus_node, sizeOf(RegExpr));
  543. x^.r := r;
  544. plusExpr := x
  545. end(*plusExpr*);
  546. function optExpr(r : RegExpr) : RegExpr;
  547. var x : RegExpr;
  548. begin
  549. x := newExpr(opt_node, sizeOf(RegExpr));
  550. x^.r := r;
  551. optExpr := x
  552. end(*optExpr*);
  553. function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
  554. var
  555. ri, rmn : RegExpr;
  556. i : Integer;
  557. begin
  558. if (m>n) or (n=0) then
  559. mnExpr := epsExpr
  560. else
  561. begin
  562. (* construct r^m: *)
  563. if m=0 then
  564. ri := epsExpr
  565. else
  566. begin
  567. ri := r;
  568. for i := 2 to m do
  569. ri := catExpr(ri, r);
  570. end;
  571. (* construct r{m,n}: *)
  572. rmn := ri; (* r{m,n} := r^m *)
  573. for i := m+1 to n do
  574. begin
  575. if is_epsExpr(ri) then
  576. ri := r
  577. else
  578. ri := catExpr(ri, r);
  579. rmn := altExpr(rmn, ri) (* r{m,n} := r{m,n} | r^i,
  580. i=m+1,...,n *)
  581. end;
  582. mnExpr := rmn
  583. end
  584. end(*mnExpr*);
  585. function catExpr(r1, r2 : RegExpr) : RegExpr;
  586. var x : RegExpr;
  587. begin
  588. x := newExpr(cat_node, 2*sizeOf(RegExpr));
  589. x^.r1 := r1;
  590. x^.r2 := r2;
  591. catExpr := x
  592. end(*catExpr*);
  593. function altExpr(r1, r2 : RegExpr) : RegExpr;
  594. var x : RegExpr;
  595. begin
  596. x := newExpr(alt_node, 2*sizeOf(RegExpr));
  597. x^.r1 := r1;
  598. x^.r2 := r2;
  599. altExpr := x
  600. end(*altExpr*);
  601. (* Unifiers for regular expressions: *)
  602. function is_epsExpr(r : RegExpr) : Boolean;
  603. begin
  604. is_epsExpr := r=epsExpr
  605. end(*is_epsExpr*);
  606. function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
  607. begin
  608. if r=epsExpr then
  609. is_markExpr := false
  610. else if r^.node_type=mark_node then
  611. begin
  612. is_markExpr := true;
  613. rule := r^.rule;
  614. pos := r^.pos;
  615. end
  616. else
  617. is_markExpr := false
  618. end(*is_markExpr*);
  619. function is_charExpr(r : RegExpr; var c : Char) : Boolean;
  620. begin
  621. if r=epsExpr then
  622. is_charExpr := false
  623. else if r^.node_type=char_node then
  624. begin
  625. is_charExpr := true;
  626. c := r^.c
  627. end
  628. else
  629. is_charExpr := false
  630. end(*is_charExpr*);
  631. function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
  632. begin
  633. if r=epsExpr then
  634. is_strExpr := false
  635. else if r^.node_type=str_node then
  636. begin
  637. is_strExpr := true;
  638. str := r^.str;
  639. end
  640. else
  641. is_strExpr := false
  642. end(*is_strExpr*);
  643. function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
  644. begin
  645. if r=epsExpr then
  646. is_cclassExpr := false
  647. else if r^.node_type=cclass_node then
  648. begin
  649. is_cclassExpr := true;
  650. cc := r^.cc
  651. end
  652. else
  653. is_cclassExpr := false
  654. end(*is_cclassExpr*);
  655. function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  656. begin
  657. if r=epsExpr then
  658. is_starExpr := false
  659. else if r^.node_type=star_node then
  660. begin
  661. is_starExpr := true;
  662. r1 := r^.r
  663. end
  664. else
  665. is_starExpr := false
  666. end(*is_starExpr*);
  667. function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  668. begin
  669. if r=epsExpr then
  670. is_plusExpr := false
  671. else if r^.node_type=plus_node then
  672. begin
  673. is_plusExpr := true;
  674. r1 := r^.r
  675. end
  676. else
  677. is_plusExpr := false
  678. end(*is_plusExpr*);
  679. function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
  680. begin
  681. if r=epsExpr then
  682. is_optExpr := false
  683. else if r^.node_type=opt_node then
  684. begin
  685. is_optExpr := true;
  686. r1 := r^.r
  687. end
  688. else
  689. is_optExpr := false
  690. end(*is_optExpr*);
  691. function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  692. begin
  693. if r=epsExpr then
  694. is_catExpr := false
  695. else if r^.node_type=cat_node then
  696. begin
  697. is_catExpr := true;
  698. r1 := r^.r1;
  699. r2 := r^.r2
  700. end
  701. else
  702. is_catExpr := false
  703. end(*is_catExpr*);
  704. function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
  705. begin
  706. if r=epsExpr then
  707. is_altExpr := false
  708. else if r^.node_type=alt_node then
  709. begin
  710. is_altExpr := true;
  711. r1 := r^.r1;
  712. r2 := r^.r2
  713. end
  714. else
  715. is_altExpr := false
  716. end(*is_altExpr*);
  717. (* Quicksort: *)
  718. procedure quicksort(lo, hi: Integer;
  719. less : OrderPredicate;
  720. swap : SwapProc);
  721. (* derived from the quicksort routine in QSORT.PAS in the Turbo Pascal
  722. distribution *)
  723. procedure sort(l, r: Integer);
  724. var i, j, k : Integer;
  725. begin
  726. i := l; j := r; k := (l+r) DIV 2;
  727. repeat
  728. while less(i, k) do inc(i);
  729. while less(k, j) do dec(j);
  730. if i<=j then
  731. begin
  732. swap(i, j);
  733. if k=i then k := j (* pivot element swapped! *)
  734. else if k=j then k := i;
  735. inc(i); dec(j);
  736. end;
  737. until i>j;
  738. if l<j then sort(l,j);
  739. if i<r then sort(i,r);
  740. end(*sort*);
  741. begin
  742. if lo<hi then sort(lo,hi);
  743. end(*quicksort*);
  744. (* Generic hash table routines: *)
  745. function hash(str : String; table_size : Integer) : Integer;
  746. (* computes a hash key for str *)
  747. var i, key : Integer;
  748. begin
  749. key := 0;
  750. for i := 1 to length(str) do
  751. inc(key, ord(str[i]));
  752. hash := key mod table_size + 1;
  753. end(*hash*);
  754. procedure newPos(var pos, incr, count : Integer; table_size : Integer);
  755. (* computes a new position in the table (quadratic collision strategy)
  756. - pos: current position (+inc)
  757. - incr: current increment (+2)
  758. - count: current number of collisions (+1)
  759. quadratic collision formula for position of str after n collisions:
  760. pos(str, n) = (hash(str)+n^2) mod table_size +1
  761. note that n^2-(n-1)^2 = 2n-1 <=> n^2 = (n-1)^2 + (2n-1) for n>0,
  762. i.e. the increment inc=2n-1 increments by two in each collision *)
  763. begin
  764. inc(count);
  765. inc(pos, incr);
  766. if pos>table_size then pos := pos mod table_size + 1;
  767. inc(incr, 2)
  768. end(*newPos*);
  769. function key(symbol : String;
  770. table_size : Integer;
  771. lookup : TableLookupProc;
  772. entry : TableEntryProc) : Integer;
  773. var pos, incr, count : Integer;
  774. begin
  775. pos := hash(symbol, table_size);
  776. incr := 1;
  777. count := 0;
  778. while count<=table_size do
  779. if lookup(pos)='' then
  780. begin
  781. entry(pos, symbol);
  782. key := pos;
  783. exit
  784. end
  785. else if lookup(pos)=symbol then
  786. begin
  787. key := pos;
  788. exit
  789. end
  790. else
  791. newPos(pos, incr, count, table_size);
  792. fatal(sym_table_overflow)
  793. end(*key*);
  794. function definedKey(symbol : String;
  795. table_size : Integer;
  796. lookup : TableLookupProc) : Boolean;
  797. var pos, incr, count : Integer;
  798. begin
  799. pos := hash(symbol, table_size);
  800. incr := 1;
  801. count := 0;
  802. while count<=table_size do
  803. if lookup(pos)='' then
  804. begin
  805. definedKey := false;
  806. exit
  807. end
  808. else if lookup(pos)=symbol then
  809. begin
  810. definedKey := true;
  811. exit
  812. end
  813. else
  814. newPos(pos, incr, count, table_size);
  815. definedKey := false
  816. end(*definedKey*);
  817. (* Utility routines: *)
  818. function min(i, j : Integer) : Integer;
  819. begin
  820. if i<j then
  821. min := i
  822. else
  823. min := j
  824. end(*min*);
  825. function max(i, j : Integer) : Integer;
  826. begin
  827. if i>j then
  828. max := i
  829. else
  830. max := j
  831. end(*max*);
  832. function nchars(cc : CClass) : Integer;
  833. var
  834. c : Char;
  835. count : Integer;
  836. begin
  837. count := 0;
  838. for c := #0 to #255 do if c in cc then inc(count);
  839. nchars := count;
  840. end(*nchars*);
  841. function upper(str : String) : String;
  842. var i : Integer;
  843. begin
  844. for i := 1 to length(str) do
  845. str[i] := upCase(str[i]);
  846. upper := str
  847. end(*upper*);
  848. function strip(str : String) : String;
  849. begin
  850. while (length(str)>0) and ((str[1]=' ') or (str[1]=tab)) do
  851. delete(str, 1, 1);
  852. while (length(str)>0) and
  853. ((str[length(str)]= ' ') or
  854. (str[length(str)]=tab)) do
  855. delete(str, length(str), 1);
  856. strip := str;
  857. end(*strip*);
  858. function blankStr(str : String) : String;
  859. var i : Integer;
  860. begin
  861. for i := 1 to length(str) do
  862. if str[i]<>tab then str[i] := ' ';
  863. blankStr := str;
  864. end(*blankStr*);
  865. function intStr(i : Integer) : String;
  866. var s : String;
  867. begin
  868. str(i, s);
  869. intStr := s
  870. end(*intStr*);
  871. function isInt(str : String; var i : Integer) : Boolean;
  872. var res : Integer;
  873. begin
  874. val(str, i, res);
  875. isInt := res = 0;
  876. end(*isInt*);
  877. function path(filename : String) : String;
  878. var i : Integer;
  879. begin
  880. i := length(filename);
  881. while (i>0) and (filename[i]<>DirectorySeparator) and (filename[i]<>':') do
  882. dec(i);
  883. path := copy(filename, 1, i);
  884. end(*path*);
  885. function root(filename : String) : String;
  886. var
  887. i : Integer;
  888. begin
  889. root := filename;
  890. for i := length(filename) downto 1 do
  891. case filename[i] of
  892. '.' :
  893. begin
  894. root := copy(filename, 1, i-1);
  895. exit
  896. end;
  897. DirectorySeparator : exit;
  898. else
  899. end;
  900. end(*root*);
  901. function addExt(filename, ext : String) : String;
  902. (* implemented with goto for maximum efficiency *)
  903. label x;
  904. var
  905. i : Integer;
  906. begin
  907. addExt := filename;
  908. for i := length(filename) downto 1 do
  909. case filename[i] of
  910. '.' : exit;
  911. DirectorySeparator: goto x;
  912. else
  913. end;
  914. x : addExt := filename+'.'+ext
  915. end(*addExt*);
  916. function file_size(filename : String) : LongInt;
  917. var f : File;
  918. begin
  919. assign(f, filename);
  920. reset(f, 1);
  921. if ioresult=0 then
  922. file_size := fileSize(f)
  923. else
  924. file_size := 0;
  925. close(f);
  926. end(*file_size*);
  927. (* Utility functions for list generating routines: *)
  928. function charStr(c : char; reserved : CClass) : String;
  929. function octStr(c : char) : String;
  930. (* return octal string representation of character c *)
  931. begin
  932. octStr := intStr(ord(c) div 64)+intStr((ord(c) mod 64) div 8)+
  933. intStr(ord(c) mod 8);
  934. end(*octStr*);
  935. begin
  936. case c of
  937. #0..#7, (* nonprintable characters *)
  938. #11,#14..#31,
  939. #127..#255 : charStr := '\'+octStr(c);
  940. bs : charStr := '\b';
  941. tab : charStr := '\t';
  942. nl : charStr := '\n';
  943. cr : charStr := '\c';
  944. ff : charStr := '\f';
  945. '\' : charStr := '\\';
  946. else if c in reserved then
  947. charStr := '\'+c
  948. else
  949. charStr := c
  950. end
  951. end(*charStr*);
  952. function singleQuoteStr(str : String) : String;
  953. var
  954. i : Integer;
  955. str1 : String;
  956. begin
  957. str1 := '';
  958. for i := 1 to length(str) do
  959. str1 := str1+charStr(str[i], ['''']);
  960. singleQuoteStr := ''''+str1+''''
  961. end(*singleQuoteStr*);
  962. function doubleQuoteStr(str : String) : String;
  963. var
  964. i : Integer;
  965. str1 : String;
  966. begin
  967. str1 := '';
  968. for i := 1 to length(str) do
  969. str1 := str1+charStr(str[i], ['"']);
  970. doubleQuoteStr := '"'+str1+'"'
  971. end(*doubleQuoteStr*);
  972. function cclassStr(cc : CClass) : String;
  973. const
  974. reserved : CClass = ['^','-',']'];
  975. MaxChar = #255;
  976. var
  977. c1, c2 : Char;
  978. str : String;
  979. Quit: Boolean;
  980. begin
  981. if cc=[#1..#255]-[nl] then
  982. cclassStr := '.'
  983. else
  984. begin
  985. str := '';
  986. if nchars(cc)>128 then
  987. begin
  988. str := '^';
  989. cc := [#0..#255]-cc;
  990. end;
  991. c1 := chr(0);
  992. Quit := False;
  993. while not Quit do begin
  994. if c1 in cc then begin
  995. c2 := c1;
  996. while (c2<MaxChar) and (succ(c2) in cc) do
  997. c2 := succ(c2);
  998. if c1=c2
  999. then str := str+charStr(c1, reserved)
  1000. else
  1001. if c2=succ(c1)
  1002. then str := str+charStr(c1, reserved)+charStr(c2, reserved)
  1003. else str := str+charStr(c1, reserved)+'-'+charStr(c2, reserved);
  1004. c1 := c2;
  1005. end;
  1006. Quit := c1 = MaxChar;
  1007. if not Quit then
  1008. c1 := Succ(c1);
  1009. end; { of while }
  1010. cclassStr := '['+str+']'
  1011. end
  1012. end(*cclassStr*);
  1013. function cclassOrCharStr(cc : CClass) : String;
  1014. var count : Integer;
  1015. c, c1 : Char;
  1016. begin
  1017. count := 0;
  1018. for c := #0 to #255 do
  1019. if c in cc then
  1020. begin
  1021. c1 := c;
  1022. inc(count);
  1023. if count>1 then
  1024. begin
  1025. cclassOrCharStr := cclassStr(cc);
  1026. exit;
  1027. end;
  1028. end;
  1029. if count=1 then
  1030. cclassOrCharStr := singleQuoteStr(c1)
  1031. else
  1032. cclassOrCharStr := '[]';
  1033. end(*cclassOrCharStr*);
  1034. function regExprStr(r : RegExpr) : String;
  1035. function unparseExpr(r : RegExpr) : String;
  1036. var rule_no, pos : Integer;
  1037. c : Char;
  1038. str : StrPtr;
  1039. cc : CClassPtr;
  1040. r1, r2 : RegExpr;
  1041. begin
  1042. if is_epsExpr(r) then
  1043. unparseExpr := ''
  1044. else if is_markExpr(r, rule_no, pos) then
  1045. unparseExpr := '#('+intStr(rule_no)+','+intStr(pos)+')'
  1046. else if is_charExpr(r, c) then
  1047. unparseExpr := charStr(c, [ '"','.','^','$','[',']','*','+','?',
  1048. '{','}','|','(',')','/','<','>'])
  1049. else if is_strExpr(r, str) then
  1050. unparseExpr := doubleQuoteStr(str^)
  1051. else if is_cclassExpr(r, cc) then
  1052. unparseExpr := cclassStr(cc^)
  1053. else if is_starExpr(r, r1) then
  1054. unparseExpr := unparseExpr(r1)+'*'
  1055. else if is_plusExpr(r, r1) then
  1056. unparseExpr := unparseExpr(r1)+'+'
  1057. else if is_optExpr(r, r1) then
  1058. unparseExpr := unparseExpr(r1)+'?'
  1059. else if is_catExpr(r, r1, r2) then
  1060. unparseExpr := '('+unparseExpr(r1)+unparseExpr(r2)+')'
  1061. else if is_altExpr(r, r1, r2) then
  1062. unparseExpr := '('+unparseExpr(r1)+'|'+unparseExpr(r2)+')'
  1063. else
  1064. fatal('invalid expression');
  1065. end(*unparseExpr*);
  1066. begin
  1067. regExprStr := unparseExpr(r);
  1068. end(*regExprStr*);
  1069. end(*LexBase*).