1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168 |
- {
- This module collects the basic data types and operations used in the TP
- Lex program, and other basic stuff that does not belong anywhere else:
- - Lex input and output files and corresponding bookkeeping information
- used by the parser
- - symbolic character constants
- - dynamically allocated strings and character classes
- - integer sets
- - generic quicksort and hash table routines
- - utilities for list-generating
- - other tiny utilities
- Copyright (c) 1990-92 Albert Graef <[email protected]>
- Copyright (C) 1996 Berend de Boer <[email protected]>
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- $Revision: 1.2 $
- $Modtime: 96-08-01 10:21 $
- $History: LEXBASE.PAS $
- *
- * ***************** Version 2 *****************
- * User: Berend Date: 96-10-10 Time: 21:16
- * Updated in $/Lex and Yacc/tply
- * Updated for protected mode, windows and Delphi 1.X and 2.X.
- }
- unit LexBase;
- interface
- const
- (* symbolic character constants: *)
- bs = #8; (* backspace character *)
- tab = #9; (* tab character *)
- nl = #10; (* newline character *)
- cr = #13; (* carriage return *)
- ff = #12; (* form feed character *)
- var
- (* Filenames: *)
- lfilename : String;
- pasfilename : String;
- lstfilename : String;
- codfilename : String;
- codfilepath1,
- codfilepath2 : String; { Under linux, binary and conf file
- are not in the same path}
- (* Lex input, output, list and code template file: *)
- yyin, yylst, yyout, yycod : Text;
- (* the following values are initialized and updated by the parser: *)
- line : String; (* current input line *)
- lno : Integer; (* current line number *)
- const
- max_elems = 1000; (* maximum size of integer sets *)
- type
- (* String and character class pointers: *)
- StrPtr = ^String;
- CClass = set of Char;
- CClassPtr = ^CClass;
- (* Sorted integer sets: *)
- IntSet = array [0..max_elems] of Integer;
- (* word 0 is size *)
- IntSetPtr = ^IntSet;
- (* Regular expressions: *)
- RegExpr = ^Node;
- NodeType = (mark_node, (* marker node *)
- char_node, (* character node *)
- str_node, (* string node *)
- cclass_node, (* character class node *)
- star_node, (* star node *)
- plus_node, (* plus node *)
- opt_node, (* option node *)
- cat_node, (* concatenation node *)
- alt_node); (* alternatives node (|) *)
- Node = record case node_type : NodeType of
- mark_node : (rule, pos : Integer);
- char_node : (c : Char);
- str_node : (str : StrPtr);
- cclass_node : (cc : CClassPtr);
- star_node, plus_node, opt_node : (r : RegExpr);
- cat_node, alt_node : (r1, r2 : RegExpr);
- end;
- (* Some standard character classes: *)
- const
- letters : CClass = ['A'..'Z','a'..'z','_'];
- digits : CClass = ['0'..'9'];
- alphanums : CClass = ['A'..'Z','a'..'z','_','0'..'9'];
- (* Operations: *)
- (* Strings and character classes: *)
- function newStr(str : String) : StrPtr;
- (* creates a string pointer (only the space actually needed for the given
- string is allocated) *)
- function newCClass(cc : CClass) : CClassPtr;
- (* creates a CClass pointer *)
- (* Integer sets (set arguments are passed by reference even if they are not
- modified, for greater efficiency): *)
- procedure empty(var M : IntSet);
- (* initializes M as empty *)
- procedure singleton(var M : IntSet; i : Integer);
- (* initializes M as a singleton set containing the element i *)
- procedure include(var M : IntSet; i : Integer);
- (* include i in M *)
- procedure exclude(var M : IntSet; i : Integer);
- (* exclude i from M *)
- procedure setunion(var M, N : IntSet);
- (* adds N to M *)
- procedure setminus(var M, N : IntSet);
- (* removes N from M *)
- procedure intersect(var M, N : IntSet);
- (* removes from M all elements NOT in N *)
- function size(var M : IntSet) : Integer;
- (* cardinality of set M *)
- function member(i : Integer; var M : IntSet) : Boolean;
- (* tests for membership of i in M *)
- function isempty(var M : IntSet) : Boolean;
- (* checks whether M is an empty set *)
- function equal(var M, N : IntSet) : Boolean;
- (* checks whether M and N are equal *)
- function subseteq(var M, N : IntSet) : Boolean;
- (* checks whether M is a subset of N *)
- function newIntSet : IntSetPtr;
- (* creates a pointer to an empty integer set *)
- (* Constructors for regular expressions: *)
- const epsExpr : RegExpr = nil;
- (* empty regular expression *)
- function markExpr(rule, pos : Integer) : RegExpr;
- (* markers are used to denote endmarkers of rules, as well as other
- special positions in rules, e.g. the position of the lookahead
- operator; they are considered nullable; by convention, we use
- the following pos numbers:
- - 0: endmarker position
- - 1: lookahead operator position *)
- function charExpr(c : Char) : RegExpr;
- (* character c *)
- function strExpr(str : StrPtr) : RegExpr;
- (* "str" *)
- function cclassExpr(cc : CClassPtr) : RegExpr;
- (* [str] where str are the literals in cc *)
- function starExpr(r : RegExpr) : RegExpr;
- (* r* *)
- function plusExpr(r : RegExpr) : RegExpr;
- (* r+ *)
- function optExpr(r : RegExpr) : RegExpr;
- (* r? *)
- function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
- (* constructor expanding expression r{m,n} to the corresponding
- alt expression r^m|...|r^n *)
- function catExpr(r1, r2 : RegExpr) : RegExpr;
- (* r1r2 *)
- function altExpr(r1, r2 : RegExpr) : RegExpr;
- (* r1|r2 *)
- (* Unifiers for regular expressions:
- The following predicates check whether the specified regular
- expression r is of the denoted type; if the predicate succeeds,
- the other arguments of the predicate are instantiated to the
- corresponding values. *)
- function is_epsExpr(r : RegExpr) : Boolean;
- (* empty regular expression *)
- function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
- (* marker expression *)
- function is_charExpr(r : RegExpr; var c : Char) : Boolean;
- (* character c *)
- function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
- (* "str" *)
- function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
- (* [str] where str are the literals in cc *)
- function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- (* r1* *)
- function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- (* r1+ *)
- function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- (* r1? *)
- function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
- (* r1r2 *)
- function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
- (* r1|r2 *)
- (* Quicksort: *)
- type
- OrderPredicate = function (i, j : Integer) : Boolean;
- SwapProc = procedure (i, j : Integer);
- procedure quicksort(lo, hi: Integer;
- less : OrderPredicate;
- swap : SwapProc);
- (* General inplace sorting procedure based on the quicksort algorithm.
- This procedure can be applied to any sequential data structure;
- only the corresponding routines less which compares, and swap which
- swaps two elements i,j of the target data structure, must be
- supplied as appropriate for the target data structure.
- - lo, hi: the lower and higher indices, indicating the elements to
- be sorted
- - less(i, j): should return true if element no. i `is less than'
- element no. j, and false otherwise; any total quasi-ordering may
- be supplied here (if neither less(i, j) nor less(j, i) then elements
- i and j are assumed to be `equal').
- - swap(i, j): should swap the elements with index i and j *)
- (* Generic hash table routines (based on quadratic rehashing; hence the
- table size must be a prime number): *)
- type
- TableLookupProc = function(k : Integer) : String;
- TableEntryProc = procedure(k : Integer; symbol : String);
- function key(symbol : String;
- table_size : Integer;
- lookup : TableLookupProc;
- entry : TableEntryProc) : Integer;
- (* returns a hash table key for symbol; inserts the symbol into the
- table if necessary
- - table_size is the symbol table size and must be a fixed prime number
- - lookup is the table lookup procedure which should return the string
- at key k in the table ('' if entry is empty)
- - entry is the table entry procedure which is assumed to store the
- given symbol at the given location *)
- function definedKey(symbol : String;
- table_size : Integer;
- lookup : TableLookupProc) : Boolean;
- (* checks the table to see if symbol is in the table *)
- (* Utility routines: *)
- function min(i, j : Integer) : Integer;
- function max(i, j : Integer) : Integer;
- (* minimum and maximum of two integers *)
- function nchars(cc : CClass) : Integer;
- (* returns the cardinality (number of characters) of a character class *)
- function upper(str : String) : String;
- (* returns str converted to uppercase *)
- function strip(str : String) : String;
- (* returns str with leading and trailing blanks stripped off *)
- function blankStr(str : String) : String;
- (* returns string of same length as str, with all non-whitespace characters
- replaced by blanks *)
- function intStr(i : Integer) : String;
- (* returns the string representation of i *)
- function isInt(str : String; var i : Integer) : Boolean;
- (* checks whether str represents an integer; if so, returns the
- value of it in i *)
- function path(filename : String) : String;
- (* returns the path in filename *)
- function root(filename : String) : String;
- (* returns root (i.e. extension stripped from filename) of
- filename *)
- function addExt(filename, ext : String) : String;
- (* if filename has no extension and last filename character is not '.',
- add extension ext to filename *)
- function file_size(filename : String) : LongInt;
- (* determines file size in bytes *)
- (* Utility functions for list generating routines: *)
- function charStr(c : char; reserved : CClass) : String;
- (* returns a print name for character c, using the standard escape
- conventions; reserved is the class of `reserved' special characters
- which should be quoted with \ (\ itself is always quoted) *)
- function singleQuoteStr(str : String) : String;
- (* returns print name of str enclosed in single quotes, using the
- standard escape conventions *)
- function doubleQuoteStr(str : String) : String;
- (* returns print name of str enclosed in double quotes, using the
- standard escape conventions *)
- function cclassStr(cc : CClass) : String;
- (* returns print name of character class cc, using the standard escape
- conventions; if cc contains more than 128 elements, the complement
- notation (^) is used; if cc is the class of all (non-null) characters
- except newline, the period notation is used *)
- function cclassOrCharStr(cc : CClass) : String;
- (* returns a print name for character class cc (either cclassStr, or,
- if cc contains only one element, character in single quotes) *)
- function regExprStr(r : RegExpr) : String;
- (* unparses a regular expression *)
- implementation
- uses LexMsgs;
- (* String and character class pointers: *)
- function newStr(str : String) : StrPtr;
- var strp : StrPtr;
- begin
- getmem(strp, succ(length(str)));
- move(str, strp^, succ(length(str)));
- newStr := strp;
- end(*newStr*);
- function newCClass(cc : CClass) : CClassPtr;
- var ccp : CClassPtr;
- begin
- new(ccp);
- ccp^ := cc;
- newCClass := ccp;
- end(*newCClass*);
- (* Integer sets: *)
- procedure empty(var M : IntSet);
- begin
- M[0] := 0;
- end(*empty*);
- procedure singleton(var M : IntSet; i : Integer);
- begin
- M[0] := 1; M[1] := i;
- end(*singleton*);
- procedure include(var M : IntSet; i : Integer);
- var l, r, k : Integer;
- begin
- (* binary search: *)
- l := 1; r := M[0];
- k := l + (r-l) div 2;
- while (l<r) and (M[k]<>i) do
- begin
- if M[k]<i then
- l := succ(k)
- else
- r := pred(k);
- k := l + (r-l) div 2;
- end;
- if (k>M[0]) or (M[k]<>i) then
- begin
- if M[0]>=max_elems then fatal(intset_overflow);
- if (k<=M[0]) and (M[k]<i) then
- begin
- move(M[k+1], M[k+2], (M[0]-k)*sizeOf(Integer));
- M[k+1] := i;
- end
- else
- begin
- move(M[k], M[k+1], (M[0]-k+1)*sizeOf(Integer));
- M[k] := i;
- end;
- inc(M[0]);
- end;
- end(*include*);
- procedure exclude(var M : IntSet; i : Integer);
- var l, r, k : Integer;
- begin
- (* binary search: *)
- l := 1; r := M[0];
- k := l + (r-l) div 2;
- while (l<r) and (M[k]<>i) do
- begin
- if M[k]<i then
- l := succ(k)
- else
- r := pred(k);
- k := l + (r-l) div 2;
- end;
- if (k<=M[0]) and (M[k]=i) then
- begin
- move(M[k+1], M[k], (M[0]-k)*sizeOf(Integer));
- dec(M[0]);
- end;
- end(*exclude*);
- procedure setunion(var M, N : IntSet);
- var
- K : IntSet;
- i, j, i_M, i_N : Integer;
- begin
- (* merge sort: *)
- i := 0; i_M := 1; i_N := 1;
- while (i_M<=M[0]) and (i_N<=N[0]) do
- begin
- inc(i);
- if i>max_elems then fatal(intset_overflow);
- if M[i_M]<N[i_N] then
- begin
- K[i] := M[i_M]; inc(i_M);
- end
- else if N[i_N]<M[i_M] then
- begin
- K[i] := N[i_N]; inc(i_N);
- end
- else
- begin
- K[i] := M[i_M]; inc(i_M); inc(i_N);
- end
- end;
- for j := i_M to M[0] do
- begin
- inc(i);
- if i>max_elems then fatal(intset_overflow);
- K[i] := M[j];
- end;
- for j := i_N to N[0] do
- begin
- inc(i);
- if i>max_elems then fatal(intset_overflow);
- K[i] := N[j];
- end;
- K[0] := i;
- move(K, M, succ(i)*sizeOf(Integer));
- end(*setunion*);
- procedure setminus(var M, N : IntSet);
- var
- K : IntSet;
- i, i_M, i_N : Integer;
- begin
- i := 0; i_N := 1;
- for i_M := 1 to M[0] do
- begin
- while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
- if (i_N>N[0]) or (N[i_N]>M[i_M]) then
- begin
- inc(i);
- K[i] := M[i_M];
- end
- else
- inc(i_N);
- end;
- K[0] := i;
- move(K, M, succ(i)*sizeOf(Integer));
- end(*setminus*);
- procedure intersect(var M, N : IntSet);
- var
- K : IntSet;
- i, i_M, i_N : Integer;
- begin
- i := 0; i_N := 1;
- for i_M := 1 to M[0] do
- begin
- while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
- if (i_N<=N[0]) and (N[i_N]=M[i_M]) then
- begin
- inc(i);
- K[i] := M[i_M];
- inc(i_N);
- end
- end;
- K[0] := i;
- move(K, M, succ(i)*sizeOf(Integer));
- end(*intersect*);
- function size(var M : IntSet) : Integer;
- begin
- size := M[0]
- end(*size*);
- function member(i : Integer; var M : IntSet) : Boolean;
- var l, r, k : Integer;
- begin
- (* binary search: *)
- l := 1; r := M[0];
- k := l + (r-l) div 2;
- while (l<r) and (M[k]<>i) do
- begin
- if M[k]<i then
- l := succ(k)
- else
- r := pred(k);
- k := l + (r-l) div 2;
- end;
- member := (k<=M[0]) and (M[k]=i);
- end(*member*);
- function isempty(var M : IntSet) : Boolean;
- begin
- isempty := M[0]=0
- end(*isempty*);
- function equal(var M, N : IntSet) : Boolean;
- var i : Integer;
- begin
- if M[0]<>N[0] then
- equal := false
- else
- begin
- for i := 1 to M[0] do
- if M[i]<>N[i] then
- begin
- equal := false;
- exit
- end;
- equal := true
- end
- end(*equal*);
- function subseteq(var M, N : IntSet) : Boolean;
- var
- i_M, i_N : Integer;
- begin
- if M[0]>N[0] then
- subseteq := false
- else
- begin
- i_N := 1;
- for i_M := 1 to M[0] do
- begin
- while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
- if (i_N>N[0]) or (N[i_N]>M[i_M]) then
- begin
- subseteq := false;
- exit
- end
- else
- inc(i_N);
- end;
- subseteq := true
- end;
- end(*subseteq*);
- function newIntSet : IntSetPtr;
- var
- MP : IntSetPtr;
- begin
- getmem(MP, (max_elems+1)*sizeOf(Integer));
- MP^[0] := 0;
- newIntSet := MP
- end(*newIntSet*);
- (* Constructors for regular expressions: *)
- function newExpr(node_type : NodeType; n : Integer) : RegExpr;
- (* returns new RegExpr node (n: number of bytes to allocate) *)
- var x : RegExpr;
- begin
- getmem(x, sizeOf(NodeType)+n);
- x^.node_type := node_type;
- newExpr := x
- end(*newExpr*);
- function markExpr(rule, pos : Integer) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(mark_node, 2*sizeOf(Integer));
- x^.rule := rule;
- x^.pos := pos;
- markExpr := x
- end(*markExpr*);
- function charExpr(c : Char) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(char_node, sizeOf(Char));
- x^.c := c;
- charExpr := x
- end(*charExpr*);
- function strExpr(str : StrPtr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(str_node, sizeOf(StrPtr));
- x^.str := str;
- strExpr := x
- end(*strExpr*);
- function cclassExpr(cc : CClassPtr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(cclass_node, sizeOf(CClassPtr));
- x^.cc := cc;
- cclassExpr := x
- end(*cclassExpr*);
- function starExpr(r : RegExpr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(star_node, sizeOf(RegExpr));
- x^.r := r;
- starExpr := x
- end(*starExpr*);
- function plusExpr(r : RegExpr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(plus_node, sizeOf(RegExpr));
- x^.r := r;
- plusExpr := x
- end(*plusExpr*);
- function optExpr(r : RegExpr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(opt_node, sizeOf(RegExpr));
- x^.r := r;
- optExpr := x
- end(*optExpr*);
- function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
- var
- ri, rmn : RegExpr;
- i : Integer;
- begin
- if (m>n) or (n=0) then
- mnExpr := epsExpr
- else
- begin
- (* construct r^m: *)
- if m=0 then
- ri := epsExpr
- else
- begin
- ri := r;
- for i := 2 to m do
- ri := catExpr(ri, r);
- end;
- (* construct r{m,n}: *)
- rmn := ri; (* r{m,n} := r^m *)
- for i := m+1 to n do
- begin
- if is_epsExpr(ri) then
- ri := r
- else
- ri := catExpr(ri, r);
- rmn := altExpr(rmn, ri) (* r{m,n} := r{m,n} | r^i,
- i=m+1,...,n *)
- end;
- mnExpr := rmn
- end
- end(*mnExpr*);
- function catExpr(r1, r2 : RegExpr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(cat_node, 2*sizeOf(RegExpr));
- x^.r1 := r1;
- x^.r2 := r2;
- catExpr := x
- end(*catExpr*);
- function altExpr(r1, r2 : RegExpr) : RegExpr;
- var x : RegExpr;
- begin
- x := newExpr(alt_node, 2*sizeOf(RegExpr));
- x^.r1 := r1;
- x^.r2 := r2;
- altExpr := x
- end(*altExpr*);
- (* Unifiers for regular expressions: *)
- function is_epsExpr(r : RegExpr) : Boolean;
- begin
- is_epsExpr := r=epsExpr
- end(*is_epsExpr*);
- function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
- begin
- if r=epsExpr then
- is_markExpr := false
- else if r^.node_type=mark_node then
- begin
- is_markExpr := true;
- rule := r^.rule;
- pos := r^.pos;
- end
- else
- is_markExpr := false
- end(*is_markExpr*);
- function is_charExpr(r : RegExpr; var c : Char) : Boolean;
- begin
- if r=epsExpr then
- is_charExpr := false
- else if r^.node_type=char_node then
- begin
- is_charExpr := true;
- c := r^.c
- end
- else
- is_charExpr := false
- end(*is_charExpr*);
- function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
- begin
- if r=epsExpr then
- is_strExpr := false
- else if r^.node_type=str_node then
- begin
- is_strExpr := true;
- str := r^.str;
- end
- else
- is_strExpr := false
- end(*is_strExpr*);
- function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
- begin
- if r=epsExpr then
- is_cclassExpr := false
- else if r^.node_type=cclass_node then
- begin
- is_cclassExpr := true;
- cc := r^.cc
- end
- else
- is_cclassExpr := false
- end(*is_cclassExpr*);
- function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- begin
- if r=epsExpr then
- is_starExpr := false
- else if r^.node_type=star_node then
- begin
- is_starExpr := true;
- r1 := r^.r
- end
- else
- is_starExpr := false
- end(*is_starExpr*);
- function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- begin
- if r=epsExpr then
- is_plusExpr := false
- else if r^.node_type=plus_node then
- begin
- is_plusExpr := true;
- r1 := r^.r
- end
- else
- is_plusExpr := false
- end(*is_plusExpr*);
- function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
- begin
- if r=epsExpr then
- is_optExpr := false
- else if r^.node_type=opt_node then
- begin
- is_optExpr := true;
- r1 := r^.r
- end
- else
- is_optExpr := false
- end(*is_optExpr*);
- function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
- begin
- if r=epsExpr then
- is_catExpr := false
- else if r^.node_type=cat_node then
- begin
- is_catExpr := true;
- r1 := r^.r1;
- r2 := r^.r2
- end
- else
- is_catExpr := false
- end(*is_catExpr*);
- function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
- begin
- if r=epsExpr then
- is_altExpr := false
- else if r^.node_type=alt_node then
- begin
- is_altExpr := true;
- r1 := r^.r1;
- r2 := r^.r2
- end
- else
- is_altExpr := false
- end(*is_altExpr*);
- (* Quicksort: *)
- procedure quicksort(lo, hi: Integer;
- less : OrderPredicate;
- swap : SwapProc);
- (* derived from the quicksort routine in QSORT.PAS in the Turbo Pascal
- distribution *)
- procedure sort(l, r: Integer);
- var i, j, k : Integer;
- begin
- i := l; j := r; k := (l+r) DIV 2;
- repeat
- while less(i, k) do inc(i);
- while less(k, j) do dec(j);
- if i<=j then
- begin
- swap(i, j);
- if k=i then k := j (* pivot element swapped! *)
- else if k=j then k := i;
- inc(i); dec(j);
- end;
- until i>j;
- if l<j then sort(l,j);
- if i<r then sort(i,r);
- end(*sort*);
- begin
- if lo<hi then sort(lo,hi);
- end(*quicksort*);
- (* Generic hash table routines: *)
- function hash(str : String; table_size : Integer) : Integer;
- (* computes a hash key for str *)
- var i, key : Integer;
- begin
- key := 0;
- for i := 1 to length(str) do
- inc(key, ord(str[i]));
- hash := key mod table_size + 1;
- end(*hash*);
- procedure newPos(var pos, incr, count : Integer; table_size : Integer);
- (* computes a new position in the table (quadratic collision strategy)
- - pos: current position (+inc)
- - incr: current increment (+2)
- - count: current number of collisions (+1)
- quadratic collision formula for position of str after n collisions:
- pos(str, n) = (hash(str)+n^2) mod table_size +1
- note that n^2-(n-1)^2 = 2n-1 <=> n^2 = (n-1)^2 + (2n-1) for n>0,
- i.e. the increment inc=2n-1 increments by two in each collision *)
- begin
- inc(count);
- inc(pos, incr);
- if pos>table_size then pos := pos mod table_size + 1;
- inc(incr, 2)
- end(*newPos*);
- function key(symbol : String;
- table_size : Integer;
- lookup : TableLookupProc;
- entry : TableEntryProc) : Integer;
- var pos, incr, count : Integer;
- begin
- pos := hash(symbol, table_size);
- incr := 1;
- count := 0;
- while count<=table_size do
- if lookup(pos)='' then
- begin
- entry(pos, symbol);
- key := pos;
- exit
- end
- else if lookup(pos)=symbol then
- begin
- key := pos;
- exit
- end
- else
- newPos(pos, incr, count, table_size);
- fatal(sym_table_overflow)
- end(*key*);
- function definedKey(symbol : String;
- table_size : Integer;
- lookup : TableLookupProc) : Boolean;
- var pos, incr, count : Integer;
- begin
- pos := hash(symbol, table_size);
- incr := 1;
- count := 0;
- while count<=table_size do
- if lookup(pos)='' then
- begin
- definedKey := false;
- exit
- end
- else if lookup(pos)=symbol then
- begin
- definedKey := true;
- exit
- end
- else
- newPos(pos, incr, count, table_size);
- definedKey := false
- end(*definedKey*);
- (* Utility routines: *)
- function min(i, j : Integer) : Integer;
- begin
- if i<j then
- min := i
- else
- min := j
- end(*min*);
- function max(i, j : Integer) : Integer;
- begin
- if i>j then
- max := i
- else
- max := j
- end(*max*);
- function nchars(cc : CClass) : Integer;
- var
- c : Char;
- count : Integer;
- begin
- count := 0;
- for c := #0 to #255 do if c in cc then inc(count);
- nchars := count;
- end(*nchars*);
- function upper(str : String) : String;
- var i : Integer;
- begin
- for i := 1 to length(str) do
- str[i] := upCase(str[i]);
- upper := str
- end(*upper*);
- function strip(str : String) : String;
- begin
- while (length(str)>0) and ((str[1]=' ') or (str[1]=tab)) do
- delete(str, 1, 1);
- while (length(str)>0) and
- ((str[length(str)]= ' ') or
- (str[length(str)]=tab)) do
- delete(str, length(str), 1);
- strip := str;
- end(*strip*);
- function blankStr(str : String) : String;
- var i : Integer;
- begin
- for i := 1 to length(str) do
- if str[i]<>tab then str[i] := ' ';
- blankStr := str;
- end(*blankStr*);
- function intStr(i : Integer) : String;
- var s : String;
- begin
- str(i, s);
- intStr := s
- end(*intStr*);
- function isInt(str : String; var i : Integer) : Boolean;
- var res : Integer;
- begin
- val(str, i, res);
- isInt := res = 0;
- end(*isInt*);
- function path(filename : String) : String;
- var i : Integer;
- begin
- i := length(filename);
- while (i>0) and (filename[i]<>DirectorySeparator) and (filename[i]<>':') do
- dec(i);
- path := copy(filename, 1, i);
- end(*path*);
- function root(filename : String) : String;
- var
- i : Integer;
- begin
- root := filename;
- for i := length(filename) downto 1 do
- case filename[i] of
- '.' :
- begin
- root := copy(filename, 1, i-1);
- exit
- end;
- DirectorySeparator : exit;
- else
- end;
- end(*root*);
- function addExt(filename, ext : String) : String;
- (* implemented with goto for maximum efficiency *)
- label x;
- var
- i : Integer;
- begin
- addExt := filename;
- for i := length(filename) downto 1 do
- case filename[i] of
- '.' : exit;
- DirectorySeparator: goto x;
- else
- end;
- x : addExt := filename+'.'+ext
- end(*addExt*);
- function file_size(filename : String) : LongInt;
- var f : File;
- begin
- assign(f, filename);
- reset(f, 1);
- if ioresult=0 then
- file_size := fileSize(f)
- else
- file_size := 0;
- close(f);
- end(*file_size*);
- (* Utility functions for list generating routines: *)
- function charStr(c : char; reserved : CClass) : String;
- function octStr(c : char) : String;
- (* return octal string representation of character c *)
- begin
- octStr := intStr(ord(c) div 64)+intStr((ord(c) mod 64) div 8)+
- intStr(ord(c) mod 8);
- end(*octStr*);
- begin
- case c of
- #0..#7, (* nonprintable characters *)
- #11,#14..#31,
- #127..#255 : charStr := '\'+octStr(c);
- bs : charStr := '\b';
- tab : charStr := '\t';
- nl : charStr := '\n';
- cr : charStr := '\c';
- ff : charStr := '\f';
- '\' : charStr := '\\';
- else if c in reserved then
- charStr := '\'+c
- else
- charStr := c
- end
- end(*charStr*);
- function singleQuoteStr(str : String) : String;
- var
- i : Integer;
- str1 : String;
- begin
- str1 := '';
- for i := 1 to length(str) do
- str1 := str1+charStr(str[i], ['''']);
- singleQuoteStr := ''''+str1+''''
- end(*singleQuoteStr*);
- function doubleQuoteStr(str : String) : String;
- var
- i : Integer;
- str1 : String;
- begin
- str1 := '';
- for i := 1 to length(str) do
- str1 := str1+charStr(str[i], ['"']);
- doubleQuoteStr := '"'+str1+'"'
- end(*doubleQuoteStr*);
- function cclassStr(cc : CClass) : String;
- const
- reserved : CClass = ['^','-',']'];
- MaxChar = #255;
- var
- c1, c2 : Char;
- str : String;
- Quit: Boolean;
- begin
- if cc=[#1..#255]-[nl] then
- cclassStr := '.'
- else
- begin
- str := '';
- if nchars(cc)>128 then
- begin
- str := '^';
- cc := [#0..#255]-cc;
- end;
- c1 := chr(0);
- Quit := False;
- while not Quit do begin
- if c1 in cc then begin
- c2 := c1;
- while (c2<MaxChar) and (succ(c2) in cc) do
- c2 := succ(c2);
- if c1=c2
- then str := str+charStr(c1, reserved)
- else
- if c2=succ(c1)
- then str := str+charStr(c1, reserved)+charStr(c2, reserved)
- else str := str+charStr(c1, reserved)+'-'+charStr(c2, reserved);
- c1 := c2;
- end;
- Quit := c1 = MaxChar;
- if not Quit then
- c1 := Succ(c1);
- end; { of while }
- cclassStr := '['+str+']'
- end
- end(*cclassStr*);
- function cclassOrCharStr(cc : CClass) : String;
- var count : Integer;
- c, c1 : Char;
- begin
- count := 0;
- for c := #0 to #255 do
- if c in cc then
- begin
- c1 := c;
- inc(count);
- if count>1 then
- begin
- cclassOrCharStr := cclassStr(cc);
- exit;
- end;
- end;
- if count=1 then
- cclassOrCharStr := singleQuoteStr(c1)
- else
- cclassOrCharStr := '[]';
- end(*cclassOrCharStr*);
- function regExprStr(r : RegExpr) : String;
- function unparseExpr(r : RegExpr) : String;
- var rule_no, pos : Integer;
- c : Char;
- str : StrPtr;
- cc : CClassPtr;
- r1, r2 : RegExpr;
- begin
- if is_epsExpr(r) then
- unparseExpr := ''
- else if is_markExpr(r, rule_no, pos) then
- unparseExpr := '#('+intStr(rule_no)+','+intStr(pos)+')'
- else if is_charExpr(r, c) then
- unparseExpr := charStr(c, [ '"','.','^','$','[',']','*','+','?',
- '{','}','|','(',')','/','<','>'])
- else if is_strExpr(r, str) then
- unparseExpr := doubleQuoteStr(str^)
- else if is_cclassExpr(r, cc) then
- unparseExpr := cclassStr(cc^)
- else if is_starExpr(r, r1) then
- unparseExpr := unparseExpr(r1)+'*'
- else if is_plusExpr(r, r1) then
- unparseExpr := unparseExpr(r1)+'+'
- else if is_optExpr(r, r1) then
- unparseExpr := unparseExpr(r1)+'?'
- else if is_catExpr(r, r1, r2) then
- unparseExpr := '('+unparseExpr(r1)+unparseExpr(r2)+')'
- else if is_altExpr(r, r1, r2) then
- unparseExpr := '('+unparseExpr(r1)+'|'+unparseExpr(r2)+')'
- else
- fatal('invalid expression');
- end(*unparseExpr*);
- begin
- regExprStr := unparseExpr(r);
- end(*regExprStr*);
- end(*LexBase*).
|