\documentstyle[11pt]{article}
\title{TP Lex and Yacc -- The Compiler Writer's Tools for Turbo Pascal\\
Version 4.1 User Manual}
\author{Albert Gr\"af\\
Department of Musicinformatics\\
Johannes Gutenberg-University Mainz\\
\\
[email protected]}
\date{April 1998}
\setlength{\topmargin}{0cm}
\setlength{\oddsidemargin}{0cm}
\setlength{\evensidemargin}{0cm}
\setlength{\textwidth}{16cm}
\setlength{\textheight}{21cm}
%\setlength{\parindent}{0pt}
\parskip=4pt plus 1pt minus 1pt
\itemsep=0pt
\renewcommand{\baselinestretch}{1.1}
\unitlength=1mm
%\tolerance=500
%\parskip=0.1cm
\leftmargini 1.5em
\leftmarginii 1.5em \leftmarginiii 1.5em \leftmarginiv 1.5em \leftmarginv 1.5em
\leftmarginvi 1.5em
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
This document describes the TP Lex and Yacc compiler generator toolset.
These tools are designed especially to help you prepare compilers and
similar programs like text processing utilities and command language
interpreters with the Turbo Pascal (TM) programming language.
TP Lex and Yacc are Turbo Pascal adaptations of the well-known UNIX (TM)
utilities Lex and Yacc, which were written by M.E. Lesk and S.C. Johnson
at Bell Laboratories, and are used with the C programming language. TP Lex
and Yacc are intended to be approximately ``compatible'' with these programs.
However, they are an independent development of the author, based on the
techniques described in the famous ``dragon book'' of Aho, Sethi and Ullman
(Aho, Sethi, Ullman: {\em Compilers: principles, techniques and tools,\/}
Reading (Mass.), Addison-Wesley, 1986).

Version 4.1 of TP Lex and Yacc works with all recent flavours of Turbo/Borland
Pascal, including Delphi, and with the Free Pascal Compiler, a free Turbo
Pascal-compatible compiler which currently runs on DOS and Linux (other ports
are under development). Recent information about TP Lex/Yacc, as well as the
sources, is available from the TPLY homepage:
\begin{quote}\begin{verbatim}
http://www.musikwissenschaft.uni-mainz.de/~ag/tply
\end{verbatim}\end{quote}
For information about the Free Pascal Compiler, please refer to:
\begin{quote}\begin{verbatim}
http://www.freepascal.org
\end{verbatim}\end{quote}
TP Lex and Yacc, like any other tools of this kind, are not intended for
novices or casual programmers; they require extensive programming experience
as well as a thorough understanding of the principles of parser design and
implementation to be put to work successfully. But if you are a seasoned
Turbo Pascal programmer with some background in compiler design and formal
language theory, you will almost certainly find TP Lex and Yacc to be a
powerful extension of your Turbo Pascal toolset.

This manual tells you how to get started with the TP Lex and Yacc programs
and provides a short description of these programs. Some knowledge about
the C versions of Lex and Yacc will be useful, although not strictly
necessary. For further reading, you may also refer to:
\begin{itemize}
\item
Aho, Sethi and Ullman: {\em Compilers: principles, techniques and
tools.\/} Reading (Mass.), Addison-Wesley, 1986.
\item
Johnson, S.C.: {\em Yacc -- yet another compiler-compiler.\/} CSTR-32,
Bell Telephone Laboratories, 1974.
\item
Lesk, M.E.: {\em Lex -- a lexical analyser generator.\/} CSTR-39, Bell
Telephone Laboratories, 1975.
\item
Schreiner, Friedman: {\em Introduction to compiler construction with
UNIX.\/} Prentice-Hall, 1985.
\item
The Unix Programmer's Manual, Sections `Lex' and `Yacc'.
\end{itemize}
\subsection*{Credits}
I would like to thank Berend de Boer ([email protected]), who adapted TP Lex
and Yacc to take advantage of the large memory models in Borland Pascal 7.0
and Delphi, and Michael Van Canneyt ([email protected]),
the maintainer of the Linux version of the Free Pascal compiler, who is
responsible for the Free Pascal port. And of course thanks are due to the many
TP Lex/Yacc users all over the world for their support and comments which
helped to improve these programs.
\subsection*{Getting Started}
Instructions on how to compile and install TP Lex and Yacc on all supported
platforms can be found in the \verb"README" file contained in the
distribution.

Once you have installed TP Lex and Yacc on your system, you can compile your
first TP Lex and Yacc program \verb"expr". \verb"Expr" is a simple desktop
calculator program contained in the distribution, which consists of a lexical
analyzer in the TP Lex source file \verb"exprlex.l" and the parser and main
program in the TP Yacc source file \verb"expr.y". To compile these programs,
issue the commands
\begin{quote}\begin{verbatim}
lex exprlex
yacc expr
\end{verbatim}\end{quote}
That's it! You now have the Turbo Pascal sources (\verb"exprlex.pas" and
\verb"expr.pas") for the \verb"expr" program. Use the Turbo Pascal
compiler to compile these programs as follows:
\begin{quote}\begin{verbatim}
tpc expr
\end{verbatim}\end{quote}
(Of course, the precise compilation command depends on the type of compiler
you are using. Thus you may have to replace \verb"tpc" with \verb"bpc",
\verb"dcc" or \verb"dcc32", depending on the version of the
Turbo/Borland/Delphi compiler you have, and with \verb"ppc386" for the Free
Pascal compiler. If you are using TP Lex and Yacc with Free Pascal under
Linux, the corresponding commands are:
\begin{quote}\begin{verbatim}
plex exprlex
pyacc expr
ppc386 expr
\end{verbatim}\end{quote}
Note that in the Linux version, the programs are named \verb"plex" and
\verb"pyacc" to avoid name clashes with the corresponding UNIX utilities.)

Having compiled \verb"expr.pas", you can execute the \verb"expr" program and
type some expressions to see it work (terminate the program with an empty
line). There are a number of other sample TP Lex and Yacc programs (\verb".l"
and \verb".y" files) in the distribution, including a TP Yacc cross-reference
utility and a complete parser for Standard Pascal.

The TP Lex and Yacc programs recognize some options which may be specified
anywhere on the command line. E.g.,
\begin{quote}\begin{verbatim}
lex -o exprlex
\end{verbatim}\end{quote}
runs TP Lex with ``DFA optimization'' and
\begin{quote}\begin{verbatim}
yacc -v expr
\end{verbatim}\end{quote}
runs TP Yacc in ``verbose'' mode (TP Yacc generates a readable description
of the generated parser).

The TP Lex and Yacc programs use the following default filename extensions:
\begin{itemize}
\item \verb".l": TP Lex input files
\item \verb".y": TP Yacc input files
\item \verb".pas": TP Lex and Yacc output files
\end{itemize}
As usual, you may override default filename extensions by explicitly
specifying suffixes.
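For instance, assuming a hypothetical TP Lex source file named
\verb"scan.src", the command
\begin{quote}\begin{verbatim}
lex scan.src scanner.pas
\end{verbatim}\end{quote}
processes \verb"scan.src" (instead of \verb"scan.l") and writes the generated
analyzer to \verb"scanner.pas".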
If you ever forget how to run TP Lex and Yacc, you can issue the command
\verb"lex" or \verb"yacc" (resp.\ \verb"plex" or \verb"pyacc")
without arguments to get a short summary of the command line syntax.
\section{TP Lex}
This section describes the TP Lex lexical analyzer generator.
\subsection{Usage}
\begin{quote}\begin{verbatim}
lex [options] lex-file[.l] [output-file[.pas]]
\end{verbatim}\end{quote}
\subsection{Options}
\begin{description}
\item[\verb"-v"]
``Verbose:'' Lex generates a readable description of the generated
lexical analyzer, written to \verb"lex-file" with new extension \verb".lst".
\item[\verb"-o"]
``Optimize:'' Lex optimizes DFA tables to produce a minimal DFA.
\end{description}
\subsection{Description}
TP Lex is a program generator that is used to generate the Turbo Pascal
source code for a lexical analyzer subroutine from the specification
of an input language by a regular expression grammar.

TP Lex parses the source grammar contained in \verb"lex-file" (with default
suffix \verb".l") and writes the constructed lexical analyzer subroutine
to the specified \verb"output-file" (with default suffix \verb".pas"); if no
output file is specified, output goes to \verb"lex-file" with new suffix
\verb".pas". If any errors are found during compilation, error messages are
written to the list file (\verb"lex-file" with new suffix \verb".lst").

The generated output file contains a lexical analyzer routine, \verb"yylex",
implemented as:
\begin{quote}\begin{verbatim}
function yylex : Integer;
\end{verbatim}\end{quote}
This routine has to be called by your main program to execute the lexical
analyzer. The return value of the \verb"yylex" routine usually denotes the
number of a token recognized by the lexical analyzer (see the \verb"return"
routine in the \verb"LexLib" unit). At end-of-file the \verb"yylex" routine
normally returns \verb"0".
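For instance, a minimal main program which simply runs the lexical analyzer
over the whole input might read as follows (such a program may be placed in
the auxiliary procedures section of the Lex source, see below):
\begin{quote}\begin{verbatim}
begin
  (* call yylex until it reports end-of-file *)
  while yylex<>0 do ;
end.
\end{verbatim}\end{quote}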
The code template for the \verb"yylex" routine may be found in the
\verb"yylex.cod" file. This file is needed by TP Lex when it constructs the
output file. It must be present either in the current directory or in the
directory from which TP Lex was executed (TP Lex searches these directories in
the indicated order). (NB: For the Linux/Free Pascal version, the code
template is searched for in a directory defined at compile time instead of the
execution path, usually \verb"/usr/lib/fpc/lexyacc".)

The TP Lex library (\verb"LexLib") unit is required by programs using
Lex-generated lexical analyzers; you will therefore have to put an appropriate
\verb"uses" clause into your program or unit that contains the lexical
analyzer routine. The \verb"LexLib" unit also provides various useful utility
routines; see the file \verb"lexlib.pas" for further information.
\subsection{Lex Source}
A TP Lex program consists of three sections separated with the \verb"%%"
delimiter:
\begin{quote}\begin{verbatim}
definitions
%%
rules
%%
auxiliary procedures
\end{verbatim}\end{quote}
All sections may be empty. The TP Lex language is line-oriented; definitions
and rules are separated by line breaks. There is no special notation for
comments, but (Turbo Pascal style) comments may be included as Turbo Pascal
fragments (see below).

The definitions section may contain the following elements:
\begin{itemize}
\item
regular definitions in the format:
\begin{quote}\begin{verbatim}
name substitution
\end{verbatim}\end{quote}
which serve to abbreviate common subexpressions. The \verb"{name}"
notation causes the corresponding substitution from the definitions
section to be inserted into a regular expression. The name must be
a legal identifier (letter followed by a sequence of letters and digits;
the underscore counts as a letter; upper- and lowercase are distinct).
Regular definitions must be non-recursive.
\item
start state definitions in the format:
\begin{quote}\begin{verbatim}
%start name ...
\end{verbatim}\end{quote}
which are used in specifying start conditions on rules (described
below). The \verb"%start" keyword may also be abbreviated as \verb"%s"
or \verb"%S".
\item
Turbo Pascal declarations enclosed between \verb"%{" and \verb"%}".
These will be inserted into the output file (at global scope). Also,
any line that does not look like a Lex definition (e.g., starts with
a blank or tab) will be treated as Turbo Pascal code. (In particular,
this also allows you to include Turbo Pascal comments in your Lex
program.)
\end{itemize}
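For illustration, a definitions section combining these elements might look
as follows (all names are, of course, hypothetical):
\begin{quote}\begin{verbatim}
%start COMMENT
D  [0-9]
L  [a-zA-Z_]
%{
var lineCount : Integer;  (* a global Turbo Pascal declaration *)
%}
\end{verbatim}\end{quote}
Here \verb"{D}" and \verb"{L}" may be used in the rules section as
abbreviations for digits and letters, and \verb"COMMENT" may be used in
start conditions (see Section {\em Start Conditions\/}).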
The rules section of a TP Lex program contains the actual specification of
the lexical analyzer routine. It may be thought of as a big \verb"CASE"
statement discriminating over the different patterns to be matched and
listing the corresponding statements (actions) to be executed. Each rule
consists of a regular expression describing the strings to be matched in the
input, and a corresponding action, a Turbo Pascal statement to be executed
when the expression matches. Expression and statement are delimited with
whitespace (blanks and/or tabs). Thus the format of a Lex grammar rule is:
\begin{quote}\begin{verbatim}
expression statement;
\end{verbatim}\end{quote}
Note that the action must be a single Turbo Pascal statement terminated
with a semicolon (use \verb"begin ... end" for compound statements). The
statement may span multiple lines if the subsequent lines are indented with
at least one blank or tab. The action may also be replaced by the \verb"|"
character, indicating that the action for this rule is the same as that for
the next one.

The TP Lex library unit provides various variables and routines which are
useful in the programming of actions. In particular, the \verb"yytext" string
variable holds the text of the matched string, and the \verb"yyleng" Byte
variable its length.
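For instance, the following (hypothetical) rule prints each identifier it
recognizes together with its length:
\begin{quote}\begin{verbatim}
[A-Za-z_][A-Za-z0-9_]*   writeln(yytext, ' (', yyleng, ' characters)');
\end{verbatim}\end{quote}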
Regular expressions are used to describe the strings to be matched in a
grammar rule. They are built from the usual constructs describing character
classes and sequences, and operators specifying repetitions and alternatives.
The precise format of regular expressions is described in the next section.

The rules section may also start with some Turbo Pascal declarations
(enclosed in \verb"%{ %}") which are treated as local declarations of the
actions routine.

Finally, the auxiliary procedures section may contain arbitrary Turbo
Pascal code (such as supporting routines or a main program) which is
simply tacked on to the end of the output file. The auxiliary procedures
section is optional.
\subsection{Regular Expressions}
Table \ref{tab1} summarizes the format of the regular expressions
recognized by TP Lex (also compare Aho, Sethi, Ullman 1986, fig.\ 3.48).
$c$ stands for a single character, $s$ for a string, $r$ for a regular
expression, and $n,m$ for nonnegative integers.
\begin{table*}\centering
\begin{tabular}{c|c|c}
\hline\hline
{\sc Expression}& {\sc Matches}& {\sc Example}\\
\hline
$c$& any non-operator character $c$& \verb"a"\\
\verb"\"$c$& character $c$ literally& \verb"\*"\\
\verb'"'$s$\verb'"'& string $s$ literally& \verb'"**"'\\
\verb"."& any character but newline& \verb"a.*b"\\
\verb"^"& beginning of line& \verb"^abc"\\
\verb"$"& end of line& \verb"abc$"\\
\verb"["$s$\verb"]"& any character in $s$& \verb"[abc]"\\
\verb"[^"$s$\verb"]"& any character not in $s$& \verb"[^abc]"\\
$r$\verb"*"& zero or more $r$'s& \verb"a*"\\
$r$\verb"+"& one or more $r$'s& \verb"a+"\\
$r$\verb"?"& zero or one $r$& \verb"a?"\\
$r$\verb"{"$m$\verb","$n$\verb"}"& $m$ to $n$ occurrences of $r$& \verb"a{1,5}"\\
$r$\verb"{"$m$\verb"}"& $m$ occurrences of $r$& \verb"a{5}"\\
$r_1r_2$& $r_1$ then $r_2$& \verb"ab"\\
$r_1$\verb"|"$r_2$& $r_1$ or $r_2$& \verb"a|b"\\
\verb"("$r$\verb")"& $r$& \verb"(a|b)"\\
$r_1$\verb"/"$r_2$& $r_1$ when followed by $r_2$& \verb"a/b"\\
\verb"<"$x$\verb">"$r$& $r$ when in start condition $x$& \verb"<x>abc"\\
\hline
\end{tabular}
\caption{Regular expressions.}
\label{tab1}
\end{table*}
The operators \verb"*", \verb"+", \verb"?" and \verb"{}" have highest
precedence, followed by concatenation. The \verb"|" operator has lowest
precedence. Parentheses \verb"()" may be used to group expressions and
override default precedences. The \verb"<>" and \verb"/" operators may only
occur once in an expression.

The usual C-like escapes are recognized:
\begin{itemize}
\item \verb"\n" denotes newline
\item \verb"\r" denotes carriage return
\item \verb"\t" denotes tab
\item \verb"\b" denotes backspace
\item \verb"\f" denotes form feed
\item \verb"\"$nnn$ denotes character no.\ $nnn$ in octal base
\end{itemize}
You can also use the \verb"\" character to quote characters which would
otherwise be interpreted as operator symbols. In character classes, you may
use the \verb"-" character to denote ranges of characters. For instance,
\verb"[a-z]" denotes the class of all lowercase letters.
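For example, the following (hypothetical) rules use character classes, ranges
and an escaped operator symbol to recognize identifiers and Turbo Pascal-style
hexadecimal numbers (the token constants \verb"ID" and \verb"HEXNUM" are
assumed to be defined elsewhere, e.g.\ by a TP Yacc grammar, and \verb"return"
is the \verb"LexLib" routine mentioned above):
\begin{quote}\begin{verbatim}
[A-Za-z_][A-Za-z0-9_]*   return(ID);
\$[0-9A-Fa-f]+           return(HEXNUM);
\end{verbatim}\end{quote}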
The expressions in a TP Lex program may be ambiguous, i.e.\ there may be
inputs which match more than one rule. In such a case, the lexical analyzer
prefers the longest match and, if it still has a choice between different
rules, it picks the first of these. If no rule matches, the lexical analyzer
executes a default action which consists of copying the input character
to the output unchanged. Thus, if the purpose of a lexical analyzer is
to translate some parts of the input, and leave the rest unchanged, you
only have to specify the patterns which have to be treated specially. If,
however, the lexical analyzer has to absorb its whole input, you will have
to provide rules that match everything. E.g., you might use the rules
\begin{quote}\begin{verbatim}
.  |
\n ;
\end{verbatim}\end{quote}
which match ``any other character'' (and ignore it).

Sometimes certain patterns have to be analyzed differently depending on some
amount of context in which the pattern appears. In such a case the \verb"/"
operator is useful. For instance, the expression \verb"a/b" matches \verb"a",
but only if followed by \verb"b". Note that the \verb"b" does not belong to
the match; rather, the lexical analyzer, when matching an \verb"a", will look
ahead in the input to see whether it is followed by a \verb"b", before it
declares that it has matched an \verb"a". Such lookahead may be arbitrarily
complex (up to the size of the \verb"LexLib" input buffer). E.g., the pattern
\verb"a/.*b" matches an \verb"a" which is followed by a \verb"b" somewhere on
the same input line. TP Lex also has a means to specify left context which is
described in the next section.
\subsection{Start Conditions}
TP Lex provides some features which make it possible to handle left context.
The \verb"^" character at the beginning of a regular expression may be used
to denote the beginning of the line. More distant left context can be
described conveniently by using start conditions on rules.

Any rule which is prefixed with the \verb"<>" construct is only valid if the
lexical analyzer is in the denoted start state. For instance, the expression
\verb"<x>a" can only be matched if the lexical analyzer is in start state
\verb"x". You can have multiple start states in a rule; e.g., \verb"<x,y>a"
can be matched in start states \verb"x" or \verb"y".

Start states have to be declared in the definitions section by means of
one or more start state definitions (see above). The lexical analyzer enters
a start state through a call to the \verb"LexLib" routine \verb"start". E.g.,
you may write:
\begin{quote}\begin{verbatim}
%start x y
%%
<x>a   start(y);
<y>b   start(x);
%%
begin
  start(x); if yylex=0 then ;
end.
\end{verbatim}\end{quote}
Upon initialization, the lexical analyzer is put into state \verb"x". It then
proceeds in state \verb"x" until it matches an \verb"a" which puts it into
state \verb"y". In state \verb"y" it may match a \verb"b" which puts it into
state \verb"x" again, etc.

Start conditions are useful when certain constructs have to be analyzed
differently depending on some left context (such as a special character
at the beginning of the line), and if multiple lexical analyzers have to
work in concert. If a rule is not prefixed with a start condition, it is
valid in all user-defined start states, as well as in the lexical analyzer's
default start state.
\subsection{Lex Library}
The TP Lex library (\verb"LexLib") unit provides various variables and
routines which are used by Lex-generated lexical analyzers and application
programs. It provides the input and output streams and other internal data
structures used by the lexical analyzer routine, and supplies some variables
and utility routines which may be used by actions and application programs.
Refer to the file \verb"lexlib.pas" for a closer description.

You can also modify the Lex library unit (and/or the code template in the
\verb"yylex.cod" file) to customize TP Lex to your target applications. E.g.,
you might wish to optimize the code of the lexical analyzer for some
special application, make the analyzer read from/write to memory instead
of files, etc.
\subsection{Implementation Restrictions}
Internal table sizes and the main memory available limit the complexity of
source grammars that TP Lex can handle. There is currently no way to change
internal table sizes (apart from modifying the sources of TP Lex
itself), but the maximum table sizes provided by TP Lex seem to be large
enough to handle most realistic applications. The actual table sizes depend on
the particular implementation (they are much larger than the defaults if TP
Lex has been compiled with one of the 32 bit compilers such as Delphi 2 or
Free Pascal), and are shown in the statistics printed by TP Lex when a
compilation is finished. The units given there are ``p'' (positions, i.e.\
items in the position table used to construct the DFA), ``s'' (DFA states) and
``t'' (transitions of the generated DFA).

As implemented, the generated DFA table is stored as a typed array constant
which is inserted into the \verb"yylex.cod" code template. The transitions in
each state are stored in order. Of course it would have been more efficient to
generate a big \verb"CASE" statement instead, but I found that this may cause
problems with the encoding of large DFA tables because Turbo Pascal has
a quite rigid limit on the code size of individual procedures. I decided to
use a scheme in which transitions on different symbols to the same state are
merged into one single transition (specifying a character set and the
corresponding next state). This keeps the number of transitions in each state
quite small and still allows fairly efficient access to the transition
table.

The TP Lex program has an option (\verb"-o") to optimize DFA tables. This
causes a minimal DFA to be generated, using the algorithm described in Aho,
Sethi, Ullman (1986). Although the absolute limit on the number of DFA states
that TP Lex can handle is at least 300, TP Lex poses an additional restriction
(100) on the number of states in the initial partition of the DFA optimization
algorithm. Thus, you may get a fatal \verb"integer set overflow" message when
using the \verb"-o" option even when TP Lex is able to generate an unoptimized
DFA. In such cases you will just have to be content with the unoptimized DFA.
(Hopefully, this will be fixed in a future version. Anyhow, using the merged
transitions scheme described above, TP Lex usually constructs unoptimized
DFAs which are not far from being optimal, and thus in most cases DFA
optimization won't have a great impact on DFA table sizes.)
\subsection{Differences from UNIX Lex}
Major differences between TP Lex and UNIX Lex are listed below.
\begin{itemize}
\item
TP Lex produces output code for Turbo Pascal, rather than for C.
\item
Character tables (\verb"%T") are not supported; neither are any
directives to determine internal table sizes (\verb"%p", \verb"%n",
etc.).
\item
Library routines are named differently from the UNIX version (e.g.,
the \verb"start" routine takes the place of the \verb"BEGIN" macro of
UNIX Lex), and, of course, all macros of UNIX Lex (\verb"ECHO",
\verb"REJECT", etc.) had to be implemented as procedures.
\item
The TP Lex library unit starts counting line numbers at 0, incrementing
the count {\em before\/} a line is read (in contrast, UNIX Lex
initializes \verb"yylineno" to 1 and increments it {\em after\/} the
line end has been read). This is motivated by the way in which TP Lex
maintains the current line, and will not affect your programs unless you
explicitly reset the \verb"yylineno" value (e.g., when opening a new
input file, as sketched below). In such a case you should set
\verb"yylineno" to 0 rather than 1.
\end{itemize}
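For instance, when switching the lexical analyzer to another input file you
might reset the line counter like this (a sketch which assumes that
\verb"yyinput" is the input file variable provided by the \verb"LexLib"
unit):
\begin{quote}\begin{verbatim}
close(yyinput);
assign(yyinput, 'other.txt');
reset(yyinput);
yylineno := 0;  (* TP Lex convention: 0 rather than 1 *)
\end{verbatim}\end{quote}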
\section{TP Yacc}
This section describes the TP Yacc compiler compiler.
\subsection{Usage}
\begin{quote}\begin{verbatim}
yacc [options] yacc-file[.y] [output-file[.pas]]
\end{verbatim}\end{quote}
\subsection{Options}
\begin{description}
\item[\verb"-v"]
``Verbose:'' TP Yacc generates a readable description of the generated
parser, written to \verb"yacc-file" with new extension \verb".lst".
\item[\verb"-d"]
``Debug:'' TP Yacc generates a parser with debugging output.
\end{description}
\subsection{Description}
TP Yacc is a program that lets you prepare parsers from the description
of input languages by BNF-like grammars. You simply specify the grammar
for your target language, augmented with the Turbo Pascal code necessary
to process the syntactic constructs, and TP Yacc translates your grammar
into the Turbo Pascal code for a corresponding parser subroutine named
\verb"yyparse".

TP Yacc parses the source grammar contained in \verb"yacc-file" (with default
suffix \verb".y") and writes the constructed parser subroutine to the
specified \verb"output-file" (with default suffix \verb".pas"); if no output
file is specified, output goes to \verb"yacc-file" with new suffix
\verb".pas". If any errors are found during compilation, error messages are
written to the list file (\verb"yacc-file" with new suffix \verb".lst").

The generated parser routine, \verb"yyparse", is declared as:
\begin{quote}\begin{verbatim}
function yyparse : Integer;
\end{verbatim}\end{quote}
This routine may be called by your main program to execute the parser.
The return value of the \verb"yyparse" routine denotes success or failure of
the parser (possible return values: 0 = success, 1 = unrecoverable syntax
error or parse stack overflow).
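A minimal main program, which may be placed in the auxiliary procedures
section of the grammar (see below), might check this return value as follows:
\begin{quote}\begin{verbatim}
begin
  if yyparse=0 then
    writeln('input accepted')
  else
    writeln('syntax error');
end.
\end{verbatim}\end{quote}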
Similar to TP Lex, the code template for the \verb"yyparse" routine may be
found in the \verb"yyparse.cod" file. The rules for locating this file are
analogous to those of TP Lex (see Section {\em TP Lex\/}).

The TP Yacc library (\verb"YaccLib") unit is required by programs using
Yacc-generated parsers; you will therefore have to put an appropriate
\verb"uses" clause into your program or unit that contains the parser routine.
The \verb"YaccLib" unit also provides some routines which may be used to
control the actions of the parser. See the file \verb"yacclib.pas" for further
information.
\subsection{Yacc Source}
A TP Yacc program consists of three sections separated with the \verb"%%"
delimiter:
\begin{quote}\begin{verbatim}
definitions
%%
rules
%%
auxiliary procedures
\end{verbatim}\end{quote}
The TP Yacc language is free-format: whitespace (blanks, tabs and newlines)
is ignored, except if it serves as a delimiter. Comments have the C-like
format \verb"/* ... */". They are treated as whitespace. Grammar symbols are
denoted by identifiers which have the usual form (letter, including
underscore, followed by a sequence of letters and digits; upper- and
lowercase is distinct). The TP Yacc language also has some keywords which
always start with the \verb"%" character. Literals are denoted by characters
enclosed in single quotes. The usual C-like escapes are recognized:
\begin{itemize}
\item \verb"\n" denotes newline
\item \verb"\r" denotes carriage return
\item \verb"\t" denotes tab
\item \verb"\b" denotes backspace
\item \verb"\f" denotes form feed
\item \verb"\"$nnn$ denotes character no.\ $nnn$ in octal base
\end{itemize}
\subsection{Definitions}
The first section of a TP Yacc grammar serves to define the symbols used in
the grammar. It may contain the following types of definitions:
\begin{itemize}
\item
start symbol definition: A definition of the form
\begin{quote}\begin{verbatim}
%start symbol
\end{verbatim}\end{quote}
declares the start nonterminal of the grammar (if this definition is
omitted, TP Yacc assumes the left-hand side nonterminal of the first
grammar rule as the start symbol of the grammar).
\item
terminal definitions: Definitions of the form
\begin{quote}\begin{verbatim}
%token symbol ...
\end{verbatim}\end{quote}
are used to declare the terminal symbols (``tokens'') of the target
language. Any identifier not introduced in a \verb"%token" definition
will be treated as a nonterminal symbol.

As far as TP Yacc is concerned, tokens are atomic symbols which do not
have an inner structure. A lexical analyzer must be provided which
takes on the task of tokenizing the input stream and returning the
individual tokens and literals to the parser (see Section {\em Lexical
Analysis\/}).
\item
precedence definitions: Operator symbols (terminals) may be associated
with a precedence by means of a precedence definition which may have
one of the following forms:
\begin{quote}\begin{verbatim}
%left symbol ...
%right symbol ...
%nonassoc symbol ...
\end{verbatim}\end{quote}
These are used to declare left-, right- and nonassociative operators,
respectively. Each precedence definition introduces a new precedence
level, lowest precedence first. E.g., you may write:
\begin{quote}\begin{verbatim}
%nonassoc '<' '>' '=' GEQ LEQ NEQ  /* relational operators */
%left     '+' '-' OR               /* addition operators */
%left     '*' '/' AND              /* multiplication operators */
%right    NOT UMINUS               /* unary operators */
\end{verbatim}\end{quote}
A terminal identifier introduced in a precedence definition may, but
need not, appear in a \verb"%token" definition as well.
\item
type definitions: Any (terminal or nonterminal) grammar symbol may be
associated with a type identifier which is used in the processing of
semantic values. Type tags of the form \verb"<name>" may be used in
token and precedence definitions to declare the type of a terminal
symbol, e.g.:
\begin{quote}\begin{verbatim}
%token <Real> NUM
%left <AddOp> '+' '-'
\end{verbatim}\end{quote}
To declare the type of a nonterminal symbol, use a type definition of
the form:
\begin{quote}\begin{verbatim}
%type <name> symbol ...
\end{verbatim}\end{quote}
e.g.:
\begin{quote}\begin{verbatim}
%type <Real> expr
\end{verbatim}\end{quote}
In a \verb"%type" definition, you may also omit the nonterminals, i.e.\
you may write:
\begin{quote}\begin{verbatim}
%type <name>
\end{verbatim}\end{quote}
This is useful when a given type is only used with type casts (see
Section {\em Grammar Rules and Actions\/}), and is not associated with
a specific nonterminal.
\item
Turbo Pascal declarations: You may also include arbitrary Turbo Pascal
code in the definitions section, enclosed in \verb"%{ %}". This code
will be inserted as global declarations into the output file, unchanged.
\end{itemize}
\subsection{Grammar Rules and Actions}
The second part of a TP Yacc grammar contains the grammar rules for the
target language. Grammar rules have the format
\begin{quote}\begin{verbatim}
name : symbol ... ;
\end{verbatim}\end{quote}
The left-hand side of a rule must be an identifier (which denotes a
nonterminal symbol). The right-hand side may be an arbitrary (possibly
empty) sequence of nonterminal and terminal symbols (including literals
enclosed in single quotes). The terminating semicolon may also be omitted.
Different rules for the same left-hand side symbol may be written using
the \verb"|" character to separate the different alternatives:
\begin{quote}\begin{verbatim}
name : symbol ...
     | symbol ...
       ...
     ;
\end{verbatim}\end{quote}
For instance, to specify a simple grammar for arithmetic expressions, you
may write:
\begin{quote}\begin{verbatim}
%left '+' '-'
%left '*' '/'
%token NUM
%%
expr : expr '+' expr
     | expr '-' expr
     | expr '*' expr
     | expr '/' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
(The \verb"%left" definitions at the beginning of the grammar are needed to
specify the precedence and associativity of the operator symbols. This will be
discussed in more detail in Section {\em Ambiguous Grammars\/}.)

Grammar rules may contain actions -- Turbo Pascal statements enclosed in
\verb"{ }" -- to be executed as the corresponding rules are recognized.
Furthermore, rules may return values, and access values returned by other
rules. These ``semantic'' values are written as \verb"$$" (value of the
left-hand side nonterminal) and \verb"$i" (value of the $i$th right-hand
side symbol). They are kept on a special value stack which is maintained
automatically by the parser.

Values associated with terminal symbols must be set by the lexical analyzer
(more about this in Section {\em Lexical Analysis\/}). Actions of the form
\verb"$$ := $1" can frequently be omitted, since this is the default action
assumed by TP Yacc for any rule that does not have an explicit action.

By default, the semantic value type provided by Yacc is \verb"Integer". You
can also put a declaration like
\begin{quote}\begin{verbatim}
%{
type YYSType = Real;
%}
\end{verbatim}\end{quote}
into the definitions section of your Yacc grammar to change the default value
type. However, if you have different value types, the preferred method is to
use type definitions as discussed in Section {\em Definitions\/}. When such
type definitions are given, TP Yacc handles all the necessary details of the
\verb"YYSType" definition and also provides a fair amount of type checking
which makes it easier to find type errors in the grammar.

For instance, we may declare the symbols \verb"NUM" and \verb"expr" in the
example above to be of type \verb"Real", and then use these values to
evaluate an expression as it is parsed.
\begin{quote}\begin{verbatim}
%left '+' '-'
%left '*' '/'
%token <Real> NUM
%type <Real> expr
%%
expr : expr '+' expr { $$ := $1+$3; }
     | expr '-' expr { $$ := $1-$3; }
     | expr '*' expr { $$ := $1*$3; }
     | expr '/' expr { $$ := $1/$3; }
     | '(' expr ')'  { $$ := $2; }
     | NUM
     ;
\end{verbatim}\end{quote}
(Note that we omitted the action of the last rule. The ``copy action''
\verb"$$ := $1" required by this rule is automatically added by TP Yacc.)

Actions may not only appear at the end, but also in the middle of a rule,
which is useful to perform some processing before the rule is fully parsed.
Such actions inside a rule are treated as special nonterminals which are
associated with an empty right-hand side. Thus, a rule like
\begin{quote}\begin{verbatim}
x : y { action; } z
\end{verbatim}\end{quote}
will be treated as:
\begin{quote}\begin{verbatim}
x    : y $act z
$act : { action; }
\end{verbatim}\end{quote}
Actions inside a rule may also access values to the left of the action,
and may return values by assigning to the \verb"$$" value. The value returned
by such an action can then be accessed by other actions using the usual
\verb"$i" notation. E.g., we may write:
\begin{quote}\begin{verbatim}
x : y { $$ := 2*$1; } z { $$ := $2+$3; }
\end{verbatim}\end{quote}
which has the effect of setting the value of \verb"x" to
\begin{quote}\begin{verbatim}
2*(the value of y)+(the value of z).
\end{verbatim}\end{quote}
Sometimes it is desirable to access values in enclosing rules. This can be
done using the notation \verb"$i" with $i\leq 0$. \verb"$0" refers to the
first value ``to the left'' of the current rule, \verb"$-1" to the second,
and so on. Note that in this case the referenced value depends on the actual
contents of the parse stack, so you have to make sure that the requested
values are always where you expect them.

There are some situations in which TP Yacc cannot easily determine the
type of values (when a typed parser is used). This is true, in particular,
for values in enclosing rules and for the \verb"$$" value in an action inside
a rule. In such cases you may use a type cast to explicitly specify the type
of a value. The format for such type casts is \verb"$<name>$" (for left-hand
side values) and \verb"$<name>i" (for right-hand side values) where
\verb"name" is a type identifier (which must occur in a \verb"%token",
precedence or \verb"%type" definition).
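For instance, in the following (hypothetical) rule fragment the value of the
action inside the rule must be given an explicit type, \verb"Real", both when
it is set and when it is accessed later on (assuming that \verb"expr" is a
nonterminal of type \verb"Real" declared elsewhere in the grammar):
\begin{quote}\begin{verbatim}
sum : expr { $<Real>$ := 2*$1; }
      expr { writeln($<Real>2 + $3); }
    ;
\end{verbatim}\end{quote}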
\subsection{Auxiliary Procedures}
The third section of a TP Yacc program is optional. If it is present, it
may contain any Turbo Pascal code (such as supporting routines or a main
program) which is tacked on to the end of the output file.
\subsection{Lexical Analysis}
For any TP Yacc-generated parser, the programmer must supply a lexical
analyzer routine named \verb"yylex" which performs the lexical analysis for
the parser. This routine must be declared as
\begin{quote}\begin{verbatim}
function yylex : Integer;
\end{verbatim}\end{quote}
The \verb"yylex" routine may either be prepared by hand, or by using the
lexical analyzer generator TP Lex (see Section {\em TP Lex\/}).

The lexical analyzer must be included in your main program behind the
parser subroutine (the \verb"yyparse" code template includes a forward
definition of the \verb"yylex" routine such that the parser can access the
lexical analyzer). For instance, you may put the lexical analyzer
routine into the auxiliary procedures section of your TP Yacc grammar,
either directly, or by using the Turbo Pascal include directive
(\verb"$I").
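For instance, the auxiliary procedures section of the \verb"expr.y" grammar
might read like this (a sketch; the actual sample program in the distribution
may differ in details):
\begin{quote}\begin{verbatim}
{$I exprlex.pas}

begin
  if yyparse=0 then ;
end.
\end{verbatim}\end{quote}
Here \verb"exprlex.pas" is the lexical analyzer generated by TP Lex from
\verb"exprlex.l".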
The parser repeatedly calls the \verb"yylex" routine to tokenize the input
stream and obtain the individual lexical items in the input. For any
literal character, the \verb"yylex" routine has to return the corresponding
character code. For the other, symbolic, terminals of the input language,
the lexical analyzer must return corresponding integer codes. These are
assigned automatically by TP Yacc in the order in which token definitions
appear in the definitions section of the source grammar. The lexical
analyzer can access these values through corresponding integer constants
which are declared by TP Yacc in the output file.

For instance, if
\begin{quote}\begin{verbatim}
%token NUM
\end{verbatim}\end{quote}
is the first definition in the Yacc grammar, then TP Yacc will create
a corresponding constant declaration
\begin{quote}\begin{verbatim}
const NUM = 257;
\end{verbatim}\end{quote}
in the output file (TP Yacc automatically assigns symbolic token numbers
starting at 257; 1 through 255 are reserved for character literals, 0 denotes
end-of-file, and 256 is reserved for the special error token which will be
discussed in Section {\em Error Handling\/}). This definition may then be
used, e.g., in a corresponding TP Lex program as follows:
\begin{quote}\begin{verbatim}
[0-9]+   return(NUM);
\end{verbatim}\end{quote}
You can also explicitly assign token numbers in the grammar. For this
purpose, the first occurrence of a token identifier in the definitions
section may be followed by an unsigned integer. E.g., you may write:
\begin{quote}\begin{verbatim}
%token NUM 299
\end{verbatim}\end{quote}

Besides the return value of \verb"yylex", the lexical analyzer routine may
also return an additional semantic value for the recognized token. This value
is assigned to a variable named \verb"yylval" and may then be accessed in
actions through the \verb"$i" notation (see above, Section {\em Grammar
Rules and Actions\/}). The \verb"yylval" variable is of type \verb"YYSType"
(the semantic value type, \verb"Integer" by default); its declaration may be
found in the \verb"yyparse.cod" file.

For instance, to assign an \verb"Integer" value to a \verb"NUM" token in the
above example, we may write:
\begin{quote}\begin{verbatim}
[0-9]+   begin
           val(yytext, yylval, code);
           return(NUM);
         end;
\end{verbatim}\end{quote}
This assigns \verb"yylval" the value of the \verb"NUM" token (using the Turbo
Pascal standard procedure \verb"val").

If a parser uses tokens of different types (via a \verb"%token <name>"
definition), then the \verb"yylval" variable will not be of type
\verb"Integer", but instead of a corresponding variant record type which is
capable of holding all the different value types declared in the TP Yacc
grammar. In this case, the lexical analyzer must assign a semantic value to
the corresponding record component which is named \verb"yy"{\em name\/}
(where {\em name\/} stands for the corresponding type identifier).
E.g., if token \verb"NUM" is declared \verb"Real":
\begin{quote}\begin{verbatim}
%token <Real> NUM
\end{verbatim}\end{quote}
then the value for token \verb"NUM" must be assigned to \verb"yylval.yyReal".
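The corresponding TP Lex rule then assigns the token's value to this record
component, e.g.\ (a sketch along the lines of the \verb"NUM" example above):
\begin{quote}\begin{verbatim}
[0-9]+("."[0-9]+)?   begin
                       val(yytext, yylval.yyReal, code);
                       return(NUM);
                     end;
\end{verbatim}\end{quote}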
  799. \subsection{How The Parser Works}
  800. TP Yacc uses the LALR(1) technique developed by Donald E.\ Knuth and F.\
  801. DeRemer to construct a simple, efficient, non-backtracking bottom-up
  802. parser for the source grammar. The LALR parsing technique is described
  803. in detail in Aho/Sethi/Ullman (1986). It is quite instructive to take a
  804. look at the parser description TP Yacc generates from a small sample
  805. grammar, to get an idea of how the LALR parsing algorithm works. We
  806. consider the following simplified version of the arithmetic expression
  807. grammar:
  808. \begin{quote}\begin{verbatim}
  809. %token NUM
  810. %left '+'
  811. %left '*'
  812. %%
  813. expr : expr '+' expr
  814. | expr '*' expr
  815. | '(' expr ')'
  816. | NUM
  817. ;
  818. \end{verbatim}\end{quote}
  819. When run with the \verb"-v" option on the above grammar, TP Yacc generates
  820. the parser description listed below.
  821. \begin{quote}\begin{verbatim}
  822. state 0:
  823. $accept : _ expr $end
  824. '(' shift 2
  825. NUM shift 3
  826. . error
  827. expr goto 1
  828. state 1:
  829. $accept : expr _ $end
  830. expr : expr _ '+' expr
  831. expr : expr _ '*' expr
  832. $end accept
  833. '*' shift 4
  834. '+' shift 5
  835. . error
  836. state 2:
  837. expr : '(' _ expr ')'
  838. '(' shift 2
  839. NUM shift 3
  840. . error
  841. expr goto 6
  842. state 3:
  843. expr : NUM _ (4)
  844. . reduce 4
  845. state 4:
  846. expr : expr '*' _ expr
  847. '(' shift 2
  848. NUM shift 3
  849. . error
  850. expr goto 7
  851. state 5:
  852. expr : expr '+' _ expr
  853. '(' shift 2
  854. NUM shift 3
  855. . error
  856. expr goto 8
  857. state 6:
  858. expr : '(' expr _ ')'
  859. expr : expr _ '+' expr
  860. expr : expr _ '*' expr
  861. ')' shift 9
  862. '*' shift 4
  863. '+' shift 5
  864. . error
  865. state 7:
  866. expr : expr '*' expr _ (2)
  867. expr : expr _ '+' expr
  868. expr : expr _ '*' expr
  869. . reduce 2
  870. state 8:
  871. expr : expr '+' expr _ (1)
  872. expr : expr _ '+' expr
  873. expr : expr _ '*' expr
  874. '*' shift 4
  875. $end reduce 1
  876. ')' reduce 1
  877. '+' reduce 1
  878. . error
  879. state 9:
  880. expr : '(' expr ')' _ (3)
  881. . reduce 3
  882. \end{verbatim}\end{quote}
  883. Each state of the parser corresponds to a certain prefix of the input
  884. which has already been seen. The parser description lists the grammar
  885. rules wich are parsed in each state, and indicates the portion of each
  886. rule which has already been parsed by an underscore. In state 0, the
  887. start state of the parser, the parsed rule is
  888. \begin{quote}\begin{verbatim}
  889. $accept : expr $end
  890. \end{verbatim}\end{quote}
  891. This is not an actual grammar rule, but a starting rule automatically
  892. added by TP Yacc. In general, it has the format
  893. \begin{quote}\begin{verbatim}
  894. $accept : X $end
  895. \end{verbatim}\end{quote}
  896. where \verb"X" is the start nonterminal of the grammar, and \verb"$end" is
  897. a pseudo token denoting end-of-input (the \verb"$end" symbol is used by the
  898. parser to determine when it has successfully parsed the input).
  899. The description of the start rule in state 0,
  900. \begin{quote}\begin{verbatim}
  901. $accept : _ expr $end
  902. \end{verbatim}\end{quote}
  903. with the underscore positioned before the \verb"expr" symbol, indicates that
  904. we are at the beginning of the parse and are ready to parse an expression
  905. (nonterminal \verb"expr").
  906. The parser maintains a stack to keep track of states visited during the
  907. parse. There are two basic kinds of actions in each state: {\em shift\/},
  908. which reads an input symbol and pushes the corresponding next state on top of
  909. the stack, and {\em reduce\/} which pops a number of states from the stack
  910. (corresponding to the number of right-hand side symbols of the rule used
  911. in the reduction) and consults the {\em goto\/} entries of the uncovered
  912. state to find the transition corresponding to the left-hand side symbol of the
  913. reduced rule.
  914. In each step of the parse, the parser is in a given state (the state on
  915. top of its stack) and may consult the current {\em lookahead symbol\/}, the
  916. next symbol in the input, to determine the parse action -- shift or reduce --
  917. to perform. The parser terminates as soon as it reaches state 1 and reads
  918. in the endmarker, indicated by the {\em accept\/} action on \verb"$end" in
  919. state 1.
  920. Sometimes the parser may also carry out an action without inspecting the
  921. current lookahead token. This is the case, e.g., in state 3 where the
  922. only action is reduction by rule 4:
  923. \begin{quote}\begin{verbatim}
  924. . reduce 4
  925. \end{verbatim}\end{quote}
  926. The default action in a state can also be {\em error\/} indicating that any
  927. other input represents a syntax error. (In case of such an error the
  928. parser will start syntactic error recovery, as described in Section
  929. {\em Error Handling\/}.)
  930. Now let us see how the parser responds to a given input. We consider the
  931. input string \verb"2+5*3" which is presented to the parser as the token
  932. sequence:
  933. \begin{quote}\begin{verbatim}
  934. NUM + NUM * NUM
  935. \end{verbatim}\end{quote}
  936. Table \ref{tab2} traces the corresponding actions of the parser. We also
  937. show the current state in each move, and the remaining states on the stack.
  938. \begin{table*}\centering
  939. \begin{tabular}{l|l|l|p{8cm}}
  940. \hline\hline
  941. {\sc State}& {\sc Stack}& {\sc Lookahead}& {\sc Action}\\
  942. \hline
  943. 0 & & \verb"NUM" & shift state 3\\
  944. 3 & 0 & & reduce rule 4 (pop 1 state, uncovering state
  945. 0, then goto state 1 on symbol \verb"expr")\\
  946. 1 & 0 & \verb"+" & shift state 5\\
  947. 5 & 1 0 & \verb"NUM" & shift state 3\\
  948. 3 & 5 1 0 & & reduce rule 4 (pop 1 state, uncovering state
  949. 5, then goto state 8 on symbol \verb"expr")\\
  950. 8 & 5 1 0 & \verb"*" & shift 4\\
  951. 4 & 8 5 1 0 & \verb"NUM" & shift 3\\
  952. 3 & 4 8 5 1 0 & & reduce rule 4 (pop 1 state, uncovering state
  953. 4, then goto state 7 on symbol \verb"expr")\\
  954. 7 & 4 8 5 1 0 & & reduce rule 2 (pop 3 states, uncovering state
  955. 5, then goto state 8 on symbol \verb"expr")\\
  956. 8 & 5 1 0 & \verb"$end" & reduce rule 1 (pop 3 states, uncovering state
  957. 0, then goto state 1 on symbol \verb"expr")\\
  958. 1 & 0 & \verb"$end" & accept\\
  959. \hline
  960. \end{tabular}
  961. \caption{Parse of \protect\verb"NUM + NUM * NUM".}
  962. \label{tab2}
  963. \end{table*}
It is also instructive to see how the parser responds to illegal inputs.
E.g., you may try to figure out what the parser does when confronted with:
\begin{quote}\begin{verbatim}
NUM + )
\end{verbatim}\end{quote}
or:
\begin{quote}\begin{verbatim}
( NUM * NUM
\end{verbatim}\end{quote}
You will find that the parser, sooner or later, will always run into an
error action when confronted with erroneous inputs. An LALR parser will
never shift an invalid symbol and thus will always find syntax errors as
soon as possible during a left-to-right scan of the input.
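For instance, for the first input the parse begins exactly as in Table
\ref{tab2}: the \verb"NUM" is shifted and reduced to \verb"expr", and the
\verb"+" is shifted. In the state reached after the \verb"+" the parser
expects the beginning of another expression, so the lookahead \verb")"
matches neither a shift nor a reduce action and an error is reported
immediately; the invalid \verb")" is never shifted. Using the state numbers
of Table \ref{tab2}, the moves are roughly:
\begin{quote}\begin{verbatim}
0  shift state 3 on NUM
3  reduce rule 4, goto state 1
1  shift state 5 on +
5  error on )
\end{verbatim}\end{quote}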
TP Yacc provides a debugging option (\verb"-d") that may be used to trace
the actions performed by the parser. When a grammar is compiled with the
\verb"-d" option, the generated parser will print out the actions as it
parses its input.
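For instance, assuming the TP Yacc executable is invoked as \verb"yacc" and
that your grammar resides in a file \verb"expr.y" (the command name and file
naming conventions here are only an assumption about your installation), a
tracing version of the expression parser could be generated with:
\begin{quote}\begin{verbatim}
yacc -d expr.y
\end{verbatim}\end{quote}
The generated parser then reports each action as it is carried out, which is
helpful when checking a trace like the one in Table \ref{tab2} against the
actual behaviour of the parser.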
\subsection{Ambiguous Grammars}
There are situations in which TP Yacc will not produce a valid parser for
a given input language. LALR(1) parsers are restricted to one-symbol
lookahead on which they have to base their parsing decisions. If a
grammar is ambiguous, or cannot be parsed unambiguously using one-symbol
lookahead, TP Yacc will generate parsing conflicts when constructing the
parse table. There are two types of such conflicts: {\em shift/reduce
conflicts\/} (when there is both a shift and a reduce action for a given
input symbol in a given state), and {\em reduce/reduce conflicts\/} (when
there is more than one reduce action for a given input symbol in a given
state). Note that there can never be a shift/shift conflict.

When a grammar generates parsing conflicts, TP Yacc prints out the number
of shift/reduce and reduce/reduce conflicts it encountered when constructing
the parse table. However, TP Yacc will still generate the output code for the
parser. To resolve parsing conflicts, TP Yacc uses the following built-in
disambiguating rules:
\begin{itemize}
\item
in a shift/reduce conflict, TP Yacc chooses the shift action.
\item
in a reduce/reduce conflict, TP Yacc chooses reduction of the first
grammar rule.
\end{itemize}
The shift/reduce disambiguating rule correctly resolves a type of
ambiguity known as the ``dangling-else ambiguity'' which arises in the
syntax of conditional statements of many programming languages (as in
Pascal):
\begin{quote}\begin{verbatim}
%token IF THEN ELSE
%%
stmt : IF expr THEN stmt
     | IF expr THEN stmt ELSE stmt
     ;
\end{verbatim}\end{quote}
This grammar is ambiguous, because a nested construct like
\begin{quote}\begin{verbatim}
IF expr-1 THEN IF expr-2 THEN stmt-1
ELSE stmt-2
\end{verbatim}\end{quote}
can be parsed in two ways, either as:
\begin{quote}\begin{verbatim}
IF expr-1 THEN ( IF expr-2 THEN stmt-1
                 ELSE stmt-2 )
\end{verbatim}\end{quote}
or as:
\begin{quote}\begin{verbatim}
IF expr-1 THEN ( IF expr-2 THEN stmt-1 )
ELSE stmt-2
\end{verbatim}\end{quote}
The first interpretation attaches an \verb"ELSE" to the last unmatched
\verb"IF", which is also the interpretation chosen in most programming
languages. This is also the way a TP Yacc-generated parser will parse
the construct, since the shift/reduce disambiguating rule has the effect of
suppressing the reduction of \verb"IF expr-2 THEN stmt-1"; instead, the parser
shifts the \verb"ELSE" symbol, which eventually leads to the reduction of
\verb"IF expr-2 THEN stmt-1 ELSE stmt-2".
The reduce/reduce disambiguating rule is used to resolve conflicts that
arise when there is more than one grammar rule matching a given construct.
Such ambiguities are often caused by ``special case constructs'' which may be
given priority simply by listing the more specific rules ahead of the more
general ones.

For instance, the following is an excerpt from the grammar describing the
input language of the UNIX equation formatter EQN:
\begin{quote}\begin{verbatim}
%right SUB SUP
%%
expr : expr SUB expr SUP expr
     | expr SUB expr
     | expr SUP expr
     ;
\end{verbatim}\end{quote}
Here, the \verb"SUB" and \verb"SUP" operator symbols denote sub- and
superscript, respectively. The rationale behind this example is that
an expression involving both sub- and superscript is often set differently
from an expression in which a superscript is applied to an already
subscripted expression (compare $x_i^n$ to ${x_i}^n$). This special case
is therefore caught by the first rule in the above example, which causes a
reduce/reduce conflict with rule 3 in expressions like
\verb"expr-1 SUB expr-2 SUP expr-3". The conflict is resolved in favour of
the first rule.
In both cases discussed above, the ambiguities could also be eliminated
by rewriting the grammar accordingly (although this yields more complicated
and less readable grammars). However, not every ambiguity is this benign:
often ambiguities are caused by design errors in the grammar. Hence, if
TP Yacc reports any parsing conflicts when constructing the parser, you
should use the \verb"-v" option to generate the parser description
(\verb".lst" file) and check whether TP Yacc resolved the conflicts correctly.
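To illustrate such a rewriting, here is one common way (shown only as a
sketch; the token \verb"OTHER", which stands for any non-conditional
statement, is made up for this example) to express the dangling-else
construct unambiguously: distinguish ``matched'' statements, in which every
\verb"IF" has its \verb"ELSE", from ``unmatched'' ones. The resulting grammar
accepts the same language, but is considerably harder to read than the
ambiguous version above:
\begin{quote}\begin{verbatim}
%token IF THEN ELSE OTHER
%%
stmt      : matched
          | unmatched
          ;
matched   : IF expr THEN matched ELSE matched
          | OTHER
          ;
unmatched : IF expr THEN stmt
          | IF expr THEN matched ELSE unmatched
          ;
\end{verbatim}\end{quote}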
There is one type of syntactic construct for which one often deliberately
uses an ambiguous grammar as a more concise representation of a language
that could also be specified unambiguously: the syntax of expressions.
For instance, the following is an unambiguous grammar for simple arithmetic
expressions:
\begin{quote}\begin{verbatim}
%token NUM
%%
expr   : term
       | expr '+' term
       ;
term   : factor
       | term '*' factor
       ;
factor : '(' expr ')'
       | NUM
       ;
\end{verbatim}\end{quote}
You may check yourself that this grammar gives \verb"*" a higher precedence
than \verb"+" and makes both operators left-associative. The same effect can
be achieved with the following ambiguous grammar using precedence definitions:
\begin{quote}\begin{verbatim}
%token NUM
%left '+'
%left '*'
%%
expr : expr '+' expr
     | expr '*' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
Without the precedence definitions, this is an ambiguous grammar causing
a number of shift/reduce conflicts. The precedence definitions are used
to resolve these conflicts correctly (conflicts resolved using precedence
are not reported by TP Yacc).

Each precedence definition introduces a new precedence level (lowest
precedence first) and specifies whether the corresponding operators
should be left-, right- or nonassociative (nonassociative operators
cannot be combined at all; example: the relational operators in Pascal).
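For instance, relational operators could be added to the arithmetic
expression grammar as a nonassociative level below the arithmetic operators
(a sketch; the choice and placement of the operators is only an illustration,
not a prescription for any particular language):
\begin{quote}\begin{verbatim}
%token NUM
%nonassoc '=' '<' '>'
%left     '+' '-'
%left     '*' '/'
%%
expr : expr '=' expr
     | expr '<' expr
     | expr '>' expr
     | expr '+' expr
     | expr '-' expr
     | expr '*' expr
     | expr '/' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
With these definitions, \verb"NUM < NUM + NUM" parses as expected (the
comparison is applied last), whereas \verb"NUM < NUM < NUM" is rejected as a
syntax error, because the nonassociative \verb"<" cannot be combined with
itself.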
TP Yacc uses precedence information to resolve shift/reduce conflicts as
follows. Precedences are associated with each terminal occurring in a
precedence definition. Furthermore, each grammar rule is given the
precedence of its rightmost terminal (this default choice can be
overridden using a \verb"%prec" tag; see below). To resolve a shift/reduce
conflict using precedence, both the symbol and the rule involved must
have been assigned precedences. TP Yacc then chooses the parse action
as follows:
\begin{itemize}
\item
If the symbol has higher precedence than the rule: shift.
\item
If the rule has higher precedence than the symbol: reduce.
\item
If symbol and rule have the same precedence, the associativity of the
symbol determines the parse action: if the symbol is left-associative:
reduce; if the symbol is right-associative: shift; if the symbol is
non-associative: error.
\end{itemize}
To give you an idea of how this works, let us consider our ambiguous
arithmetic expression grammar (without precedences):
\begin{quote}\begin{verbatim}
%token NUM
%%
expr : expr '+' expr
     | expr '*' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
This grammar generates four shift/reduce conflicts. The description
of state 8 reads as follows:
\begin{quote}\begin{verbatim}
state 8:
*** conflicts:
shift 4, reduce 1 on '*'
shift 5, reduce 1 on '+'
expr : expr '+' expr _ (1)
expr : expr _ '+' expr
expr : expr _ '*' expr
'*' shift 4
'+' shift 5
$end reduce 1
')' reduce 1
. error
\end{verbatim}\end{quote}
In this state, we have successfully parsed a \verb"+" expression (rule 1).
When the next symbol is \verb"+" or \verb"*", we have the choice between
reducing by rule 1 and shifting the symbol. Using the default shift/reduce
disambiguating rule, TP Yacc has resolved these conflicts in favour of shift.

Now let us assume the above precedence definitions:
\begin{quote}\begin{verbatim}
%left '+'
%left '*'
\end{verbatim}\end{quote}
which give \verb"*" a higher precedence than \verb"+" and make both operators
left-associative. The rightmost terminal in rule 1 is \verb"+". Hence, given
these precedence definitions, the first conflict will be resolved in favour
of shift (\verb"*" has higher precedence than \verb"+"), while the second one
is resolved in favour of reduce (\verb"+" is left-associative).

Similar conflicts arise in state 7:
\begin{quote}\begin{verbatim}
state 7:
*** conflicts:
shift 4, reduce 2 on '*'
shift 5, reduce 2 on '+'
expr : expr '*' expr _ (2)
expr : expr _ '+' expr
expr : expr _ '*' expr
'*' shift 4
'+' shift 5
$end reduce 2
')' reduce 2
. error
\end{verbatim}\end{quote}
Here, we have successfully parsed a \verb"*" expression which may be followed
by another \verb"+" or \verb"*" operator. Since \verb"*" is left-associative
and has higher precedence than \verb"+", both conflicts will be resolved in
favour of reduce.
Of course, you can also have different operators on the same precedence
level. For instance, consider the following extended version of the
arithmetic expression grammar:
\begin{quote}\begin{verbatim}
%token NUM
%left '+' '-'
%left '*' '/'
%%
expr : expr '+' expr
     | expr '-' expr
     | expr '*' expr
     | expr '/' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
This puts all ``addition'' operators on the first and all ``multiplication''
operators on the second precedence level. All operators are left-associative;
for instance, \verb"5+3-2" will be parsed as \verb"(5+3)-2".
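Right-associative operators are declared with \verb"%right" in the same
manner. As a purely hypothetical extension of this grammar, a power operator
\verb"^" could be given its own, highest precedence level:
\begin{quote}\begin{verbatim}
%token NUM
%left  '+' '-'
%left  '*' '/'
%right '^'
%%
expr : expr '+' expr
     | expr '-' expr
     | expr '*' expr
     | expr '/' expr
     | expr '^' expr
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
With this definition, \verb"2^3^2" is parsed as \verb"2^(3^2)"
(right-associativity), and \verb"2*3^2" as \verb"2*(3^2)" (\verb"^" binds
tighter than \verb"*").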
By default, TP Yacc assigns each rule the precedence of its rightmost
terminal. This is a sensible decision in most cases. Occasionally, it
may be necessary to override this default choice and explicitly assign
a precedence to a rule. This can be done by putting a precedence tag
of the form
\begin{quote}\begin{verbatim}
%prec symbol
\end{verbatim}\end{quote}
at the end of the corresponding rule, which gives the rule the precedence
of the specified symbol. For instance, to extend the expression grammar
with a unary minus operator, giving it highest precedence, you may write:
\begin{quote}\begin{verbatim}
%token NUM
%left '+' '-'
%left '*' '/'
%right UMINUS
%%
expr : expr '+' expr
     | expr '-' expr
     | expr '*' expr
     | expr '/' expr
     | '-' expr %prec UMINUS
     | '(' expr ')'
     | NUM
     ;
\end{verbatim}\end{quote}
Note the use of the \verb"UMINUS" token, which is not an actual input symbol
but whose sole purpose is to give unary minus its proper precedence. If
we omitted the precedence tag, both unary and binary minus would have the
same precedence because they are represented by the same input symbol.
\subsection{Error Handling}
Syntactic error handling is a difficult area in the design of user-friendly
parsers. Usually, you do not want the parser to give up at the first
occurrence of an erroneous input symbol. Instead, the parser should
recover from a syntax error, that is, it should try to find a place in the
input where it can resume the parse.

TP Yacc provides a general mechanism to implement parsers with error
recovery. A special predefined \verb"error" token may be used in grammar rules
to indicate positions where syntax errors might occur. When the parser runs
into an error action (i.e., reads an erroneous input symbol) it prints out
an error message and starts error recovery by popping its stack until it
uncovers a state in which there is a shift action on the \verb"error" token.
If there is no such state, the parser terminates with return value 1,
indicating an unrecoverable syntax error. If there is such a state, the
parser takes the shift on the \verb"error" token (pretending it has seen
an imaginary \verb"error" token in the input), and resumes parsing in a
special ``error mode.''

While in error mode, the parser quietly skips symbols until it can again
perform a legal shift action. To prevent a cascade of error messages, the
parser returns to its normal mode of operation only after it has seen
and shifted three legal input symbols. Any additional error found after
the first shifted symbol restarts error recovery, but no error message
is printed. The TP Yacc library routine \verb"yyerrok" may be used to reset
the parser to its normal mode of operation explicitly.
For a simple example, consider the rule
\begin{quote}\begin{verbatim}
stmt : error ';' { yyerrok; }
\end{verbatim}\end{quote}
and assume a syntax error occurs while a statement (nonterminal \verb"stmt")
is parsed. The parser prints an error message, then pops its stack until it
can shift the token \verb"error" of the error rule. Proceeding in error mode,
it will skip symbols until it finds a semicolon, then reduces by the error
rule. The call to \verb"yyerrok" tells the parser that we have recovered from
the error and that it should proceed with the normal parse. This kind of
``panic mode'' error recovery scheme works well when statements are always
terminated with a semicolon. The parser simply skips the ``bad'' statement
and then resumes the parse.
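In a line-oriented application such as an interactive calculator, the same
idea may be applied per input line. In the following sketch the token
\verb"NEWLINE" is hypothetical (it is assumed to be returned by the lexical
analyzer at the end of each input line):
\begin{quote}\begin{verbatim}
line : expr NEWLINE
     | error NEWLINE     { yyerrok; }
     ;
\end{verbatim}\end{quote}
After a syntax error the parser pops back to a state where \verb"error" can
be shifted, skips everything up to and including the next \verb"NEWLINE",
and reduces by the error rule; the call to \verb"yyerrok" then returns it to
its normal mode immediately, so the following line is parsed as usual.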
Implementing a good error recovery scheme can be a difficult task; see
Aho/Sethi/Ullman (1986) for a more comprehensive treatment of this topic.
Schreiner and Friedman have developed a systematic technique to implement
error recovery with Yacc which I found quite useful (I used it myself
to implement error recovery in the TP Yacc parser); see Schreiner/Friedman
(1985).
\subsection{Yacc Library}
The TP Yacc library (\verb"YaccLib") unit provides some global declarations
used by the parser routine \verb"yyparse", and some variables and utility
routines which may be used to control the actions of the parser and to
implement error recovery. See the file \verb"yacclib.pas" for a description
of these variables and routines.

You can also modify the Yacc library unit (and/or the code template in the
\verb"yyparse.cod" file) to customize TP Yacc to your target applications.
\subsection{Other Features}
TP Yacc supports all additional language elements listed as ``Old Features
Supported But not Encouraged'' in the UNIX manual, which are provided for
backward compatibility with older versions of (UNIX) Yacc:
\begin{itemize}
\item
literals delimited by double quotes.
\item
multiple-character literals. Note that these are not treated as
character sequences but represent single tokens which are given a
symbolic integer code just like any other token identifier. However,
they will not be declared in the output file, so you have to make sure
yourself that the lexical analyzer returns the correct codes for these
symbols. E.g., you might explicitly assign token numbers by using a
definition like
\begin{quote}\begin{verbatim}
%token ':=' 257
\end{verbatim}\end{quote}
at the beginning of the Yacc grammar.
\item
\verb"\" may be used instead of \verb"%", i.e. \verb"\\" means
\verb"%%", \verb"\left" is the same as \verb"%left", etc.
\item
other synonyms:
\begin{itemize}
\item \verb"%<" for \verb"%left"
\item \verb"%>" for \verb"%right"
\item \verb"%binary" or \verb"%2" for \verb"%nonassoc"
\item \verb"%term" or \verb"%0" for \verb"%token"
\item \verb"%=" for \verb"%prec"
\end{itemize}
\item
actions may also be written as \verb"= { ... }" or
\verb"= single-statement;" (see the example following this list).
\item
Turbo Pascal declarations (\verb"%{ ... %}") may be put at the
beginning of the rules section. They will be treated as local
declarations of the actions routine.
\end{itemize}
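For illustration, here is a small hypothetical fragment written in this
old-style notation (the semantic actions assume an integer value type); it
uses the synonyms and the old action syntax in place of \verb"%token",
\verb"%left" and ordinary \verb"{ ... }" actions:
\begin{quote}\begin{verbatim}
%term NUM
%<    '+'
%<    '*'
%%
expr : expr '+' expr  = { $$ := $1 + $3; }
     | expr '*' expr  = { $$ := $1 * $3; }
     | '(' expr ')'   = { $$ := $2; }
     | NUM
     ;
\end{verbatim}\end{quote}
In new grammars the standard notation should of course be preferred.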
\subsection{Implementation Restrictions}
As with TP Lex, internal table sizes and the main memory available limit the
complexity of source grammars that TP Yacc can handle. However, the maximum
table sizes provided by TP Yacc are large enough to handle quite complex
grammars (such as the Pascal grammar in the TP Yacc distribution). The actual
table sizes are shown in the statistics printed by TP Yacc when a compilation
is finished. The given figures are ``s'' (states), ``i'' (LR(0) kernel items),
``t'' (shift and goto transitions) and ``r'' (reductions).

The default stack size of the generated parsers is \verb"yymaxdepth = 1024",
as declared in the TP Yacc library unit. This should be sufficient for any
average application, but you can change the stack size by including a
corresponding declaration in the definitions part of the Yacc grammar
(or change the value in the \verb"YaccLib" unit). Note that right-recursive
grammar rules may increase stack space requirements, so it is a good
idea to use left-recursive rules wherever possible.
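To see why, compare the two ways of writing a list of items (\verb"item"
here is just a placeholder nonterminal). The left-recursive formulation
\begin{quote}\begin{verbatim}
list : item
     | list ',' item
     ;
\end{verbatim}\end{quote}
reduces each \verb"item" as soon as it has been read, so the parse stack
stays shallow regardless of the length of the list, whereas the
right-recursive formulation
\begin{quote}\begin{verbatim}
list : item
     | item ',' list
     ;
\end{verbatim}\end{quote}
has to stack all items before the first reduction can take place, so the
stack grows linearly with the length of the input.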
\subsection{Differences from UNIX Yacc}
Major differences between TP Yacc and UNIX Yacc are listed below.
\begin{itemize}
\item
TP Yacc produces output code for Turbo Pascal, rather than for C.
\item
TP Yacc does not support \verb"%union" definitions. Instead, a value
type is declared by specifying the type identifier itself as the tag of
a \verb"%token" or \verb"%type" definition. TP Yacc will automatically
generate an appropriate variant record type (\verb"YYSType") which is
capable of holding values of any of the types used in \verb"%token" and
\verb"%type" definitions. (A short example of such typed definitions
follows this list.)

Type checking is very strict. If you use type definitions, then
any symbol referred to in an action must have a type introduced
in a type definition. Either the symbol must have been assigned a
type in the definitions section, or the \verb"$<type-identifier>"
notation must be used. The syntax of the \verb"%type" definition has
been changed slightly to allow definitions of the form
\begin{quote}\begin{verbatim}
%type <type-identifier>
\end{verbatim}\end{quote}
(omitting the nonterminals) which may be used to declare types which
are not assigned to any grammar symbol, but are used with the
\verb"$<...>" construct.
\item
The parse tables constructed by this Yacc version are slightly larger
than those constructed by UNIX Yacc, since a reduce action will only be
chosen as the default action if it is the only action in the state.
In contrast, UNIX Yacc chooses a reduce action as the default action
whenever it is the only reduce action of the state (even if there are
other shift actions).

This avoids a bug in UNIX Yacc that makes the generated parser start
error recovery too late with certain types of error productions (see
also Schreiner/Friedman, {\em Introduction to compiler construction with
UNIX,\/} 1985). Also, errors will be caught sooner in most cases where
UNIX Yacc would carry out an additional (default) reduction before
detecting the error.
\item
Library routines are named differently from the UNIX version (e.g.,
the \verb"yyerrlab" routine takes the place of the \verb"YYERROR"
macro of UNIX Yacc), and, of course, all macros of UNIX Yacc
(\verb"YYERROR", \verb"YYACCEPT", etc.) had to be implemented as
procedures.
\end{itemize}
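Finally, here is the short example of typed \verb"%token" and \verb"%type"
definitions mentioned in the list above. It is a hypothetical fragment (the
choice of the Turbo Pascal types \verb"Integer" and \verb"Real" is arbitrary);
TP Yacc collects the two types into the generated \verb"YYSType" variant
record:
\begin{quote}\begin{verbatim}
%token <Integer> NUM
%type  <Real>    expr
%left  '+'
%%
expr : expr '+' expr   { $$ := $1 + $3; }
     | NUM             { $$ := $1; }
     ;
\end{verbatim}\end{quote}
Here \verb"$1" in the second action denotes the \verb"Integer" value of
\verb"NUM", which Turbo Pascal implicitly converts to the \verb"Real" value
assigned to \verb"$$".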
\end{document}