parser.inc 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. ParseBufSize = 4096;
  15. LastSpecialToken = 5;
  16. TokNames : array[0..LastSpecialToken] of string =
  17. (
  18. 'EOF',
  19. 'Symbol',
  20. 'String',
  21. 'Integer',
  22. 'Float',
  23. 'WideString'
  24. );
  25. function TParser.GetTokenName(aTok: char): string;
  26. begin
  27. if ord(aTok) <= LastSpecialToken then
  28. Result:=TokNames[ord(aTok)]
  29. else Result:=aTok;
  30. end;
  31. procedure TParser.LoadBuffer;
  32. var toread : integer;
  33. begin
  34. toread:=fStream.Size-fStream.Position;
  35. if toread>ParseBufSize then toread:=ParseBufSize;
  36. if toread=0 then
  37. begin
  38. fEofReached:=true;
  39. exit;
  40. end;
  41. fStream.ReadBuffer(fBuf[0],toread);
  42. fBuf[toread]:=#0;
  43. inc(fDeltaPos,fPos);
  44. fPos:=0;
  45. fBufLen:=toread;
  46. end;
  47. procedure TParser.CheckLoadBuffer; inline;
  48. begin
  49. if fBuf[fPos]=#0 then LoadBuffer;
  50. end;
  51. procedure TParser.ProcessChar; inline;
  52. begin
  53. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  54. inc(fPos);
  55. CheckLoadBuffer;
  56. end;
  57. function TParser.IsNumber: boolean; inline;
  58. begin
  59. Result:=fBuf[fPos] in ['0'..'9'];
  60. end;
  61. function TParser.IsHexNum: boolean; inline;
  62. begin
  63. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  64. end;
  65. function TParser.IsAlpha: boolean; inline;
  66. begin
  67. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  68. end;
  69. function TParser.IsAlphaNum: boolean; inline;
  70. begin
  71. Result:=IsAlpha or IsNumber;
  72. end;
  73. function TParser.GetHexValue(c: char): byte; inline;
  74. begin
  75. case c of
  76. '0'..'9' : Result:=ord(c)-$30;
  77. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  78. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  79. end;
  80. end;
  81. function TParser.GetAlphaNum: string;
  82. begin
  83. if not IsAlpha then
  84. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  85. Result:='';
  86. while IsAlphaNum do
  87. begin
  88. Result:=Result+fBuf[fPos];
  89. inc(fPos);
  90. CheckLoadBuffer;
  91. end;
  92. end;
  93. procedure TParser.HandleNewLine;
  94. begin
  95. if fBuf[fPos]=#13 then //CR
  96. begin
  97. inc(fPos);
  98. CheckLoadBuffer;
  99. if fBuf[fPos]=#10 then inc(fPos); //CR LF
  100. end
  101. else inc(fPos); //LF
  102. inc(fSourceLine);
  103. fDeltaPos:=-(fPos-1);
  104. end;
  105. procedure TParser.SkipSpaces;
  106. begin
  107. while fBuf[fPos] in [' ',#9] do
  108. inc(fPos);
  109. end;
  110. procedure TParser.SkipWhitespace;
  111. begin
  112. while true do
  113. begin
  114. CheckLoadBuffer;
  115. case fBuf[fPos] of
  116. ' ',#9 : SkipSpaces;
  117. #10,#13 : HandleNewLine
  118. else break;
  119. end;
  120. end;
  121. end;
  122. procedure TParser.HandleEof;
  123. begin
  124. fToken:=toEOF;
  125. fLastTokenStr:='';
  126. end;
  127. procedure TParser.HandleAlphaNum;
  128. begin
  129. fLastTokenStr:=GetAlphaNum;
  130. fToken:=toSymbol;
  131. end;
  132. procedure TParser.HandleNumber;
  133. type
  134. floatPunct = (fpDot,fpE);
  135. floatPuncts = set of floatPunct;
  136. var
  137. allowed : floatPuncts;
  138. begin
  139. fLastTokenStr:='';
  140. while IsNumber do
  141. ProcessChar;
  142. fToken:=toInteger;
  143. if (fBuf[fPos] in ['.','e','E']) then
  144. begin
  145. fToken:=toFloat;
  146. allowed:=[fpDot,fpE];
  147. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  148. begin
  149. case fBuf[fPos] of
  150. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  151. 'E','e' : if fpE in allowed then
  152. begin
  153. allowed:=[];
  154. ProcessChar;
  155. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  156. if not (fBuf[fPos] in ['0'..'9']) then
  157. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  158. end
  159. else break;
  160. end;
  161. ProcessChar;
  162. end;
  163. end;
  164. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  165. begin
  166. fFloatType:=fBuf[fPos];
  167. inc(fPos);
  168. fToken:=toFloat;
  169. end
  170. else fFloatType:=#0;
  171. end;
  172. procedure TParser.HandleHexNumber;
  173. var valid : boolean;
  174. begin
  175. fLastTokenStr:='$';
  176. inc(fPos);
  177. CheckLoadBuffer;
  178. valid:=false;
  179. while IsHexNum do
  180. begin
  181. valid:=true;
  182. ProcessChar;
  183. end;
  184. if not valid then
  185. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  186. fToken:=toInteger;
  187. end;
  188. function TParser.HandleQuotedString: string;
  189. begin
  190. Result:='';
  191. inc(fPos);
  192. CheckLoadBuffer;
  193. while true do
  194. begin
  195. case fBuf[fPos] of
  196. #0 : ErrorStr(SParUnterminatedString);
  197. #13,#10 : ErrorStr(SParUnterminatedString);
  198. '''' : begin
  199. inc(fPos);
  200. CheckLoadBuffer;
  201. if fBuf[fPos]<>'''' then exit;
  202. end;
  203. end;
  204. Result:=Result+fBuf[fPos];
  205. inc(fPos);
  206. CheckLoadBuffer;
  207. end;
  208. end;
  209. function TParser.HandleDecimalString(var ascii : boolean): widestring;
  210. var i : integer;
  211. begin
  212. Result:='';
  213. inc(fPos);
  214. CheckLoadBuffer;
  215. while IsNumber do
  216. begin
  217. Result:=Result+fBuf[fPos];
  218. inc(fPos);
  219. CheckLoadBuffer;
  220. end;
  221. if TryStrToInt(Result,i) and (i<256) then
  222. begin
  223. if i>127 then ascii:=false;
  224. setlength(Result,1);
  225. Result[1]:=widechar(word(i));
  226. end
  227. else
  228. Result:='#'+Result;
  229. end;
  230. procedure TParser.HandleString;
  231. var ascii : boolean;
  232. begin
  233. fLastTokenWStr:='';
  234. ascii:=true;
  235. while true do
  236. case fBuf[fPos] of
  237. '''' : fLastTokenWStr:=fLastTokenWStr+HandleQuotedString;
  238. '#' : fLastTokenWStr:=fLastTokenWStr+HandleDecimalString(ascii)
  239. else break;
  240. end;
  241. if ascii then
  242. fToken:=toString
  243. else
  244. fToken:=toWString;
  245. fLastTokenStr:=fLastTokenWStr;
  246. end;
  247. procedure TParser.HandleMinus;
  248. begin
  249. inc(fPos);
  250. CheckLoadBuffer;
  251. if IsNumber then
  252. begin
  253. HandleNumber;
  254. fLastTokenStr:='-'+fLastTokenStr;
  255. end
  256. else
  257. begin
  258. fToken:='-';
  259. fLastTokenStr:=fToken;
  260. end;
  261. end;
  262. procedure TParser.HandleUnknown;
  263. begin
  264. fToken:=fBuf[fPos];
  265. fLastTokenStr:=fToken;
  266. inc(fPos);
  267. end;
  268. constructor TParser.Create(Stream: TStream);
  269. begin
  270. fStream:=Stream;
  271. fBuf:=GetMem(ParseBufSize+1);
  272. fBufLen:=0;
  273. fPos:=0;
  274. fDeltaPos:=1;
  275. fSourceLine:=1;
  276. fEofReached:=false;
  277. fLastTokenStr:='';
  278. fLastTokenWStr:='';
  279. fFloatType:=#0;
  280. fToken:=#0;
  281. LoadBuffer;
  282. NextToken;
  283. end;
  284. destructor TParser.Destroy;
  285. begin
  286. fStream.Position:=SourcePos;
  287. FreeMem(fBuf);
  288. end;
  289. procedure TParser.CheckToken(T: Char);
  290. begin
  291. if fToken<>T then
  292. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  293. end;
  294. procedure TParser.CheckTokenSymbol(const S: string);
  295. begin
  296. CheckToken(toSymbol);
  297. if CompareText(fLastTokenStr,S)<>0 then
  298. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  299. end;
  300. procedure TParser.Error(const Ident: string);
  301. begin
  302. ErrorStr(Ident);
  303. end;
  304. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  305. begin
  306. ErrorStr(Format(Ident,Args));
  307. end;
  308. procedure TParser.ErrorStr(const Message: string);
  309. begin
  310. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  311. end;
  312. procedure TParser.HexToBinary(Stream: TStream);
  313. var outbuf : array[0..ParseBufSize-1] of byte;
  314. b : byte;
  315. i : integer;
  316. begin
  317. i:=0;
  318. SkipWhitespace;
  319. while IsHexNum do
  320. begin
  321. b:=(GetHexValue(fBuf[fPos]) shl 4);
  322. inc(fPos);
  323. CheckLoadBuffer;
  324. if not IsHexNum then
  325. Error(SParUnterminatedBinValue);
  326. b:=b or GetHexValue(fBuf[fPos]);
  327. inc(fPos);
  328. outbuf[i]:=b;
  329. inc(i);
  330. if i>=ParseBufSize then
  331. begin
  332. Stream.WriteBuffer(outbuf[0],i);
  333. i:=0;
  334. end;
  335. SkipWhitespace;
  336. end;
  337. if i>0 then
  338. Stream.WriteBuffer(outbuf[0],i);
  339. NextToken;
  340. end;
  341. function TParser.NextToken: Char;
  342. begin
  343. SkipWhiteSpace;
  344. if fEofReached then
  345. HandleEof
  346. else
  347. case fBuf[fPos] of
  348. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  349. '$' : HandleHexNumber;
  350. '-' : HandleMinus;
  351. '0'..'9' : HandleNumber;
  352. '''','#' : HandleString
  353. else
  354. HandleUnknown;
  355. end;
  356. Result:=fToken;
  357. end;
  358. function TParser.SourcePos: Longint;
  359. begin
  360. Result:=fStream.Position-fBufLen+fPos;
  361. end;
  362. function TParser.TokenComponentIdent: string;
  363. begin
  364. if fToken<>toSymbol then
  365. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  366. CheckLoadBuffer;
  367. while fBuf[fPos]='.' do
  368. begin
  369. ProcessChar;
  370. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  371. end;
  372. Result:=fLastTokenStr;
  373. end;
  374. {$ifndef FPUNONE}
  375. Function TParser.TokenFloat: Extended;
  376. var errcode : word;
  377. begin
  378. Val(fLastTokenStr,Result,errcode);
  379. if errcode<>0 then
  380. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  381. end;
  382. {$endif}
  383. Function TParser.TokenInt: Int64;
  384. begin
  385. if not TryStrToInt64(fLastTokenStr,Result) then
  386. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  387. end;
  388. function TParser.TokenString: string;
  389. begin
  390. case fToken of
  391. toWString : Result:=fLastTokenWStr;
  392. toFloat : if fFloatType<>#0 then
  393. Result:=fLastTokenStr+fFloatType
  394. else Result:=fLastTokenStr
  395. else
  396. Result:=fLastTokenStr;
  397. end;
  398. end;
  399. function TParser.TokenWideString: WideString;
  400. begin
  401. if fToken=toWString then
  402. Result:=fLastTokenWStr
  403. else
  404. Result:=fLastTokenStr;
  405. end;
  406. function TParser.TokenSymbolIs(const S: string): Boolean;
  407. begin
  408. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  409. end;