parser.inc 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. ParseBufSize = 4096;
  15. LastSpecialToken = 5;
  16. TokNames : array[0..LastSpecialToken] of string =
  17. (
  18. 'EOF',
  19. 'Symbol',
  20. 'String',
  21. 'Integer',
  22. 'Float',
  23. 'WideString'
  24. );
  25. function TParser.GetTokenName(aTok: char): string;
  26. begin
  27. if ord(aTok) <= LastSpecialToken then
  28. Result:=TokNames[ord(aTok)]
  29. else Result:=aTok;
  30. end;
  31. procedure TParser.LoadBuffer;
  32. var
  33. BytesRead: integer;
  34. begin
  35. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  36. if BytesRead = 0 then
  37. begin
  38. FEofReached := True;
  39. Exit;
  40. end;
  41. FBuf[BytesRead] := #0;
  42. Inc(FDeltaPos, BytesRead);
  43. FPos := 0;
  44. FBufLen := BytesRead;
  45. end;
  46. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  47. begin
  48. if fBuf[fPos]=#0 then LoadBuffer;
  49. end;
  50. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  51. begin
  52. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  53. inc(fPos);
  54. CheckLoadBuffer;
  55. end;
  56. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  57. begin
  58. Result:=fBuf[fPos] in ['0'..'9'];
  59. end;
  60. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  63. end;
  64. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  67. end;
  68. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. Result:=IsAlpha or IsNumber;
  71. end;
  72. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  73. begin
  74. case c of
  75. '0'..'9' : Result:=ord(c)-$30;
  76. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  77. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  78. end;
  79. end;
  80. function TParser.GetAlphaNum: string;
  81. begin
  82. if not IsAlpha then
  83. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  84. Result:='';
  85. while IsAlphaNum do
  86. begin
  87. Result:=Result+fBuf[fPos];
  88. inc(fPos);
  89. CheckLoadBuffer;
  90. end;
  91. end;
  92. procedure TParser.HandleNewLine;
  93. begin
  94. if fBuf[fPos]=#13 then //CR
  95. begin
  96. inc(fPos);
  97. CheckLoadBuffer;
  98. if fBuf[fPos]=#10 then inc(fPos); //CR LF
  99. end
  100. else inc(fPos); //LF
  101. inc(fSourceLine);
  102. fDeltaPos:=-(fPos-1);
  103. end;
  104. procedure TParser.SkipBOM;
  105. var
  106. i : integer;
  107. bom : string[3];
  108. backup : integer;
  109. begin
  110. i:=1;
  111. bom:=' ';
  112. backup:=fPos;
  113. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  114. begin
  115. bom[i]:=fBuf[fPos];
  116. inc(fPos);
  117. inc(i);
  118. end;
  119. if (bom<>(#$EF+#$BB+#$BF)) then
  120. fPos:=backup;
  121. end;
  122. procedure TParser.SkipSpaces;
  123. begin
  124. while fBuf[fPos] in [' ',#9] do
  125. inc(fPos);
  126. end;
  127. procedure TParser.SkipWhitespace;
  128. begin
  129. while true do
  130. begin
  131. CheckLoadBuffer;
  132. case fBuf[fPos] of
  133. ' ',#9 : SkipSpaces;
  134. #10,#13 : HandleNewLine
  135. else break;
  136. end;
  137. end;
  138. end;
  139. procedure TParser.HandleEof;
  140. begin
  141. fToken:=toEOF;
  142. fLastTokenStr:='';
  143. end;
  144. procedure TParser.HandleAlphaNum;
  145. begin
  146. fLastTokenStr:=GetAlphaNum;
  147. fToken:=toSymbol;
  148. end;
  149. procedure TParser.HandleNumber;
  150. type
  151. floatPunct = (fpDot,fpE);
  152. floatPuncts = set of floatPunct;
  153. var
  154. allowed : floatPuncts;
  155. begin
  156. fLastTokenStr:='';
  157. while IsNumber do
  158. ProcessChar;
  159. fToken:=toInteger;
  160. if (fBuf[fPos] in ['.','e','E']) then
  161. begin
  162. fToken:=toFloat;
  163. allowed:=[fpDot,fpE];
  164. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  165. begin
  166. case fBuf[fPos] of
  167. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  168. 'E','e' : if fpE in allowed then
  169. begin
  170. allowed:=[];
  171. ProcessChar;
  172. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  173. if not (fBuf[fPos] in ['0'..'9']) then
  174. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  175. end
  176. else break;
  177. end;
  178. ProcessChar;
  179. end;
  180. end;
  181. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  182. begin
  183. fFloatType:=fBuf[fPos];
  184. inc(fPos);
  185. fToken:=toFloat;
  186. end
  187. else fFloatType:=#0;
  188. end;
  189. procedure TParser.HandleHexNumber;
  190. var valid : boolean;
  191. begin
  192. fLastTokenStr:='$';
  193. inc(fPos);
  194. CheckLoadBuffer;
  195. valid:=false;
  196. while IsHexNum do
  197. begin
  198. valid:=true;
  199. ProcessChar;
  200. end;
  201. if not valid then
  202. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  203. fToken:=toInteger;
  204. end;
  205. function TParser.HandleQuotedString: string;
  206. begin
  207. Result:='';
  208. inc(fPos);
  209. CheckLoadBuffer;
  210. while true do
  211. begin
  212. case fBuf[fPos] of
  213. #0 : ErrorStr(SParUnterminatedString);
  214. #13,#10 : ErrorStr(SParUnterminatedString);
  215. '''' : begin
  216. inc(fPos);
  217. CheckLoadBuffer;
  218. if fBuf[fPos]<>'''' then exit;
  219. end;
  220. end;
  221. Result:=Result+fBuf[fPos];
  222. inc(fPos);
  223. CheckLoadBuffer;
  224. end;
  225. end;
  226. function TParser.HandleDecimalString(var ascii : boolean): widestring;
  227. var i : integer;
  228. begin
  229. Result:='';
  230. inc(fPos);
  231. CheckLoadBuffer;
  232. while IsNumber do
  233. begin
  234. Result:=Result+fBuf[fPos];
  235. inc(fPos);
  236. CheckLoadBuffer;
  237. end;
  238. if not TryStrToInt(Result,i) then
  239. i:=0;
  240. if i>127 then ascii:=false;
  241. setlength(Result,1);
  242. Result[1]:=widechar(word(i));
  243. end;
  244. procedure TParser.HandleString;
  245. var ascii : boolean;
  246. begin
  247. fLastTokenWStr:='';
  248. ascii:=true;
  249. while true do
  250. case fBuf[fPos] of
  251. '''' : fLastTokenWStr:=fLastTokenWStr+HandleQuotedString;
  252. '#' : fLastTokenWStr:=fLastTokenWStr+HandleDecimalString(ascii)
  253. else break;
  254. end;
  255. if ascii then
  256. fToken:=toString
  257. else
  258. fToken:=toWString;
  259. fLastTokenStr:=fLastTokenWStr;
  260. end;
  261. procedure TParser.HandleMinus;
  262. begin
  263. inc(fPos);
  264. CheckLoadBuffer;
  265. if IsNumber then
  266. begin
  267. HandleNumber;
  268. fLastTokenStr:='-'+fLastTokenStr;
  269. end
  270. else
  271. begin
  272. fToken:='-';
  273. fLastTokenStr:=fToken;
  274. end;
  275. end;
  276. procedure TParser.HandleUnknown;
  277. begin
  278. fToken:=fBuf[fPos];
  279. fLastTokenStr:=fToken;
  280. inc(fPos);
  281. end;
  282. constructor TParser.Create(Stream: TStream);
  283. begin
  284. fStream:=Stream;
  285. fBuf:=GetMem(ParseBufSize+1);
  286. fBufLen:=0;
  287. fPos:=0;
  288. fDeltaPos:=1;
  289. fSourceLine:=1;
  290. fEofReached:=false;
  291. fLastTokenStr:='';
  292. fLastTokenWStr:='';
  293. fFloatType:=#0;
  294. fToken:=#0;
  295. LoadBuffer;
  296. SkipBom;
  297. NextToken;
  298. end;
  299. destructor TParser.Destroy;
  300. begin
  301. fStream.Position:=SourcePos;
  302. FreeMem(fBuf);
  303. end;
  304. procedure TParser.CheckToken(T: Char);
  305. begin
  306. if fToken<>T then
  307. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  308. end;
  309. procedure TParser.CheckTokenSymbol(const S: string);
  310. begin
  311. CheckToken(toSymbol);
  312. if CompareText(fLastTokenStr,S)<>0 then
  313. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  314. end;
  315. procedure TParser.Error(const Ident: string);
  316. begin
  317. ErrorStr(Ident);
  318. end;
  319. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  320. begin
  321. ErrorStr(Format(Ident,Args));
  322. end;
  323. procedure TParser.ErrorStr(const Message: string);
  324. begin
  325. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  326. end;
  327. procedure TParser.HexToBinary(Stream: TStream);
  328. var outbuf : array[0..ParseBufSize-1] of byte;
  329. b : byte;
  330. i : integer;
  331. begin
  332. i:=0;
  333. SkipWhitespace;
  334. while IsHexNum do
  335. begin
  336. b:=(GetHexValue(fBuf[fPos]) shl 4);
  337. inc(fPos);
  338. CheckLoadBuffer;
  339. if not IsHexNum then
  340. Error(SParUnterminatedBinValue);
  341. b:=b or GetHexValue(fBuf[fPos]);
  342. inc(fPos);
  343. outbuf[i]:=b;
  344. inc(i);
  345. if i>=ParseBufSize then
  346. begin
  347. Stream.WriteBuffer(outbuf[0],i);
  348. i:=0;
  349. end;
  350. SkipWhitespace;
  351. end;
  352. if i>0 then
  353. Stream.WriteBuffer(outbuf[0],i);
  354. NextToken;
  355. end;
  356. function TParser.NextToken: Char;
  357. begin
  358. SkipWhiteSpace;
  359. if fEofReached then
  360. HandleEof
  361. else
  362. case fBuf[fPos] of
  363. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  364. '$' : HandleHexNumber;
  365. '-' : HandleMinus;
  366. '0'..'9' : HandleNumber;
  367. '''','#' : HandleString
  368. else
  369. HandleUnknown;
  370. end;
  371. Result:=fToken;
  372. end;
  373. function TParser.SourcePos: Longint;
  374. begin
  375. Result:=fStream.Position-fBufLen+fPos;
  376. end;
  377. function TParser.TokenComponentIdent: string;
  378. begin
  379. if fToken<>toSymbol then
  380. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  381. CheckLoadBuffer;
  382. while fBuf[fPos]='.' do
  383. begin
  384. ProcessChar;
  385. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  386. end;
  387. Result:=fLastTokenStr;
  388. end;
  389. {$ifndef FPUNONE}
  390. Function TParser.TokenFloat: Extended;
  391. var errcode : word;
  392. begin
  393. Val(fLastTokenStr,Result,errcode);
  394. if errcode<>0 then
  395. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  396. end;
  397. {$endif}
  398. Function TParser.TokenInt: Int64;
  399. begin
  400. if not TryStrToInt64(fLastTokenStr,Result) then
  401. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  402. end;
  403. function TParser.TokenString: string;
  404. begin
  405. case fToken of
  406. toWString : Result:=fLastTokenWStr;
  407. toFloat : if fFloatType<>#0 then
  408. Result:=fLastTokenStr+fFloatType
  409. else Result:=fLastTokenStr
  410. else
  411. Result:=fLastTokenStr;
  412. end;
  413. end;
  414. function TParser.TokenWideString: WideString;
  415. begin
  416. if fToken=toWString then
  417. Result:=fLastTokenWStr
  418. else
  419. Result:=fLastTokenStr;
  420. end;
  421. function TParser.TokenSymbolIs(const S: string): Boolean;
  422. begin
  423. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  424. end;