parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. ParseBufSize = 4096;
  15. LastSpecialToken = 5;
  16. TokNames : array[0..LastSpecialToken] of string =
  17. (
  18. 'EOF',
  19. 'Symbol',
  20. 'String',
  21. 'Integer',
  22. 'Float',
  23. 'WideString'
  24. );
  25. function TParser.GetTokenName(aTok: char): string;
  26. begin
  27. if ord(aTok) <= LastSpecialToken then
  28. Result:=TokNames[ord(aTok)]
  29. else Result:=aTok;
  30. end;
  31. procedure TParser.LoadBuffer;
  32. var
  33. BytesRead: integer;
  34. begin
  35. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  36. if BytesRead = 0 then
  37. begin
  38. FEofReached := True;
  39. Exit;
  40. end;
  41. FBuf[BytesRead] := #0;
  42. Inc(FDeltaPos, BytesRead);
  43. FPos := 0;
  44. FBufLen := BytesRead;
  45. end;
  46. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  47. begin
  48. if fBuf[fPos]=#0 then LoadBuffer;
  49. end;
  50. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  51. begin
  52. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  53. inc(fPos);
  54. CheckLoadBuffer;
  55. end;
  56. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  57. begin
  58. Result:=fBuf[fPos] in ['0'..'9'];
  59. end;
  60. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  63. end;
  64. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  67. end;
  68. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. Result:=IsAlpha or IsNumber;
  71. end;
  72. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  73. begin
  74. case c of
  75. '0'..'9' : Result:=ord(c)-$30;
  76. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  77. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  78. end;
  79. end;
  80. function TParser.GetAlphaNum: string;
  81. begin
  82. if not IsAlpha then
  83. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  84. Result:='';
  85. while IsAlphaNum do
  86. begin
  87. Result:=Result+fBuf[fPos];
  88. inc(fPos);
  89. CheckLoadBuffer;
  90. end;
  91. end;
  92. procedure TParser.HandleNewLine;
  93. begin
  94. if fBuf[fPos]=#13 then //CR
  95. begin
  96. inc(fPos);
  97. CheckLoadBuffer;
  98. if fBuf[fPos]=#10 then inc(fPos); //CR LF
  99. end
  100. else inc(fPos); //LF
  101. inc(fSourceLine);
  102. fDeltaPos:=-(fPos-1);
  103. end;
  104. procedure TParser.SkipBOM;
  105. var
  106. i : integer;
  107. bom : string[3];
  108. backup : integer;
  109. begin
  110. i:=1;
  111. bom:=' ';
  112. backup:=fPos;
  113. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  114. begin
  115. bom[i]:=fBuf[fPos];
  116. inc(fPos);
  117. inc(i);
  118. end;
  119. if (bom<>(#$EF+#$BB+#$BF)) then
  120. fPos:=backup;
  121. end;
  122. procedure TParser.SkipSpaces;
  123. begin
  124. while fBuf[fPos] in [' ',#9] do
  125. inc(fPos);
  126. end;
  127. procedure TParser.SkipWhitespace;
  128. begin
  129. while true do
  130. begin
  131. CheckLoadBuffer;
  132. case fBuf[fPos] of
  133. ' ',#9 : SkipSpaces;
  134. #10,#13 : HandleNewLine
  135. else break;
  136. end;
  137. end;
  138. end;
  139. procedure TParser.HandleEof;
  140. begin
  141. fToken:=toEOF;
  142. fLastTokenStr:='';
  143. end;
  144. procedure TParser.HandleAlphaNum;
  145. begin
  146. fLastTokenStr:=GetAlphaNum;
  147. fToken:=toSymbol;
  148. end;
  149. procedure TParser.HandleNumber;
  150. type
  151. floatPunct = (fpDot,fpE);
  152. floatPuncts = set of floatPunct;
  153. var
  154. allowed : floatPuncts;
  155. begin
  156. fLastTokenStr:='';
  157. while IsNumber do
  158. ProcessChar;
  159. fToken:=toInteger;
  160. if (fBuf[fPos] in ['.','e','E']) then
  161. begin
  162. fToken:=toFloat;
  163. allowed:=[fpDot,fpE];
  164. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  165. begin
  166. case fBuf[fPos] of
  167. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  168. 'E','e' : if fpE in allowed then
  169. begin
  170. allowed:=[];
  171. ProcessChar;
  172. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  173. if not (fBuf[fPos] in ['0'..'9']) then
  174. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  175. end
  176. else break;
  177. end;
  178. ProcessChar;
  179. end;
  180. end;
  181. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  182. begin
  183. fFloatType:=fBuf[fPos];
  184. inc(fPos);
  185. fToken:=toFloat;
  186. end
  187. else fFloatType:=#0;
  188. end;
  189. procedure TParser.HandleHexNumber;
  190. var valid : boolean;
  191. begin
  192. fLastTokenStr:='$';
  193. inc(fPos);
  194. CheckLoadBuffer;
  195. valid:=false;
  196. while IsHexNum do
  197. begin
  198. valid:=true;
  199. ProcessChar;
  200. end;
  201. if not valid then
  202. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  203. fToken:=toInteger;
  204. end;
  205. function TParser.HandleQuotedString: string;
  206. begin
  207. Result:='';
  208. inc(fPos);
  209. CheckLoadBuffer;
  210. while true do
  211. begin
  212. case fBuf[fPos] of
  213. #0 : ErrorStr(SParUnterminatedString);
  214. #13,#10 : ErrorStr(SParUnterminatedString);
  215. '''' : begin
  216. inc(fPos);
  217. CheckLoadBuffer;
  218. if fBuf[fPos]<>'''' then exit;
  219. end;
  220. end;
  221. Result:=Result+fBuf[fPos];
  222. inc(fPos);
  223. CheckLoadBuffer;
  224. end;
  225. end;
  226. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  227. WideChr: widechar; out StringChr: char);
  228. var i : integer;
  229. begin
  230. inc(fPos);
  231. CheckLoadBuffer;
  232. // read a word number
  233. i:=0;
  234. while IsNumber and (i<high(word)) do
  235. begin
  236. i:=i*10+ord(fBuf[fPos])-ord('0');
  237. inc(fPos);
  238. CheckLoadBuffer;
  239. end;
  240. if i>high(word) then i:=0;
  241. if i>127 then ascii:=false;
  242. WideChr:=widechar(word(i));
  243. if i<256 then
  244. StringChr:=chr(i)
  245. else
  246. StringChr:=#0;
  247. end;
  248. procedure TParser.HandleString;
  249. var ascii : boolean;
  250. s: string;
  251. w: WideChar;
  252. c: char;
  253. begin
  254. fLastTokenWStr:='';
  255. fLastTokenStr:='';
  256. ascii:=true;
  257. while true do
  258. begin
  259. case fBuf[fPos] of
  260. '''' :
  261. begin
  262. // avoid conversions,
  263. // On some systems conversion from ansistring to widestring and back
  264. // to ansistring does not give the original ansistring.
  265. // See bug http://bugs.freepascal.org/view.php?id=15841
  266. s:=HandleQuotedString;
  267. fLastTokenWStr:=fLastTokenWStr+s;
  268. fLastTokenStr:=fLastTokenStr+s;
  269. end;
  270. '#' :
  271. begin
  272. HandleDecimalCharacter(ascii,w,c);
  273. fLastTokenWStr:=fLastTokenWStr+w;
  274. fLastTokenStr:=fLastTokenStr+c;
  275. end;
  276. else break;
  277. end;
  278. end;
  279. if ascii then
  280. fToken:=Classes.toString
  281. else
  282. fToken:=toWString;
  283. end;
  284. procedure TParser.HandleMinus;
  285. begin
  286. inc(fPos);
  287. CheckLoadBuffer;
  288. if IsNumber then
  289. begin
  290. HandleNumber;
  291. fLastTokenStr:='-'+fLastTokenStr;
  292. end
  293. else
  294. begin
  295. fToken:='-';
  296. fLastTokenStr:=fToken;
  297. end;
  298. end;
  299. procedure TParser.HandleUnknown;
  300. begin
  301. fToken:=fBuf[fPos];
  302. fLastTokenStr:=fToken;
  303. inc(fPos);
  304. end;
  305. constructor TParser.Create(Stream: TStream);
  306. begin
  307. fStream:=Stream;
  308. fBuf:=GetMem(ParseBufSize+1);
  309. fBufLen:=0;
  310. fPos:=0;
  311. fDeltaPos:=1;
  312. fSourceLine:=1;
  313. fEofReached:=false;
  314. fLastTokenStr:='';
  315. fLastTokenWStr:='';
  316. fFloatType:=#0;
  317. fToken:=#0;
  318. LoadBuffer;
  319. SkipBom;
  320. NextToken;
  321. end;
  322. destructor TParser.Destroy;
  323. begin
  324. fStream.Position:=SourcePos;
  325. FreeMem(fBuf);
  326. end;
  327. procedure TParser.CheckToken(T: Char);
  328. begin
  329. if fToken<>T then
  330. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  331. end;
  332. procedure TParser.CheckTokenSymbol(const S: string);
  333. begin
  334. CheckToken(toSymbol);
  335. if CompareText(fLastTokenStr,S)<>0 then
  336. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  337. end;
  338. procedure TParser.Error(const Ident: string);
  339. begin
  340. ErrorStr(Ident);
  341. end;
  342. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  343. begin
  344. ErrorStr(Format(Ident,Args));
  345. end;
  346. procedure TParser.ErrorStr(const Message: string);
  347. begin
  348. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  349. end;
  350. procedure TParser.HexToBinary(Stream: TStream);
  351. var outbuf : array[0..ParseBufSize-1] of byte;
  352. b : byte;
  353. i : integer;
  354. begin
  355. i:=0;
  356. SkipWhitespace;
  357. while IsHexNum do
  358. begin
  359. b:=(GetHexValue(fBuf[fPos]) shl 4);
  360. inc(fPos);
  361. CheckLoadBuffer;
  362. if not IsHexNum then
  363. Error(SParUnterminatedBinValue);
  364. b:=b or GetHexValue(fBuf[fPos]);
  365. inc(fPos);
  366. outbuf[i]:=b;
  367. inc(i);
  368. if i>=ParseBufSize then
  369. begin
  370. Stream.WriteBuffer(outbuf[0],i);
  371. i:=0;
  372. end;
  373. SkipWhitespace;
  374. end;
  375. if i>0 then
  376. Stream.WriteBuffer(outbuf[0],i);
  377. NextToken;
  378. end;
  379. function TParser.NextToken: Char;
  380. begin
  381. SkipWhiteSpace;
  382. if fEofReached then
  383. HandleEof
  384. else
  385. case fBuf[fPos] of
  386. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  387. '$' : HandleHexNumber;
  388. '-' : HandleMinus;
  389. '0'..'9' : HandleNumber;
  390. '''','#' : HandleString
  391. else
  392. HandleUnknown;
  393. end;
  394. Result:=fToken;
  395. end;
  396. function TParser.SourcePos: Longint;
  397. begin
  398. Result:=fStream.Position-fBufLen+fPos;
  399. end;
  400. function TParser.TokenComponentIdent: string;
  401. begin
  402. if fToken<>toSymbol then
  403. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  404. CheckLoadBuffer;
  405. while fBuf[fPos]='.' do
  406. begin
  407. ProcessChar;
  408. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  409. end;
  410. Result:=fLastTokenStr;
  411. end;
  412. {$ifndef FPUNONE}
  413. Function TParser.TokenFloat: Extended;
  414. var errcode : word;
  415. begin
  416. Val(fLastTokenStr,Result,errcode);
  417. if errcode<>0 then
  418. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  419. end;
  420. {$endif}
  421. Function TParser.TokenInt: Int64;
  422. begin
  423. if not TryStrToInt64(fLastTokenStr,Result) then
  424. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  425. end;
  426. function TParser.TokenString: string;
  427. begin
  428. case fToken of
  429. toWString : Result:=fLastTokenWStr;
  430. toFloat : if fFloatType<>#0 then
  431. Result:=fLastTokenStr+fFloatType
  432. else Result:=fLastTokenStr
  433. else
  434. Result:=fLastTokenStr;
  435. end;
  436. end;
  437. function TParser.TokenWideString: WideString;
  438. begin
  439. if fToken=toWString then
  440. Result:=fLastTokenWStr
  441. else
  442. Result:=fLastTokenStr;
  443. end;
  444. function TParser.TokenSymbolIs(const S: string): Boolean;
  445. begin
  446. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  447. end;