parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. ParseBufSize = 4096;
  15. LastSpecialToken = 5;
  16. TokNames : array[0..LastSpecialToken] of string =
  17. (
  18. 'EOF',
  19. 'Symbol',
  20. 'String',
  21. 'Integer',
  22. 'Float',
  23. 'WideString'
  24. );
  25. function TParser.GetTokenName(aTok: char): string;
  26. begin
  27. if ord(aTok) <= LastSpecialToken then
  28. Result:=TokNames[ord(aTok)]
  29. else Result:=aTok;
  30. end;
  31. procedure TParser.LoadBuffer;
  32. var
  33. BytesRead: integer;
  34. begin
  35. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  36. FBuf[BytesRead] := #0;
  37. Inc(FDeltaPos, BytesRead);
  38. FPos := 0;
  39. FBufLen := BytesRead;
  40. FEofReached:=BytesRead = 0;
  41. end;
  42. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  43. begin
  44. if fBuf[fPos]=#0 then LoadBuffer;
  45. end;
  46. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  47. begin
  48. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  49. inc(fPos);
  50. CheckLoadBuffer;
  51. end;
  52. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  53. begin
  54. Result:=fBuf[fPos] in ['0'..'9'];
  55. end;
  56. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  57. begin
  58. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  59. end;
  60. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  63. end;
  64. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=IsAlpha or IsNumber;
  67. end;
  68. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. case c of
  71. '0'..'9' : Result:=ord(c)-$30;
  72. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  73. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  74. end;
  75. end;
  76. function TParser.GetAlphaNum: string;
  77. begin
  78. if not IsAlpha then
  79. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  80. Result:='';
  81. while IsAlphaNum do
  82. begin
  83. Result:=Result+fBuf[fPos];
  84. inc(fPos);
  85. CheckLoadBuffer;
  86. end;
  87. end;
  88. procedure TParser.HandleNewLine;
  89. begin
  90. if fBuf[fPos]=#13 then //CR
  91. begin
  92. inc(fPos);
  93. CheckLoadBuffer;
  94. if fBuf[fPos]=#10 then inc(fPos); //CR LF
  95. end
  96. else begin
  97. inc(fPos); //LF
  98. CheckLoadBuffer;
  99. end;
  100. inc(fSourceLine);
  101. fDeltaPos:=-(fPos-1);
  102. end;
  103. procedure TParser.SkipBOM;
  104. var
  105. i : integer;
  106. bom : string[3];
  107. backup : integer;
  108. begin
  109. i:=1;
  110. bom:=' ';
  111. backup:=fPos;
  112. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  113. begin
  114. bom[i]:=fBuf[fPos];
  115. inc(fPos);
  116. CheckLoadBuffer;
  117. inc(i);
  118. end;
  119. if (bom<>(#$EF+#$BB+#$BF)) then
  120. fPos:=backup;
  121. end;
  122. procedure TParser.SkipSpaces;
  123. begin
  124. while fBuf[fPos] in [' ',#9] do begin
  125. inc(fPos);
  126. CheckLoadBuffer;
  127. end;
  128. end;
  129. procedure TParser.SkipWhitespace;
  130. begin
  131. while true do
  132. begin
  133. case fBuf[fPos] of
  134. ' ',#9 : SkipSpaces;
  135. #10,#13 : HandleNewLine
  136. else break;
  137. end;
  138. end;
  139. end;
  140. procedure TParser.HandleEof;
  141. begin
  142. fToken:=toEOF;
  143. fLastTokenStr:='';
  144. end;
  145. procedure TParser.HandleAlphaNum;
  146. begin
  147. fLastTokenStr:=GetAlphaNum;
  148. fToken:=toSymbol;
  149. end;
  150. procedure TParser.HandleNumber;
  151. type
  152. floatPunct = (fpDot,fpE);
  153. floatPuncts = set of floatPunct;
  154. var
  155. allowed : floatPuncts;
  156. begin
  157. fLastTokenStr:='';
  158. while IsNumber do
  159. ProcessChar;
  160. fToken:=toInteger;
  161. if (fBuf[fPos] in ['.','e','E']) then
  162. begin
  163. fToken:=toFloat;
  164. allowed:=[fpDot,fpE];
  165. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  166. begin
  167. case fBuf[fPos] of
  168. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  169. 'E','e' : if fpE in allowed then
  170. begin
  171. allowed:=[];
  172. ProcessChar;
  173. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  174. if not (fBuf[fPos] in ['0'..'9']) then
  175. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  176. end
  177. else break;
  178. end;
  179. ProcessChar;
  180. end;
  181. end;
  182. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  183. begin
  184. fFloatType:=fBuf[fPos];
  185. inc(fPos);
  186. CheckLoadBuffer;
  187. fToken:=toFloat;
  188. end
  189. else fFloatType:=#0;
  190. end;
  191. procedure TParser.HandleHexNumber;
  192. var valid : boolean;
  193. begin
  194. fLastTokenStr:='$';
  195. inc(fPos);
  196. CheckLoadBuffer;
  197. valid:=false;
  198. while IsHexNum do
  199. begin
  200. valid:=true;
  201. ProcessChar;
  202. end;
  203. if not valid then
  204. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  205. fToken:=toInteger;
  206. end;
  207. function TParser.HandleQuotedString: string;
  208. begin
  209. Result:='';
  210. inc(fPos);
  211. CheckLoadBuffer;
  212. while true do
  213. begin
  214. case fBuf[fPos] of
  215. #0 : ErrorStr(SParUnterminatedString);
  216. #13,#10 : ErrorStr(SParUnterminatedString);
  217. '''' : begin
  218. inc(fPos);
  219. CheckLoadBuffer;
  220. if fBuf[fPos]<>'''' then exit;
  221. end;
  222. end;
  223. Result:=Result+fBuf[fPos];
  224. inc(fPos);
  225. CheckLoadBuffer;
  226. end;
  227. end;
  228. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  229. WideChr: widechar; out StringChr: char);
  230. var i : integer;
  231. begin
  232. inc(fPos);
  233. CheckLoadBuffer;
  234. // read a word number
  235. i:=0;
  236. while IsNumber and (i<high(word)) do
  237. begin
  238. i:=i*10+ord(fBuf[fPos])-ord('0');
  239. inc(fPos);
  240. CheckLoadBuffer;
  241. end;
  242. if i>high(word) then i:=0;
  243. if i>127 then ascii:=false;
  244. WideChr:=widechar(word(i));
  245. if i<256 then
  246. StringChr:=chr(i)
  247. else
  248. StringChr:=#0;
  249. end;
  250. procedure TParser.HandleString;
  251. var ascii : boolean;
  252. s: string;
  253. w: WideChar;
  254. c: char;
  255. begin
  256. fLastTokenWStr:='';
  257. fLastTokenStr:='';
  258. ascii:=true;
  259. while true do
  260. begin
  261. case fBuf[fPos] of
  262. '''' :
  263. begin
  264. // avoid conversions,
  265. // On some systems conversion from ansistring to widestring and back
  266. // to ansistring does not give the original ansistring.
  267. // See bug http://bugs.freepascal.org/view.php?id=15841
  268. s:=HandleQuotedString;
  269. fLastTokenWStr:=fLastTokenWStr+s;
  270. fLastTokenStr:=fLastTokenStr+s;
  271. end;
  272. '#' :
  273. begin
  274. HandleDecimalCharacter(ascii,w,c);
  275. fLastTokenWStr:=fLastTokenWStr+w;
  276. fLastTokenStr:=fLastTokenStr+c;
  277. end;
  278. else break;
  279. end;
  280. end;
  281. if ascii then
  282. fToken:=Classes.toString
  283. else
  284. fToken:=toWString;
  285. end;
  286. procedure TParser.HandleMinus;
  287. begin
  288. inc(fPos);
  289. CheckLoadBuffer;
  290. if IsNumber then
  291. begin
  292. HandleNumber;
  293. fLastTokenStr:='-'+fLastTokenStr;
  294. end
  295. else
  296. begin
  297. fToken:='-';
  298. fLastTokenStr:=fToken;
  299. end;
  300. end;
  301. procedure TParser.HandleUnknown;
  302. begin
  303. fToken:=fBuf[fPos];
  304. fLastTokenStr:=fToken;
  305. inc(fPos);
  306. end;
  307. constructor TParser.Create(Stream: TStream);
  308. begin
  309. fStream:=Stream;
  310. fBuf:=GetMem(ParseBufSize+1);
  311. fBufLen:=0;
  312. fPos:=0;
  313. fDeltaPos:=1;
  314. fSourceLine:=1;
  315. fEofReached:=false;
  316. fLastTokenStr:='';
  317. fLastTokenWStr:='';
  318. fFloatType:=#0;
  319. fToken:=#0;
  320. LoadBuffer;
  321. SkipBom;
  322. NextToken;
  323. end;
  324. destructor TParser.Destroy;
  325. begin
  326. fStream.Position:=SourcePos;
  327. FreeMem(fBuf);
  328. end;
  329. procedure TParser.CheckToken(T: Char);
  330. begin
  331. if fToken<>T then
  332. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  333. end;
  334. procedure TParser.CheckTokenSymbol(const S: string);
  335. begin
  336. CheckToken(toSymbol);
  337. if CompareText(fLastTokenStr,S)<>0 then
  338. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  339. end;
  340. procedure TParser.Error(const Ident: string);
  341. begin
  342. ErrorStr(Ident);
  343. end;
  344. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  345. begin
  346. ErrorStr(Format(Ident,Args));
  347. end;
  348. procedure TParser.ErrorStr(const Message: string);
  349. begin
  350. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  351. end;
  352. procedure TParser.HexToBinary(Stream: TStream);
  353. var outbuf : array[0..ParseBufSize-1] of byte;
  354. b : byte;
  355. i : integer;
  356. begin
  357. i:=0;
  358. SkipWhitespace;
  359. while IsHexNum do
  360. begin
  361. b:=(GetHexValue(fBuf[fPos]) shl 4);
  362. inc(fPos);
  363. CheckLoadBuffer;
  364. if not IsHexNum then
  365. Error(SParUnterminatedBinValue);
  366. b:=b or GetHexValue(fBuf[fPos]);
  367. inc(fPos);
  368. outbuf[i]:=b;
  369. inc(i);
  370. if i>=ParseBufSize then
  371. begin
  372. Stream.WriteBuffer(outbuf[0],i);
  373. i:=0;
  374. end;
  375. SkipWhitespace;
  376. end;
  377. if i>0 then
  378. Stream.WriteBuffer(outbuf[0],i);
  379. NextToken;
  380. end;
  381. function TParser.NextToken: Char;
  382. begin
  383. SkipWhiteSpace;
  384. if fEofReached then
  385. HandleEof
  386. else
  387. case fBuf[fPos] of
  388. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  389. '$' : HandleHexNumber;
  390. '-' : HandleMinus;
  391. '0'..'9' : HandleNumber;
  392. '''','#' : HandleString
  393. else
  394. HandleUnknown;
  395. end;
  396. Result:=fToken;
  397. end;
  398. function TParser.SourcePos: Longint;
  399. begin
  400. Result:=fStream.Position-fBufLen+fPos;
  401. end;
  402. function TParser.TokenComponentIdent: string;
  403. begin
  404. if fToken<>toSymbol then
  405. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  406. CheckLoadBuffer;
  407. while fBuf[fPos]='.' do
  408. begin
  409. ProcessChar;
  410. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  411. end;
  412. Result:=fLastTokenStr;
  413. end;
  414. {$ifndef FPUNONE}
  415. Function TParser.TokenFloat: Extended;
  416. var errcode : word;
  417. begin
  418. Val(fLastTokenStr,Result,errcode);
  419. if errcode<>0 then
  420. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  421. end;
  422. {$endif}
  423. Function TParser.TokenInt: Int64;
  424. begin
  425. if not TryStrToInt64(fLastTokenStr,Result) then
  426. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  427. end;
  428. function TParser.TokenString: string;
  429. begin
  430. case fToken of
  431. toWString : Result:=fLastTokenWStr;
  432. toFloat : if fFloatType<>#0 then
  433. Result:=fLastTokenStr+fFloatType
  434. else Result:=fLastTokenStr
  435. else
  436. Result:=fLastTokenStr;
  437. end;
  438. end;
  439. function TParser.TokenWideString: WideString;
  440. begin
  441. if fToken=toWString then
  442. Result:=fLastTokenWStr
  443. else
  444. Result:=fLastTokenStr;
  445. end;
  446. function TParser.TokenSymbolIs(const S: string): Boolean;
  447. begin
  448. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  449. end;