parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. ParseBufSize = 4096;
  15. LastSpecialToken = 5;
  16. TokNames : array[0..LastSpecialToken] of string =
  17. (
  18. 'EOF',
  19. 'Symbol',
  20. 'String',
  21. 'Integer',
  22. 'Float',
  23. 'WideString'
  24. );
  25. function TParser.GetTokenName(aTok: char): string;
  26. begin
  27. if ord(aTok) <= LastSpecialToken then
  28. Result:=TokNames[ord(aTok)]
  29. else Result:=aTok;
  30. end;
  31. procedure TParser.LoadBuffer;
  32. var
  33. BytesRead: integer;
  34. begin
  35. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  36. FBuf[BytesRead] := #0;
  37. Inc(FDeltaPos, BytesRead);
  38. FPos := 0;
  39. FBufLen := BytesRead;
  40. FEofReached:=BytesRead = 0;
  41. end;
  42. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  43. begin
  44. if fBuf[fPos]=#0 then LoadBuffer;
  45. end;
  46. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  47. begin
  48. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  49. inc(fPos);
  50. CheckLoadBuffer;
  51. end;
  52. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  53. begin
  54. Result:=fBuf[fPos] in ['0'..'9'];
  55. end;
  56. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  57. begin
  58. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  59. end;
  60. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  63. end;
  64. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=IsAlpha or IsNumber;
  67. end;
  68. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. case c of
  71. '0'..'9' : Result:=ord(c)-$30;
  72. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  73. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  74. end;
  75. end;
  76. function TParser.GetAlphaNum: string;
  77. begin
  78. if not IsAlpha then
  79. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  80. Result:='';
  81. while IsAlphaNum do
  82. begin
  83. Result:=Result+fBuf[fPos];
  84. inc(fPos);
  85. CheckLoadBuffer;
  86. end;
  87. end;
  88. procedure TParser.HandleNewLine;
  89. begin
  90. if fBuf[fPos]=#13 then //CR
  91. begin
  92. inc(fPos);
  93. CheckLoadBuffer;
  94. end;
  95. if fBuf[fPos]=#10 then
  96. begin
  97. inc(fPos); //CR LF or LF
  98. CheckLoadBuffer;
  99. end;
  100. inc(fSourceLine);
  101. fDeltaPos:=-(fPos-1);
  102. end;
  103. procedure TParser.SkipBOM;
  104. var
  105. i : integer;
  106. bom : string[3];
  107. backup : integer;
  108. begin
  109. i:=1;
  110. bom:=' ';
  111. backup:=fPos;
  112. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  113. begin
  114. bom[i]:=fBuf[fPos];
  115. inc(fPos);
  116. CheckLoadBuffer;
  117. inc(i);
  118. end;
  119. if (bom<>(#$EF+#$BB+#$BF)) then
  120. fPos:=backup;
  121. end;
  122. procedure TParser.SkipSpaces;
  123. begin
  124. while fBuf[fPos] in [' ',#9] do begin
  125. inc(fPos);
  126. CheckLoadBuffer;
  127. end;
  128. end;
  129. procedure TParser.SkipWhitespace;
  130. begin
  131. while true do
  132. begin
  133. case fBuf[fPos] of
  134. ' ',#9 : SkipSpaces;
  135. #10,#13 : HandleNewLine
  136. else break;
  137. end;
  138. end;
  139. end;
  140. procedure TParser.HandleEof;
  141. begin
  142. fToken:=toEOF;
  143. fLastTokenStr:='';
  144. end;
  145. procedure TParser.HandleAlphaNum;
  146. begin
  147. fLastTokenStr:=GetAlphaNum;
  148. fToken:=toSymbol;
  149. end;
  150. procedure TParser.HandleNumber;
  151. type
  152. floatPunct = (fpDot,fpE);
  153. floatPuncts = set of floatPunct;
  154. var
  155. allowed : floatPuncts;
  156. begin
  157. fLastTokenStr:='';
  158. while IsNumber do
  159. ProcessChar;
  160. fToken:=toInteger;
  161. if (fBuf[fPos] in ['.','e','E']) then
  162. begin
  163. fToken:=toFloat;
  164. allowed:=[fpDot,fpE];
  165. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  166. begin
  167. case fBuf[fPos] of
  168. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  169. 'E','e' : if fpE in allowed then
  170. begin
  171. allowed:=[];
  172. ProcessChar;
  173. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  174. if not (fBuf[fPos] in ['0'..'9']) then
  175. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  176. end
  177. else break;
  178. end;
  179. ProcessChar;
  180. end;
  181. end;
  182. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  183. begin
  184. fFloatType:=fBuf[fPos];
  185. inc(fPos);
  186. CheckLoadBuffer;
  187. fToken:=toFloat;
  188. end
  189. else fFloatType:=#0;
  190. end;
  191. procedure TParser.HandleHexNumber;
  192. var valid : boolean;
  193. begin
  194. fLastTokenStr:='$';
  195. inc(fPos);
  196. CheckLoadBuffer;
  197. valid:=false;
  198. while IsHexNum do
  199. begin
  200. valid:=true;
  201. ProcessChar;
  202. end;
  203. if not valid then
  204. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  205. fToken:=toInteger;
  206. end;
  207. function TParser.HandleQuotedString: string;
  208. begin
  209. Result:='';
  210. inc(fPos);
  211. CheckLoadBuffer;
  212. while true do
  213. begin
  214. case fBuf[fPos] of
  215. #0 : ErrorStr(SParUnterminatedString);
  216. #13,#10 : ErrorStr(SParUnterminatedString);
  217. '''' : begin
  218. inc(fPos);
  219. CheckLoadBuffer;
  220. if fBuf[fPos]<>'''' then exit;
  221. end;
  222. end;
  223. Result:=Result+fBuf[fPos];
  224. inc(fPos);
  225. CheckLoadBuffer;
  226. end;
  227. end;
  228. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  229. WideChr: widechar; out StringChr: char);
  230. var i : integer;
  231. begin
  232. inc(fPos);
  233. CheckLoadBuffer;
  234. // read a word number
  235. i:=0;
  236. while IsNumber and (i<high(word)) do
  237. begin
  238. i:=i*10+ord(fBuf[fPos])-ord('0');
  239. inc(fPos);
  240. CheckLoadBuffer;
  241. end;
  242. if i>high(word) then i:=0;
  243. if i>127 then ascii:=false;
  244. WideChr:=widechar(word(i));
  245. if i<256 then
  246. StringChr:=chr(i)
  247. else
  248. StringChr:=#0;
  249. end;
  250. procedure TParser.HandleString;
  251. var ascii : boolean;
  252. s: string;
  253. w: WideChar;
  254. c: char;
  255. begin
  256. fLastTokenWStr:='';
  257. fLastTokenStr:='';
  258. ascii:=true;
  259. while true do
  260. begin
  261. case fBuf[fPos] of
  262. '''' :
  263. begin
  264. // avoid conversions,
  265. // On some systems conversion from ansistring to widestring and back
  266. // to ansistring does not give the original ansistring.
  267. // See bug http://bugs.freepascal.org/view.php?id=15841
  268. s:=HandleQuotedString;
  269. fLastTokenWStr:=fLastTokenWStr+UnicodeString(s);
  270. fLastTokenStr:=fLastTokenStr+s;
  271. end;
  272. '#' :
  273. begin
  274. HandleDecimalCharacter(ascii,w,c);
  275. fLastTokenWStr:=fLastTokenWStr+w;
  276. fLastTokenStr:=fLastTokenStr+c;
  277. end;
  278. else break;
  279. end;
  280. end;
  281. if ascii then
  282. fToken:=Classes.toString
  283. else
  284. fToken:=toWString;
  285. end;
  286. procedure TParser.HandleMinus;
  287. begin
  288. inc(fPos);
  289. CheckLoadBuffer;
  290. if IsNumber then
  291. begin
  292. HandleNumber;
  293. fLastTokenStr:='-'+fLastTokenStr;
  294. end
  295. else
  296. begin
  297. fToken:='-';
  298. fLastTokenStr:=fToken;
  299. end;
  300. end;
  301. procedure TParser.HandleUnknown;
  302. begin
  303. fToken:=fBuf[fPos];
  304. fLastTokenStr:=fToken;
  305. inc(fPos);
  306. CheckLoadBuffer;
  307. end;
  308. constructor TParser.Create(Stream: TStream);
  309. begin
  310. fStream:=Stream;
  311. fBuf:=GetMem(ParseBufSize+1);
  312. fBufLen:=0;
  313. fPos:=0;
  314. fDeltaPos:=1;
  315. fSourceLine:=1;
  316. fEofReached:=false;
  317. fLastTokenStr:='';
  318. fLastTokenWStr:='';
  319. fFloatType:=#0;
  320. fToken:=#0;
  321. LoadBuffer;
  322. SkipBom;
  323. NextToken;
  324. end;
  325. destructor TParser.Destroy;
  326. begin
  327. fStream.Position:=SourcePos;
  328. FreeMem(fBuf);
  329. end;
  330. procedure TParser.CheckToken(T: Char);
  331. begin
  332. if fToken<>T then
  333. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  334. end;
  335. procedure TParser.CheckTokenSymbol(const S: string);
  336. begin
  337. CheckToken(toSymbol);
  338. if CompareText(fLastTokenStr,S)<>0 then
  339. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  340. end;
  341. procedure TParser.Error(const Ident: string);
  342. begin
  343. ErrorStr(Ident);
  344. end;
  345. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  346. begin
  347. ErrorStr(Format(Ident,Args));
  348. end;
  349. procedure TParser.ErrorStr(const Message: string);
  350. begin
  351. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  352. end;
  353. procedure TParser.HexToBinary(Stream: TStream);
  354. var outbuf : array[0..ParseBufSize-1] of byte;
  355. b : byte;
  356. i : integer;
  357. begin
  358. i:=0;
  359. SkipWhitespace;
  360. while IsHexNum do
  361. begin
  362. b:=(GetHexValue(fBuf[fPos]) shl 4);
  363. inc(fPos);
  364. CheckLoadBuffer;
  365. if not IsHexNum then
  366. Error(SParUnterminatedBinValue);
  367. b:=b or GetHexValue(fBuf[fPos]);
  368. inc(fPos);
  369. CheckLoadBuffer;
  370. outbuf[i]:=b;
  371. inc(i);
  372. if i>=ParseBufSize then
  373. begin
  374. Stream.WriteBuffer(outbuf[0],i);
  375. i:=0;
  376. end;
  377. SkipWhitespace;
  378. end;
  379. if i>0 then
  380. Stream.WriteBuffer(outbuf[0],i);
  381. NextToken;
  382. end;
  383. function TParser.NextToken: Char;
  384. begin
  385. SkipWhiteSpace;
  386. if fEofReached then
  387. HandleEof
  388. else
  389. case fBuf[fPos] of
  390. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  391. '$' : HandleHexNumber;
  392. '-' : HandleMinus;
  393. '0'..'9' : HandleNumber;
  394. '''','#' : HandleString
  395. else
  396. HandleUnknown;
  397. end;
  398. Result:=fToken;
  399. end;
  400. function TParser.SourcePos: Longint;
  401. begin
  402. Result:=fStream.Position-fBufLen+fPos;
  403. end;
  404. function TParser.TokenComponentIdent: string;
  405. begin
  406. if fToken<>toSymbol then
  407. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  408. CheckLoadBuffer;
  409. while fBuf[fPos]='.' do
  410. begin
  411. ProcessChar;
  412. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  413. end;
  414. Result:=fLastTokenStr;
  415. end;
  416. {$ifndef FPUNONE}
  417. Function TParser.TokenFloat: Extended;
  418. var errcode : word;
  419. begin
  420. Val(fLastTokenStr,Result,errcode);
  421. if errcode<>0 then
  422. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  423. end;
  424. {$endif}
  425. Function TParser.TokenInt: Int64;
  426. begin
  427. if not TryStrToInt64(fLastTokenStr,Result) then
  428. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  429. end;
  430. function TParser.TokenString: string;
  431. begin
  432. case fToken of
  433. toWString : Result:=string(fLastTokenWStr);
  434. toFloat : if fFloatType<>#0 then
  435. Result:=fLastTokenStr+fFloatType
  436. else Result:=fLastTokenStr
  437. else
  438. Result:=fLastTokenStr;
  439. end;
  440. end;
  441. function TParser.TokenWideString: WideString;
  442. begin
  443. if fToken=toWString then
  444. Result:=fLastTokenWStr
  445. else
  446. Result:=WideString(fLastTokenStr);
  447. end;
  448. function TParser.TokenSymbolIs(const S: string): Boolean;
  449. begin
  450. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  451. end;