parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. {$ifdef CPU16}
  15. { Avoid too big local stack use for
  16. MSDOS tiny memory model that uses less than 4096
  17. bytes for total stack by default. }
  18. ParseBufSize = 512;
  19. {$else not CPU16}
  20. ParseBufSize = 4096;
  21. {$endif not CPU16}
  22. LastSpecialToken = 5;
  23. TokNames : array[0..LastSpecialToken] of string =
  24. (
  25. 'EOF',
  26. 'Symbol',
  27. 'String',
  28. 'Integer',
  29. 'Float',
  30. 'WideString'
  31. );
  32. function TParser.GetTokenName(aTok: char): string;
  33. begin
  34. if ord(aTok) <= LastSpecialToken then
  35. Result:=TokNames[ord(aTok)]
  36. else Result:=aTok;
  37. end;
  38. procedure TParser.LoadBuffer;
  39. var
  40. BytesRead: integer;
  41. begin
  42. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  43. FBuf[BytesRead] := #0;
  44. Inc(FDeltaPos, BytesRead);
  45. FPos := 0;
  46. FBufLen := BytesRead;
  47. FEofReached:=BytesRead = 0;
  48. end;
  49. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  50. begin
  51. if fBuf[fPos]=#0 then LoadBuffer;
  52. end;
  53. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  54. begin
  55. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  56. inc(fPos);
  57. CheckLoadBuffer;
  58. end;
  59. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  60. begin
  61. Result:=fBuf[fPos] in ['0'..'9'];
  62. end;
  63. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  64. begin
  65. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  66. end;
  67. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  68. begin
  69. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  70. end;
  71. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  72. begin
  73. Result:=IsAlpha or IsNumber;
  74. end;
  75. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  76. begin
  77. case c of
  78. '0'..'9' : Result:=ord(c)-$30;
  79. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  80. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  81. end;
  82. end;
  83. function TParser.GetAlphaNum: string;
  84. begin
  85. if not IsAlpha then
  86. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  87. Result:='';
  88. while IsAlphaNum do
  89. begin
  90. Result:=Result+fBuf[fPos];
  91. inc(fPos);
  92. CheckLoadBuffer;
  93. end;
  94. end;
  95. procedure TParser.HandleNewLine;
  96. begin
  97. if fBuf[fPos]=#13 then //CR
  98. begin
  99. inc(fPos);
  100. CheckLoadBuffer;
  101. end;
  102. if fBuf[fPos]=#10 then
  103. begin
  104. inc(fPos); //CR LF or LF
  105. CheckLoadBuffer;
  106. end;
  107. inc(fSourceLine);
  108. fDeltaPos:=-(fPos-1);
  109. end;
  110. procedure TParser.SkipBOM;
  111. var
  112. i : integer;
  113. bom : string[3];
  114. backup : integer;
  115. begin
  116. i:=1;
  117. bom:=' ';
  118. backup:=fPos;
  119. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  120. begin
  121. bom[i]:=fBuf[fPos];
  122. inc(fPos);
  123. CheckLoadBuffer;
  124. inc(i);
  125. end;
  126. if (bom<>(#$EF+#$BB+#$BF)) then
  127. fPos:=backup;
  128. end;
  129. procedure TParser.SkipSpaces;
  130. begin
  131. while fBuf[fPos] in [' ',#9] do begin
  132. inc(fPos);
  133. CheckLoadBuffer;
  134. end;
  135. end;
  136. procedure TParser.SkipWhitespace;
  137. begin
  138. while true do
  139. begin
  140. case fBuf[fPos] of
  141. ' ',#9 : SkipSpaces;
  142. #10,#13 : HandleNewLine
  143. else break;
  144. end;
  145. end;
  146. end;
  147. procedure TParser.HandleEof;
  148. begin
  149. fToken:=toEOF;
  150. fLastTokenStr:='';
  151. end;
  152. procedure TParser.HandleAlphaNum;
  153. begin
  154. fLastTokenStr:=GetAlphaNum;
  155. fToken:=toSymbol;
  156. end;
  157. procedure TParser.HandleNumber;
  158. type
  159. floatPunct = (fpDot,fpE);
  160. floatPuncts = set of floatPunct;
  161. var
  162. allowed : floatPuncts;
  163. begin
  164. fLastTokenStr:='';
  165. while IsNumber do
  166. ProcessChar;
  167. fToken:=toInteger;
  168. if (fBuf[fPos] in ['.','e','E']) then
  169. begin
  170. fToken:=toFloat;
  171. allowed:=[fpDot,fpE];
  172. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  173. begin
  174. case fBuf[fPos] of
  175. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  176. 'E','e' : if fpE in allowed then
  177. begin
  178. allowed:=[];
  179. ProcessChar;
  180. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  181. if not (fBuf[fPos] in ['0'..'9']) then
  182. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  183. end
  184. else break;
  185. end;
  186. ProcessChar;
  187. end;
  188. end;
  189. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  190. begin
  191. fFloatType:=fBuf[fPos];
  192. inc(fPos);
  193. CheckLoadBuffer;
  194. fToken:=toFloat;
  195. end
  196. else fFloatType:=#0;
  197. end;
  198. procedure TParser.HandleHexNumber;
  199. var valid : boolean;
  200. begin
  201. fLastTokenStr:='$';
  202. inc(fPos);
  203. CheckLoadBuffer;
  204. valid:=false;
  205. while IsHexNum do
  206. begin
  207. valid:=true;
  208. ProcessChar;
  209. end;
  210. if not valid then
  211. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  212. fToken:=toInteger;
  213. end;
  214. function TParser.HandleQuotedString: string;
  215. begin
  216. Result:='';
  217. inc(fPos);
  218. CheckLoadBuffer;
  219. while true do
  220. begin
  221. case fBuf[fPos] of
  222. #0 : ErrorStr(SParUnterminatedString);
  223. #13,#10 : ErrorStr(SParUnterminatedString);
  224. '''' : begin
  225. inc(fPos);
  226. CheckLoadBuffer;
  227. if fBuf[fPos]<>'''' then exit;
  228. end;
  229. end;
  230. Result:=Result+fBuf[fPos];
  231. inc(fPos);
  232. CheckLoadBuffer;
  233. end;
  234. end;
  235. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  236. WideChr: widechar; out StringChr: char);
  237. var i : integer;
  238. begin
  239. inc(fPos);
  240. CheckLoadBuffer;
  241. // read a word number
  242. i:=0;
  243. while IsNumber and (i<high(word)) do
  244. begin
  245. i:=i*10+ord(fBuf[fPos])-ord('0');
  246. inc(fPos);
  247. CheckLoadBuffer;
  248. end;
  249. if i>high(word) then i:=0;
  250. if i>127 then ascii:=false;
  251. WideChr:=widechar(word(i));
  252. if i<256 then
  253. StringChr:=chr(i)
  254. else
  255. StringChr:=#0;
  256. end;
  257. procedure TParser.HandleString;
  258. var ascii : boolean;
  259. s: string;
  260. w: WideChar;
  261. c: char;
  262. begin
  263. fLastTokenWStr:='';
  264. fLastTokenStr:='';
  265. ascii:=true;
  266. while true do
  267. begin
  268. case fBuf[fPos] of
  269. '''' :
  270. begin
  271. // avoid conversions,
  272. // On some systems conversion from ansistring to widestring and back
  273. // to ansistring does not give the original ansistring.
  274. // See bug http://bugs.freepascal.org/view.php?id=15841
  275. s:=HandleQuotedString;
  276. fLastTokenWStr:=fLastTokenWStr+UnicodeString(s);
  277. fLastTokenStr:=fLastTokenStr+s;
  278. end;
  279. '#' :
  280. begin
  281. HandleDecimalCharacter(ascii,w,c);
  282. fLastTokenWStr:=fLastTokenWStr+w;
  283. fLastTokenStr:=fLastTokenStr+c;
  284. end;
  285. else break;
  286. end;
  287. end;
  288. if ascii then
  289. fToken:=Classes.toString
  290. else
  291. fToken:=toWString;
  292. end;
  293. procedure TParser.HandleMinus;
  294. begin
  295. inc(fPos);
  296. CheckLoadBuffer;
  297. if IsNumber then
  298. begin
  299. HandleNumber;
  300. fLastTokenStr:='-'+fLastTokenStr;
  301. end
  302. else
  303. begin
  304. fToken:='-';
  305. fLastTokenStr:=fToken;
  306. end;
  307. end;
  308. procedure TParser.HandleUnknown;
  309. begin
  310. fToken:=fBuf[fPos];
  311. fLastTokenStr:=fToken;
  312. inc(fPos);
  313. CheckLoadBuffer;
  314. end;
  315. constructor TParser.Create(Stream: TStream);
  316. begin
  317. fStream:=Stream;
  318. fBuf:=GetMem(ParseBufSize+1);
  319. fBufLen:=0;
  320. fPos:=0;
  321. fDeltaPos:=1;
  322. fSourceLine:=1;
  323. fEofReached:=false;
  324. fLastTokenStr:='';
  325. fLastTokenWStr:='';
  326. fFloatType:=#0;
  327. fToken:=#0;
  328. LoadBuffer;
  329. SkipBom;
  330. NextToken;
  331. end;
  332. destructor TParser.Destroy;
  333. Var
  334. aCount : Integer;
  335. begin
  336. if fToken=toWString then
  337. aCount:=Length(fLastTokenWStr)*2
  338. else
  339. aCount:=Length(fLastTokenStr);
  340. fStream.Position:=SourcePos-aCount;
  341. FreeMem(fBuf);
  342. end;
  343. procedure TParser.CheckToken(T: Char);
  344. begin
  345. if fToken<>T then
  346. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  347. end;
  348. procedure TParser.CheckTokenSymbol(const S: string);
  349. begin
  350. CheckToken(toSymbol);
  351. if CompareText(fLastTokenStr,S)<>0 then
  352. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  353. end;
  354. procedure TParser.Error(const Ident: string);
  355. begin
  356. ErrorStr(Ident);
  357. end;
  358. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  359. begin
  360. ErrorStr(Format(Ident,Args));
  361. end;
  362. procedure TParser.ErrorStr(const Message: string);
  363. begin
  364. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  365. end;
  366. procedure TParser.HexToBinary(Stream: TStream);
  367. var outbuf : array[0..ParseBufSize-1] of byte;
  368. b : byte;
  369. i : integer;
  370. begin
  371. i:=0;
  372. SkipWhitespace;
  373. while IsHexNum do
  374. begin
  375. b:=(GetHexValue(fBuf[fPos]) shl 4);
  376. inc(fPos);
  377. CheckLoadBuffer;
  378. if not IsHexNum then
  379. Error(SParUnterminatedBinValue);
  380. b:=b or GetHexValue(fBuf[fPos]);
  381. inc(fPos);
  382. CheckLoadBuffer;
  383. outbuf[i]:=b;
  384. inc(i);
  385. if i>=ParseBufSize then
  386. begin
  387. Stream.WriteBuffer(outbuf[0],i);
  388. i:=0;
  389. end;
  390. SkipWhitespace;
  391. end;
  392. if i>0 then
  393. Stream.WriteBuffer(outbuf[0],i);
  394. NextToken;
  395. end;
  396. function TParser.NextToken: Char;
  397. begin
  398. SkipWhiteSpace;
  399. if fEofReached then
  400. HandleEof
  401. else
  402. case fBuf[fPos] of
  403. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  404. '$' : HandleHexNumber;
  405. '-' : HandleMinus;
  406. '0'..'9' : HandleNumber;
  407. '''','#' : HandleString;
  408. else
  409. HandleUnknown;
  410. end;
  411. Result:=fToken;
  412. end;
  413. function TParser.SourcePos: Longint;
  414. begin
  415. Result:=fStream.Position-fBufLen+fPos;
  416. end;
  417. function TParser.TokenComponentIdent: string;
  418. begin
  419. if fToken<>toSymbol then
  420. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  421. CheckLoadBuffer;
  422. while fBuf[fPos]='.' do
  423. begin
  424. ProcessChar;
  425. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  426. end;
  427. Result:=fLastTokenStr;
  428. end;
  429. {$ifndef FPUNONE}
  430. Function TParser.TokenFloat: Extended;
  431. var errcode : word;
  432. begin
  433. Val(fLastTokenStr,Result,errcode);
  434. if errcode<>0 then
  435. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  436. end;
  437. {$endif}
  438. Function TParser.TokenInt: Int64;
  439. begin
  440. if not TryStrToInt64(fLastTokenStr,Result) then
  441. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  442. end;
  443. function TParser.TokenString: string;
  444. begin
  445. case fToken of
  446. toWString : Result:=string(fLastTokenWStr);
  447. toFloat : if fFloatType<>#0 then
  448. Result:=fLastTokenStr+fFloatType
  449. else Result:=fLastTokenStr;
  450. else
  451. Result:=fLastTokenStr;
  452. end;
  453. end;
  454. function TParser.TokenWideString: WideString;
  455. begin
  456. if fToken=toWString then
  457. Result:=fLastTokenWStr
  458. else
  459. Result:=WideString(fLastTokenStr);
  460. end;
  461. function TParser.TokenSymbolIs(const S: string): Boolean;
  462. begin
  463. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  464. end;