parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. {%MainUnit classes.pp}
  2. {
  3. This file is part of the Free Component Library (FCL)
  4. Copyright (c) 1999-2007 by the Free Pascal development team
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {****************************************************************************}
  12. {* TParser *}
  13. {****************************************************************************}
  14. const
  15. {$ifdef CPU16}
  16. { Avoid too big local stack use for
  17. MSDOS tiny memory model that uses less than 4096
  18. bytes for total stack by default. }
  19. ParseBufSize = 512;
  20. {$else not CPU16}
  21. ParseBufSize = 4096;
  22. {$endif not CPU16}
  23. LastSpecialToken = 5;
  24. TokNames : array[0..LastSpecialToken] of string =
  25. (
  26. 'EOF',
  27. 'Symbol',
  28. 'String',
  29. 'Integer',
  30. 'Float',
  31. 'WideString'
  32. );
  33. function TParser.GetTokenName(aTok: Char): string;
  34. begin
  35. if ord(aTok) <= LastSpecialToken then
  36. Result:=TokNames[ord(aTok)]
  37. else Result:=aTok;
  38. end;
  39. procedure TParser.LoadBuffer;
  40. var
  41. BytesRead: integer;
  42. begin
  43. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  44. FBuf[BytesRead] := #0;
  45. Inc(FDeltaPos, BytesRead);
  46. FPos := 0;
  47. FBufLen := BytesRead;
  48. FEofReached:=BytesRead = 0;
  49. end;
  50. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  51. begin
  52. if fBuf[fPos]=#0 then LoadBuffer;
  53. end;
  54. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  55. begin
  56. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  57. inc(fPos);
  58. CheckLoadBuffer;
  59. end;
  60. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['0'..'9'];
  63. end;
  64. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  67. end;
  68. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  71. end;
  72. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  73. begin
  74. Result:=IsAlpha or IsNumber;
  75. end;
  76. function TParser.GetHexValue(c: AnsiChar): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  77. begin
  78. case c of
  79. '0'..'9' : Result:=ord(c)-$30;
  80. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  81. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  82. end;
  83. end;
  84. function TParser.GetAlphaNum: string;
  85. begin
  86. if not IsAlpha then
  87. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  88. Result:='';
  89. while IsAlphaNum do
  90. begin
  91. Result:=Result+fBuf[fPos];
  92. inc(fPos);
  93. CheckLoadBuffer;
  94. end;
  95. end;
  96. procedure TParser.HandleNewLine;
  97. begin
  98. if fBuf[fPos]=#13 then //CR
  99. begin
  100. inc(fPos);
  101. CheckLoadBuffer;
  102. end;
  103. if fBuf[fPos]=#10 then
  104. begin
  105. inc(fPos); //CR LF or LF
  106. CheckLoadBuffer;
  107. end;
  108. inc(fSourceLine);
  109. fDeltaPos:=-(fPos-1);
  110. end;
  111. procedure TParser.SkipBOM;
  112. var
  113. i : integer;
  114. bom : string[3];
  115. backup : integer;
  116. begin
  117. i:=1;
  118. bom:=' ';
  119. backup:=fPos;
  120. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  121. begin
  122. bom[i]:=fBuf[fPos];
  123. inc(fPos);
  124. CheckLoadBuffer;
  125. inc(i);
  126. end;
  127. if (bom<>(#$EF+#$BB+#$BF)) then
  128. fPos:=backup;
  129. end;
  130. procedure TParser.SkipSpaces;
  131. begin
  132. while fBuf[fPos] in [' ',#9] do begin
  133. inc(fPos);
  134. CheckLoadBuffer;
  135. end;
  136. end;
  137. procedure TParser.SkipWhitespace;
  138. begin
  139. while true do
  140. begin
  141. case fBuf[fPos] of
  142. ' ',#9 : SkipSpaces;
  143. #10,#13 : HandleNewLine
  144. else break;
  145. end;
  146. end;
  147. end;
  148. procedure TParser.HandleEof;
  149. begin
  150. fToken:=toEOF;
  151. fLastTokenStr:='';
  152. end;
  153. procedure TParser.HandleAlphaNum;
  154. begin
  155. fLastTokenStr:=GetAlphaNum;
  156. fToken:=toSymbol;
  157. end;
  158. procedure TParser.HandleNumber;
  159. type
  160. floatPunct = (fpDot,fpE);
  161. floatPuncts = set of floatPunct;
  162. var
  163. allowed : floatPuncts;
  164. begin
  165. fLastTokenStr:='';
  166. while IsNumber do
  167. ProcessChar;
  168. fToken:=toInteger;
  169. if (fBuf[fPos] in ['.','e','E']) then
  170. begin
  171. fToken:=toFloat;
  172. allowed:=[fpDot,fpE];
  173. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  174. begin
  175. case fBuf[fPos] of
  176. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  177. 'E','e' : if fpE in allowed then
  178. begin
  179. allowed:=[];
  180. ProcessChar;
  181. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  182. if not (fBuf[fPos] in ['0'..'9']) then
  183. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  184. end
  185. else break;
  186. end;
  187. ProcessChar;
  188. end;
  189. end;
  190. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  191. begin
  192. fFloatType:=fBuf[fPos];
  193. inc(fPos);
  194. CheckLoadBuffer;
  195. fToken:=toFloat;
  196. end
  197. else fFloatType:=#0;
  198. end;
  199. procedure TParser.HandleHexNumber;
  200. var valid : boolean;
  201. begin
  202. fLastTokenStr:='$';
  203. inc(fPos);
  204. CheckLoadBuffer;
  205. valid:=false;
  206. while IsHexNum do
  207. begin
  208. valid:=true;
  209. ProcessChar;
  210. end;
  211. if not valid then
  212. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  213. fToken:=toInteger;
  214. end;
  215. function TParser.HandleQuotedString: string;
  216. begin
  217. Result:='';
  218. inc(fPos);
  219. CheckLoadBuffer;
  220. while true do
  221. begin
  222. case fBuf[fPos] of
  223. #0 : ErrorStr(SParUnterminatedString);
  224. #13,#10 : ErrorStr(SParUnterminatedString);
  225. '''' : begin
  226. inc(fPos);
  227. CheckLoadBuffer;
  228. if fBuf[fPos]<>'''' then exit;
  229. end;
  230. end;
  231. Result:=Result+fBuf[fPos];
  232. inc(fPos);
  233. CheckLoadBuffer;
  234. end;
  235. end;
  236. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  237. WideChr: widechar; out StringChr: AnsiChar);
  238. var i : integer;
  239. begin
  240. inc(fPos);
  241. CheckLoadBuffer;
  242. // read a word number
  243. i:=0;
  244. while IsNumber and (i<high(word)) do
  245. begin
  246. i:=i*10+ord(fBuf[fPos])-ord('0');
  247. inc(fPos);
  248. CheckLoadBuffer;
  249. end;
  250. if i>high(word) then i:=0;
  251. if i>127 then ascii:=false;
  252. WideChr:=widechar(word(i));
  253. if i<256 then
  254. StringChr:=chr(i)
  255. else
  256. StringChr:=#0;
  257. end;
  258. procedure TParser.HandleString;
  259. var ascii : boolean;
  260. s: string;
  261. w: WideChar;
  262. c: AnsiChar;
  263. begin
  264. fLastTokenWStr:='';
  265. fLastTokenStr:='';
  266. ascii:=true;
  267. while true do
  268. begin
  269. case fBuf[fPos] of
  270. '''' :
  271. begin
  272. // avoid conversions,
  273. // On some systems conversion from ansistring to widestring and back
  274. // to ansistring does not give the original ansistring.
  275. // See bug http://bugs.freepascal.org/view.php?id=15841
  276. s:=HandleQuotedString;
  277. fLastTokenWStr:=fLastTokenWStr+UnicodeString(s);
  278. fLastTokenStr:=fLastTokenStr+s;
  279. end;
  280. '#' :
  281. begin
  282. HandleDecimalCharacter(ascii,w,c);
  283. fLastTokenWStr:=fLastTokenWStr+w;
  284. fLastTokenStr:=fLastTokenStr+c;
  285. end;
  286. else break;
  287. end;
  288. end;
  289. if ascii then
  290. fToken:={$IFDEF FPC_DOTTEDUNITS}System.{$ENDIF}Classes.toString
  291. else
  292. fToken:=toWString;
  293. end;
  294. procedure TParser.HandleMinus;
  295. begin
  296. inc(fPos);
  297. CheckLoadBuffer;
  298. if IsNumber then
  299. begin
  300. HandleNumber;
  301. fLastTokenStr:='-'+fLastTokenStr;
  302. end
  303. else
  304. begin
  305. fToken:='-';
  306. fLastTokenStr:=fToken;
  307. end;
  308. end;
  309. procedure TParser.HandleUnknown;
  310. begin
  311. fToken:=fBuf[fPos];
  312. fLastTokenStr:=fToken;
  313. inc(fPos);
  314. CheckLoadBuffer;
  315. end;
  316. constructor TParser.Create(Stream: TStream);
  317. begin
  318. fStream:=Stream;
  319. fBuf:=GetMem(ParseBufSize+1);
  320. fBufLen:=0;
  321. fPos:=0;
  322. fDeltaPos:=1;
  323. fSourceLine:=1;
  324. fEofReached:=false;
  325. fLastTokenStr:='';
  326. fLastTokenWStr:='';
  327. fFloatType:=#0;
  328. fToken:=#0;
  329. LoadBuffer;
  330. SkipBom;
  331. NextToken;
  332. end;
  333. destructor TParser.Destroy;
  334. Var
  335. aCount : Integer;
  336. begin
  337. if fToken=toWString then
  338. aCount:=Length(fLastTokenWStr)*2
  339. else
  340. aCount:=Length(fLastTokenStr);
  341. fStream.Position:=SourcePos-aCount;
  342. FreeMem(fBuf);
  343. end;
  344. procedure TParser.CheckToken(T: Char);
  345. begin
  346. if fToken<>T then
  347. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  348. end;
  349. procedure TParser.CheckTokenSymbol(const S: string);
  350. begin
  351. CheckToken(toSymbol);
  352. if CompareText(fLastTokenStr,S)<>0 then
  353. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  354. end;
  355. procedure TParser.Error(const Ident: string);
  356. begin
  357. ErrorStr(Ident);
  358. end;
  359. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  360. begin
  361. ErrorStr(Format(Ident,Args));
  362. end;
  363. procedure TParser.ErrorStr(const Message: string);
  364. begin
  365. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  366. end;
  367. procedure TParser.HexToBinary(Stream: TStream);
  368. var outbuf : array[0..ParseBufSize-1] of byte;
  369. b : byte;
  370. i : integer;
  371. begin
  372. i:=0;
  373. SkipWhitespace;
  374. while IsHexNum do
  375. begin
  376. b:=(GetHexValue(fBuf[fPos]) shl 4);
  377. inc(fPos);
  378. CheckLoadBuffer;
  379. if not IsHexNum then
  380. Error(SParUnterminatedBinValue);
  381. b:=b or GetHexValue(fBuf[fPos]);
  382. inc(fPos);
  383. CheckLoadBuffer;
  384. outbuf[i]:=b;
  385. inc(i);
  386. if i>=ParseBufSize then
  387. begin
  388. Stream.WriteBuffer(outbuf[0],i);
  389. i:=0;
  390. end;
  391. SkipWhitespace;
  392. end;
  393. if i>0 then
  394. Stream.WriteBuffer(outbuf[0],i);
  395. NextToken;
  396. end;
  397. function TParser.NextToken: Char;
  398. begin
  399. SkipWhiteSpace;
  400. if fEofReached then
  401. HandleEof
  402. else
  403. case fBuf[fPos] of
  404. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  405. '$' : HandleHexNumber;
  406. '-' : HandleMinus;
  407. '0'..'9' : HandleNumber;
  408. '''','#' : HandleString;
  409. else
  410. HandleUnknown;
  411. end;
  412. Result:=fToken;
  413. end;
  414. function TParser.SourcePos: Longint;
  415. begin
  416. Result:=fStream.Position-fBufLen+fPos;
  417. end;
  418. function TParser.TokenComponentIdent: string;
  419. begin
  420. if fToken<>toSymbol then
  421. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  422. CheckLoadBuffer;
  423. while fBuf[fPos]='.' do
  424. begin
  425. ProcessChar;
  426. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  427. end;
  428. Result:=fLastTokenStr;
  429. end;
  430. {$ifndef FPUNONE}
  431. Function TParser.TokenFloat: Extended;
  432. var errcode : word;
  433. begin
  434. Val(fLastTokenStr,Result,errcode);
  435. if errcode<>0 then
  436. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  437. end;
  438. {$endif}
  439. Function TParser.TokenInt: Int64;
  440. begin
  441. if not TryStrToInt64(fLastTokenStr,Result) then
  442. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  443. end;
  444. function TParser.TokenString: string;
  445. begin
  446. case fToken of
  447. toWString : Result:=string(fLastTokenWStr);
  448. toFloat : if fFloatType<>#0 then
  449. Result:=fLastTokenStr+fFloatType
  450. else Result:=fLastTokenStr;
  451. else
  452. Result:=fLastTokenStr;
  453. end;
  454. end;
  455. function TParser.TokenWideString: WideString;
  456. begin
  457. if fToken=toWString then
  458. Result:=fLastTokenWStr
  459. else
  460. Result:=WideString(fLastTokenStr);
  461. end;
  462. function TParser.TokenSymbolIs(const S: string): Boolean;
  463. begin
  464. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  465. end;