parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. {%MainUnit classes.pp}
  2. {
  3. This file is part of the Free Component Library (FCL)
  4. Copyright (c) 1999-2007 by the Free Pascal development team
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {****************************************************************************}
  12. {* TParser *}
  13. {****************************************************************************}
  14. const
  15. {$ifdef CPU16}
  16. { Avoid too big local stack use for
  17. MSDOS tiny memory model that uses less than 4096
  18. bytes for total stack by default. }
  19. ParseBufSize = 512;
  20. {$else not CPU16}
  21. ParseBufSize = 4096;
  22. {$endif not CPU16}
  23. LastSpecialToken = 5;
  24. TokNames : array[0..LastSpecialToken] of string =
  25. (
  26. 'EOF',
  27. 'Symbol',
  28. 'String',
  29. 'Integer',
  30. 'Float',
  31. 'WideString'
  32. );
  33. function TParser.GetTokenName(aTok: AnsiChar): string;
  34. begin
  35. if ord(aTok) <= LastSpecialToken then
  36. Result:=TokNames[ord(aTok)]
  37. else Result:=aTok;
  38. end;
  39. procedure TParser.LoadBuffer;
  40. var
  41. BytesRead: integer;
  42. begin
  43. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  44. FBuf[BytesRead] := #0;
  45. Inc(FDeltaPos, BytesRead);
  46. FPos := 0;
  47. FBufLen := BytesRead;
  48. FEofReached:=BytesRead = 0;
  49. end;
  50. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  51. begin
  52. if fBuf[fPos]=#0 then LoadBuffer;
  53. end;
  54. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  55. begin
  56. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  57. inc(fPos);
  58. CheckLoadBuffer;
  59. end;
  60. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  61. begin
  62. Result:=fBuf[fPos] in ['0'..'9'];
  63. end;
  64. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  65. begin
  66. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  67. end;
  68. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  69. begin
  70. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  71. end;
  72. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  73. begin
  74. Result:=IsAlpha or IsNumber;
  75. end;
  76. function TParser.GetHexValue(c: AnsiChar): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  77. begin
  78. case c of
  79. '0'..'9' : Result:=ord(c)-$30;
  80. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  81. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  82. end;
  83. end;
  84. function TParser.GetAlphaNum: string;
  85. begin
  86. if not IsAlpha then
  87. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  88. Result:='';
  89. while IsAlphaNum do
  90. begin
  91. Result:=Result+fBuf[fPos];
  92. inc(fPos);
  93. CheckLoadBuffer;
  94. end;
  95. end;
  96. procedure TParser.HandleNewLine;
  97. begin
  98. if fBuf[fPos]=#13 then //CR
  99. begin
  100. inc(fPos);
  101. CheckLoadBuffer;
  102. end;
  103. if fBuf[fPos]=#10 then
  104. begin
  105. inc(fPos); //CR LF or LF
  106. CheckLoadBuffer;
  107. end;
  108. inc(fSourceLine);
  109. fDeltaPos:=-(fPos-1);
  110. end;
  111. procedure TParser.SkipBOM;
  112. const
  113. sbom : string[3] = #$EF#$BB#$BF;
  114. var
  115. i : integer;
  116. bom : string[3];
  117. backup : integer;
  118. begin
  119. i:=1;
  120. bom:=' ';
  121. backup:=fPos;
  122. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  123. begin
  124. bom[i]:=fBuf[fPos];
  125. inc(fPos);
  126. CheckLoadBuffer;
  127. inc(i);
  128. end;
  129. if (bom<>sBom) then
  130. fPos:=backup;
  131. end;
  132. procedure TParser.SkipSpaces;
  133. begin
  134. while fBuf[fPos] in [' ',#9] do begin
  135. inc(fPos);
  136. CheckLoadBuffer;
  137. end;
  138. end;
  139. procedure TParser.SkipWhitespace;
  140. begin
  141. while true do
  142. begin
  143. case fBuf[fPos] of
  144. ' ',#9 : SkipSpaces;
  145. #10,#13 : HandleNewLine
  146. else break;
  147. end;
  148. end;
  149. end;
  150. procedure TParser.HandleEof;
  151. begin
  152. fToken:=toEOF;
  153. fLastTokenStr:='';
  154. end;
  155. procedure TParser.HandleAlphaNum;
  156. begin
  157. fLastTokenStr:=GetAlphaNum;
  158. fToken:=toSymbol;
  159. end;
  160. procedure TParser.HandleNumber;
  161. type
  162. floatPunct = (fpDot,fpE);
  163. floatPuncts = set of floatPunct;
  164. var
  165. allowed : floatPuncts;
  166. begin
  167. fLastTokenStr:='';
  168. while IsNumber do
  169. ProcessChar;
  170. fToken:=toInteger;
  171. if (fBuf[fPos] in ['.','e','E']) then
  172. begin
  173. fToken:=toFloat;
  174. allowed:=[fpDot,fpE];
  175. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  176. begin
  177. case fBuf[fPos] of
  178. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  179. 'E','e' : if fpE in allowed then
  180. begin
  181. allowed:=[];
  182. ProcessChar;
  183. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  184. if not (fBuf[fPos] in ['0'..'9']) then
  185. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  186. end
  187. else break;
  188. end;
  189. ProcessChar;
  190. end;
  191. end;
  192. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  193. begin
  194. fFloatType:=fBuf[fPos];
  195. inc(fPos);
  196. CheckLoadBuffer;
  197. fToken:=toFloat;
  198. end
  199. else fFloatType:=#0;
  200. end;
  201. procedure TParser.HandleHexNumber;
  202. var valid : boolean;
  203. begin
  204. fLastTokenStr:='$';
  205. inc(fPos);
  206. CheckLoadBuffer;
  207. valid:=false;
  208. while IsHexNum do
  209. begin
  210. valid:=true;
  211. ProcessChar;
  212. end;
  213. if not valid then
  214. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  215. fToken:=toInteger;
  216. end;
  217. function TParser.HandleQuotedString: string;
  218. begin
  219. Result:='';
  220. inc(fPos);
  221. CheckLoadBuffer;
  222. while true do
  223. begin
  224. case fBuf[fPos] of
  225. #0 : ErrorStr(SParUnterminatedString);
  226. #13,#10 : ErrorStr(SParUnterminatedString);
  227. '''' : begin
  228. inc(fPos);
  229. CheckLoadBuffer;
  230. if fBuf[fPos]<>'''' then exit;
  231. end;
  232. end;
  233. Result:=Result+fBuf[fPos];
  234. inc(fPos);
  235. CheckLoadBuffer;
  236. end;
  237. end;
  238. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  239. WideChr: widechar; out StringChr: AnsiChar);
  240. var i : integer;
  241. begin
  242. inc(fPos);
  243. CheckLoadBuffer;
  244. // read a word number
  245. i:=0;
  246. while IsNumber and (i<high(word)) do
  247. begin
  248. i:=i*10+ord(fBuf[fPos])-ord('0');
  249. inc(fPos);
  250. CheckLoadBuffer;
  251. end;
  252. if i>high(word) then i:=0;
  253. if i>127 then ascii:=false;
  254. WideChr:=widechar(word(i));
  255. if i<256 then
  256. StringChr:=chr(i)
  257. else
  258. StringChr:=#0;
  259. end;
  260. procedure TParser.HandleString;
  261. var ascii : boolean;
  262. s: string;
  263. w: WideChar;
  264. c: AnsiChar;
  265. begin
  266. fLastTokenWStr:='';
  267. fLastTokenStr:='';
  268. ascii:=true;
  269. while true do
  270. begin
  271. case fBuf[fPos] of
  272. '''' :
  273. begin
  274. // avoid conversions,
  275. // On some systems conversion from ansistring to widestring and back
  276. // to ansistring does not give the original ansistring.
  277. // See bug http://bugs.freepascal.org/view.php?id=15841
  278. s:=HandleQuotedString;
  279. fLastTokenWStr:=fLastTokenWStr+UnicodeString(s);
  280. fLastTokenStr:=fLastTokenStr+s;
  281. end;
  282. '#' :
  283. begin
  284. HandleDecimalCharacter(ascii,w,c);
  285. fLastTokenWStr:=fLastTokenWStr+w;
  286. fLastTokenStr:=fLastTokenStr+c;
  287. end;
  288. else break;
  289. end;
  290. end;
  291. if ascii then
  292. fToken:={$IFDEF FPC_DOTTEDUNITS}System.{$ENDIF}Classes.toString
  293. else
  294. fToken:=toWString;
  295. end;
  296. procedure TParser.HandleMinus;
  297. begin
  298. inc(fPos);
  299. CheckLoadBuffer;
  300. if IsNumber then
  301. begin
  302. HandleNumber;
  303. fLastTokenStr:='-'+fLastTokenStr;
  304. end
  305. else
  306. begin
  307. fToken:='-';
  308. fLastTokenStr:=fToken;
  309. end;
  310. end;
  311. procedure TParser.HandleUnknown;
  312. begin
  313. fToken:=fBuf[fPos];
  314. fLastTokenStr:=fToken;
  315. inc(fPos);
  316. CheckLoadBuffer;
  317. end;
  318. constructor TParser.Create(Stream: TStream);
  319. begin
  320. fStream:=Stream;
  321. fBuf:=GetMem(ParseBufSize+1);
  322. fBufLen:=0;
  323. fPos:=0;
  324. fDeltaPos:=1;
  325. fSourceLine:=1;
  326. fEofReached:=false;
  327. fLastTokenStr:='';
  328. fLastTokenWStr:='';
  329. fFloatType:=#0;
  330. fToken:=#0;
  331. LoadBuffer;
  332. SkipBom;
  333. NextToken;
  334. end;
  335. destructor TParser.Destroy;
  336. Var
  337. aCount : Integer;
  338. begin
  339. if fToken=toWString then
  340. aCount:=Length(fLastTokenWStr)*2
  341. else
  342. aCount:=Length(fLastTokenStr);
  343. fStream.Position:=SourcePos-aCount;
  344. FreeMem(fBuf);
  345. end;
  346. procedure TParser.CheckToken(T: AnsiChar);
  347. begin
  348. if fToken<>T then
  349. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  350. end;
  351. procedure TParser.CheckTokenSymbol(const S: string);
  352. begin
  353. CheckToken(toSymbol);
  354. if CompareText(fLastTokenStr,S)<>0 then
  355. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  356. end;
  357. procedure TParser.Error(const Ident: string);
  358. begin
  359. ErrorStr(Ident);
  360. end;
  361. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  362. begin
  363. ErrorStr(Format(Ident,Args));
  364. end;
  365. procedure TParser.ErrorStr(const Message: string);
  366. begin
  367. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  368. end;
  369. procedure TParser.HexToBinary(Stream: TStream);
  370. var outbuf : array[0..ParseBufSize-1] of byte;
  371. b : byte;
  372. i : integer;
  373. begin
  374. i:=0;
  375. SkipWhitespace;
  376. while IsHexNum do
  377. begin
  378. b:=(GetHexValue(fBuf[fPos]) shl 4);
  379. inc(fPos);
  380. CheckLoadBuffer;
  381. if not IsHexNum then
  382. Error(SParUnterminatedBinValue);
  383. b:=b or GetHexValue(fBuf[fPos]);
  384. inc(fPos);
  385. CheckLoadBuffer;
  386. outbuf[i]:=b;
  387. inc(i);
  388. if i>=ParseBufSize then
  389. begin
  390. Stream.WriteBuffer(outbuf[0],i);
  391. i:=0;
  392. end;
  393. SkipWhitespace;
  394. end;
  395. if i>0 then
  396. Stream.WriteBuffer(outbuf[0],i);
  397. NextToken;
  398. end;
  399. function TParser.NextToken: AnsiChar;
  400. begin
  401. SkipWhiteSpace;
  402. if fEofReached then
  403. HandleEof
  404. else
  405. case fBuf[fPos] of
  406. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  407. '$' : HandleHexNumber;
  408. '-' : HandleMinus;
  409. '0'..'9' : HandleNumber;
  410. '''','#' : HandleString;
  411. else
  412. HandleUnknown;
  413. end;
  414. Result:=fToken;
  415. end;
  416. function TParser.SourcePos: Longint;
  417. begin
  418. Result:=fStream.Position-fBufLen+fPos;
  419. end;
  420. function TParser.TokenComponentIdent: string;
  421. begin
  422. if fToken<>toSymbol then
  423. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  424. CheckLoadBuffer;
  425. while fBuf[fPos]='.' do
  426. begin
  427. ProcessChar;
  428. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  429. end;
  430. Result:=fLastTokenStr;
  431. end;
  432. {$ifndef FPUNONE}
  433. Function TParser.TokenFloat: Extended;
  434. var errcode : word;
  435. begin
  436. Val(fLastTokenStr,Result,errcode);
  437. if errcode<>0 then
  438. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  439. end;
  440. {$endif}
  441. Function TParser.TokenInt: Int64;
  442. begin
  443. if not TryStrToInt64(fLastTokenStr,Result) then
  444. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  445. end;
  446. function TParser.TokenString: string;
  447. begin
  448. case fToken of
  449. toWString : Result:=string(fLastTokenWStr);
  450. toFloat : if fFloatType<>#0 then
  451. Result:=fLastTokenStr+fFloatType
  452. else Result:=fLastTokenStr;
  453. else
  454. Result:=fLastTokenStr;
  455. end;
  456. end;
  457. function TParser.TokenWideString: WideString;
  458. begin
  459. if fToken=toWString then
  460. Result:=fLastTokenWStr
  461. else
  462. Result:=WideString(fLastTokenStr);
  463. end;
  464. function TParser.TokenSymbolIs(const S: string): Boolean;
  465. begin
  466. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  467. end;