parser.inc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 1999-2007 by the Free Pascal development team
  4. See the file COPYING.FPC, included in this distribution,
  5. for details about the copyright.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  9. **********************************************************************}
  10. {****************************************************************************}
  11. {* TParser *}
  12. {****************************************************************************}
  13. const
  14. {$ifdef CPU16}
  15. { Avoid too big local stack use for
  16. MSDOS tiny memory model that uses less than 4096
  17. bytes for total stack by default. }
  18. ParseBufSize = 512;
  19. {$else not CPU16}
  20. ParseBufSize = 4096;
  21. {$endif not CPU16}
  22. LastSpecialToken = 5;
  23. TokNames : array[0..LastSpecialToken] of string =
  24. (
  25. 'EOF',
  26. 'Symbol',
  27. 'String',
  28. 'Integer',
  29. 'Float',
  30. 'WideString'
  31. );
  32. function TParser.GetTokenName(aTok: char): string;
  33. begin
  34. if ord(aTok) <= LastSpecialToken then
  35. Result:=TokNames[ord(aTok)]
  36. else Result:=aTok;
  37. end;
  38. procedure TParser.LoadBuffer;
  39. var
  40. BytesRead: integer;
  41. begin
  42. BytesRead := FStream.Read(FBuf^, ParseBufSize);
  43. FBuf[BytesRead] := #0;
  44. Inc(FDeltaPos, BytesRead);
  45. FPos := 0;
  46. FBufLen := BytesRead;
  47. FEofReached:=BytesRead = 0;
  48. end;
  49. procedure TParser.CheckLoadBuffer; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  50. begin
  51. if fBuf[fPos]=#0 then LoadBuffer;
  52. end;
  53. procedure TParser.ProcessChar; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  54. begin
  55. fLastTokenStr:=fLastTokenStr+fBuf[fPos];
  56. inc(fPos);
  57. CheckLoadBuffer;
  58. end;
  59. function TParser.IsNumber: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  60. begin
  61. Result:=fBuf[fPos] in ['0'..'9'];
  62. end;
  63. function TParser.IsHexNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  64. begin
  65. Result:=fBuf[fPos] in ['0'..'9','A'..'F','a'..'f'];
  66. end;
  67. function TParser.IsAlpha: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  68. begin
  69. Result:=fBuf[fPos] in ['_','A'..'Z','a'..'z'];
  70. end;
  71. function TParser.IsAlphaNum: boolean; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  72. begin
  73. Result:=IsAlpha or IsNumber;
  74. end;
  75. function TParser.GetHexValue(c: char): byte; {$ifdef CLASSESINLINE} inline; {$endif CLASSESINLINE}
  76. begin
  77. case c of
  78. '0'..'9' : Result:=ord(c)-$30;
  79. 'A'..'F' : Result:=ord(c)-$37; //-$41+$0A
  80. 'a'..'f' : Result:=ord(c)-$57; //-$61+$0A
  81. end;
  82. end;
  83. function TParser.GetAlphaNum: string;
  84. begin
  85. if not IsAlpha then
  86. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  87. Result:='';
  88. while IsAlphaNum do
  89. begin
  90. Result:=Result+fBuf[fPos];
  91. inc(fPos);
  92. CheckLoadBuffer;
  93. end;
  94. end;
  95. procedure TParser.HandleNewLine;
  96. begin
  97. if fBuf[fPos]=#13 then //CR
  98. begin
  99. inc(fPos);
  100. CheckLoadBuffer;
  101. end;
  102. if fBuf[fPos]=#10 then
  103. begin
  104. inc(fPos); //CR LF or LF
  105. CheckLoadBuffer;
  106. end;
  107. inc(fSourceLine);
  108. fDeltaPos:=-(fPos-1);
  109. end;
  110. procedure TParser.SkipBOM;
  111. var
  112. i : integer;
  113. bom : string[3];
  114. backup : integer;
  115. begin
  116. i:=1;
  117. bom:=' ';
  118. backup:=fPos;
  119. while (fBuf[fPos] in [#$BB,#$BF,#$EF]) and (i<=3) do
  120. begin
  121. bom[i]:=fBuf[fPos];
  122. inc(fPos);
  123. CheckLoadBuffer;
  124. inc(i);
  125. end;
  126. if (bom<>(#$EF+#$BB+#$BF)) then
  127. fPos:=backup;
  128. end;
  129. procedure TParser.SkipSpaces;
  130. begin
  131. while fBuf[fPos] in [' ',#9] do begin
  132. inc(fPos);
  133. CheckLoadBuffer;
  134. end;
  135. end;
  136. procedure TParser.SkipWhitespace;
  137. begin
  138. while true do
  139. begin
  140. case fBuf[fPos] of
  141. ' ',#9 : SkipSpaces;
  142. #10,#13 : HandleNewLine
  143. else break;
  144. end;
  145. end;
  146. end;
  147. procedure TParser.HandleEof;
  148. begin
  149. fToken:=toEOF;
  150. fLastTokenStr:='';
  151. end;
  152. procedure TParser.HandleAlphaNum;
  153. begin
  154. fLastTokenStr:=GetAlphaNum;
  155. fToken:=toSymbol;
  156. end;
  157. procedure TParser.HandleNumber;
  158. type
  159. floatPunct = (fpDot,fpE);
  160. floatPuncts = set of floatPunct;
  161. var
  162. allowed : floatPuncts;
  163. begin
  164. fLastTokenStr:='';
  165. while IsNumber do
  166. ProcessChar;
  167. fToken:=toInteger;
  168. if (fBuf[fPos] in ['.','e','E']) then
  169. begin
  170. fToken:=toFloat;
  171. allowed:=[fpDot,fpE];
  172. while (fBuf[fPos] in ['.','e','E','0'..'9']) do
  173. begin
  174. case fBuf[fPos] of
  175. '.' : if fpDot in allowed then Exclude(allowed,fpDot) else break;
  176. 'E','e' : if fpE in allowed then
  177. begin
  178. allowed:=[];
  179. ProcessChar;
  180. if (fBuf[fPos] in ['+','-']) then ProcessChar;
  181. if not (fBuf[fPos] in ['0'..'9']) then
  182. ErrorFmt(SParInvalidFloat,[fLastTokenStr+fBuf[fPos]]);
  183. end
  184. else break;
  185. end;
  186. ProcessChar;
  187. end;
  188. end;
  189. if (fBuf[fPos] in ['s','S','d','D','c','C']) then //single, date, currency
  190. begin
  191. fFloatType:=fBuf[fPos];
  192. inc(fPos);
  193. CheckLoadBuffer;
  194. fToken:=toFloat;
  195. end
  196. else fFloatType:=#0;
  197. end;
  198. procedure TParser.HandleHexNumber;
  199. var valid : boolean;
  200. begin
  201. fLastTokenStr:='$';
  202. inc(fPos);
  203. CheckLoadBuffer;
  204. valid:=false;
  205. while IsHexNum do
  206. begin
  207. valid:=true;
  208. ProcessChar;
  209. end;
  210. if not valid then
  211. ErrorFmt(SParInvalidInteger,[fLastTokenStr]);
  212. fToken:=toInteger;
  213. end;
  214. function TParser.HandleQuotedString: string;
  215. begin
  216. Result:='';
  217. inc(fPos);
  218. CheckLoadBuffer;
  219. while true do
  220. begin
  221. case fBuf[fPos] of
  222. #0 : ErrorStr(SParUnterminatedString);
  223. #13,#10 : ErrorStr(SParUnterminatedString);
  224. '''' : begin
  225. inc(fPos);
  226. CheckLoadBuffer;
  227. if fBuf[fPos]<>'''' then exit;
  228. end;
  229. end;
  230. Result:=Result+fBuf[fPos];
  231. inc(fPos);
  232. CheckLoadBuffer;
  233. end;
  234. end;
  235. procedure TParser.HandleDecimalCharacter(var ascii: boolean; out
  236. WideChr: widechar; out StringChr: char);
  237. var i : integer;
  238. begin
  239. inc(fPos);
  240. CheckLoadBuffer;
  241. // read a word number
  242. i:=0;
  243. while IsNumber and (i<high(word)) do
  244. begin
  245. i:=i*10+ord(fBuf[fPos])-ord('0');
  246. inc(fPos);
  247. CheckLoadBuffer;
  248. end;
  249. if i>high(word) then i:=0;
  250. if i>127 then ascii:=false;
  251. WideChr:=widechar(word(i));
  252. if i<256 then
  253. StringChr:=chr(i)
  254. else
  255. StringChr:=#0;
  256. end;
  257. procedure TParser.HandleString;
  258. var ascii : boolean;
  259. s: string;
  260. w: WideChar;
  261. c: char;
  262. begin
  263. fLastTokenWStr:='';
  264. fLastTokenStr:='';
  265. ascii:=true;
  266. while true do
  267. begin
  268. case fBuf[fPos] of
  269. '''' :
  270. begin
  271. // avoid conversions,
  272. // On some systems conversion from ansistring to widestring and back
  273. // to ansistring does not give the original ansistring.
  274. // See bug http://bugs.freepascal.org/view.php?id=15841
  275. s:=HandleQuotedString;
  276. fLastTokenWStr:=fLastTokenWStr+UnicodeString(s);
  277. fLastTokenStr:=fLastTokenStr+s;
  278. end;
  279. '#' :
  280. begin
  281. HandleDecimalCharacter(ascii,w,c);
  282. fLastTokenWStr:=fLastTokenWStr+w;
  283. fLastTokenStr:=fLastTokenStr+c;
  284. end;
  285. else break;
  286. end;
  287. end;
  288. if ascii then
  289. fToken:=Classes.toString
  290. else
  291. fToken:=toWString;
  292. end;
  293. procedure TParser.HandleMinus;
  294. begin
  295. inc(fPos);
  296. CheckLoadBuffer;
  297. if IsNumber then
  298. begin
  299. HandleNumber;
  300. fLastTokenStr:='-'+fLastTokenStr;
  301. end
  302. else
  303. begin
  304. fToken:='-';
  305. fLastTokenStr:=fToken;
  306. end;
  307. end;
  308. procedure TParser.HandleUnknown;
  309. begin
  310. fToken:=fBuf[fPos];
  311. fLastTokenStr:=fToken;
  312. inc(fPos);
  313. CheckLoadBuffer;
  314. end;
  315. constructor TParser.Create(Stream: TStream);
  316. begin
  317. fStream:=Stream;
  318. fBuf:=GetMem(ParseBufSize+1);
  319. fBufLen:=0;
  320. fPos:=0;
  321. fDeltaPos:=1;
  322. fSourceLine:=1;
  323. fEofReached:=false;
  324. fLastTokenStr:='';
  325. fLastTokenWStr:='';
  326. fFloatType:=#0;
  327. fToken:=#0;
  328. LoadBuffer;
  329. SkipBom;
  330. NextToken;
  331. end;
  332. destructor TParser.Destroy;
  333. begin
  334. fStream.Position:=SourcePos;
  335. FreeMem(fBuf);
  336. end;
  337. procedure TParser.CheckToken(T: Char);
  338. begin
  339. if fToken<>T then
  340. ErrorFmt(SParWrongTokenType,[GetTokenName(T),GetTokenName(fToken)]);
  341. end;
  342. procedure TParser.CheckTokenSymbol(const S: string);
  343. begin
  344. CheckToken(toSymbol);
  345. if CompareText(fLastTokenStr,S)<>0 then
  346. ErrorFmt(SParWrongTokenSymbol,[s,fLastTokenStr]);
  347. end;
  348. procedure TParser.Error(const Ident: string);
  349. begin
  350. ErrorStr(Ident);
  351. end;
  352. procedure TParser.ErrorFmt(const Ident: string; const Args: array of const);
  353. begin
  354. ErrorStr(Format(Ident,Args));
  355. end;
  356. procedure TParser.ErrorStr(const Message: string);
  357. begin
  358. raise EParserError.CreateFmt(Message+SParLocInfo,[SourceLine,fPos+fDeltaPos,SourcePos]);
  359. end;
  360. procedure TParser.HexToBinary(Stream: TStream);
  361. var outbuf : array[0..ParseBufSize-1] of byte;
  362. b : byte;
  363. i : integer;
  364. begin
  365. i:=0;
  366. SkipWhitespace;
  367. while IsHexNum do
  368. begin
  369. b:=(GetHexValue(fBuf[fPos]) shl 4);
  370. inc(fPos);
  371. CheckLoadBuffer;
  372. if not IsHexNum then
  373. Error(SParUnterminatedBinValue);
  374. b:=b or GetHexValue(fBuf[fPos]);
  375. inc(fPos);
  376. CheckLoadBuffer;
  377. outbuf[i]:=b;
  378. inc(i);
  379. if i>=ParseBufSize then
  380. begin
  381. Stream.WriteBuffer(outbuf[0],i);
  382. i:=0;
  383. end;
  384. SkipWhitespace;
  385. end;
  386. if i>0 then
  387. Stream.WriteBuffer(outbuf[0],i);
  388. NextToken;
  389. end;
  390. function TParser.NextToken: Char;
  391. begin
  392. SkipWhiteSpace;
  393. if fEofReached then
  394. HandleEof
  395. else
  396. case fBuf[fPos] of
  397. '_','A'..'Z','a'..'z' : HandleAlphaNum;
  398. '$' : HandleHexNumber;
  399. '-' : HandleMinus;
  400. '0'..'9' : HandleNumber;
  401. '''','#' : HandleString
  402. else
  403. HandleUnknown;
  404. end;
  405. Result:=fToken;
  406. end;
  407. function TParser.SourcePos: Longint;
  408. begin
  409. Result:=fStream.Position-fBufLen+fPos;
  410. end;
  411. function TParser.TokenComponentIdent: string;
  412. begin
  413. if fToken<>toSymbol then
  414. ErrorFmt(SParExpected,[GetTokenName(toSymbol)]);
  415. CheckLoadBuffer;
  416. while fBuf[fPos]='.' do
  417. begin
  418. ProcessChar;
  419. fLastTokenStr:=fLastTokenStr+GetAlphaNum;
  420. end;
  421. Result:=fLastTokenStr;
  422. end;
  423. {$ifndef FPUNONE}
  424. Function TParser.TokenFloat: Extended;
  425. var errcode : word;
  426. begin
  427. Val(fLastTokenStr,Result,errcode);
  428. if errcode<>0 then
  429. ErrorFmt(SParInvalidFloat,[fLastTokenStr]);
  430. end;
  431. {$endif}
  432. Function TParser.TokenInt: Int64;
  433. begin
  434. if not TryStrToInt64(fLastTokenStr,Result) then
  435. Result:=Int64(StrToQWord(fLastTokenStr)); //second chance for malformed files
  436. end;
  437. function TParser.TokenString: string;
  438. begin
  439. case fToken of
  440. toWString : Result:=string(fLastTokenWStr);
  441. toFloat : if fFloatType<>#0 then
  442. Result:=fLastTokenStr+fFloatType
  443. else Result:=fLastTokenStr
  444. else
  445. Result:=fLastTokenStr;
  446. end;
  447. end;
  448. function TParser.TokenWideString: WideString;
  449. begin
  450. if fToken=toWString then
  451. Result:=fLastTokenWStr
  452. else
  453. Result:=WideString(fLastTokenStr);
  454. end;
  455. function TParser.TokenSymbolIs(const S: string): Boolean;
  456. begin
  457. Result:=(fToken=toSymbol) and (CompareText(fLastTokenStr,S)=0);
  458. end;