jsonscanner.pp 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. {
  2. This file is part of the Free Component Library
  3. JSON source lexical scanner
  4. Copyright (c) 2007 by Michael Van Canneyt [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$mode objfpc}
  12. {$h+}
  13. unit jsonscanner;
  14. interface
  15. uses SysUtils, Classes;
  16. resourcestring
  17. SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
  18. SErrOpenString = 'string exceeds end of line';
  19. type
  20. TJSONToken = (
  21. tkEOF,
  22. tkWhitespace,
  23. tkString,
  24. tkNumber,
  25. tkTrue,
  26. tkFalse,
  27. tkNull,
  28. // Simple (one-character) tokens
  29. tkComma, // ','
  30. tkColon, // ':'
  31. tkCurlyBraceOpen, // '{'
  32. tkCurlyBraceClose, // '}'
  33. tkSquaredBraceOpen, // '['
  34. tkSquaredBraceClose, // ']'
  35. tkIdentifier, // Any Javascript identifier
  36. tkUnknown
  37. );
  38. EScannerError = class(EParserError);
  39. { TJSONScanner }
  40. TJSONScanner = class
  41. private
  42. FSource : TStringList;
  43. FCurRow: Integer;
  44. FCurToken: TJSONToken;
  45. FCurTokenString: string;
  46. FCurLine: string;
  47. FStrict: Boolean;
  48. TokenStr: PChar;
  49. function GetCurColumn: Integer;
  50. protected
  51. procedure Error(const Msg: string);overload;
  52. procedure Error(const Msg: string; Args: array of Const);overload;
  53. function DoFetchToken: TJSONToken;
  54. public
  55. constructor Create(Source : TStream); overload;
  56. constructor Create(const Source : String); overload;
  57. destructor Destroy; override;
  58. function FetchToken: TJSONToken;
  59. property CurLine: string read FCurLine;
  60. property CurRow: Integer read FCurRow;
  61. property CurColumn: Integer read GetCurColumn;
  62. property CurToken: TJSONToken read FCurToken;
  63. property CurTokenString: string read FCurTokenString;
  64. // Use strict JSON: " for strings, object members are strings, not identifiers
  65. Property Strict : Boolean Read FStrict Write FStrict;
  66. end;
  67. const
  68. TokenInfos: array[TJSONToken] of string = (
  69. 'EOF',
  70. 'Whitespace',
  71. 'String',
  72. 'Number',
  73. 'True',
  74. 'False',
  75. 'Null',
  76. ',',
  77. ':',
  78. '{',
  79. '}',
  80. '[',
  81. ']',
  82. 'identifier',
  83. ''
  84. );
  85. implementation
  86. constructor TJSONScanner.Create(Source : TStream);
  87. begin
  88. FSource:=TStringList.Create;
  89. FSource.LoadFromStream(Source);
  90. end;
  91. constructor TJSONScanner.Create(const Source : String);
  92. begin
  93. FSource:=TStringList.Create;
  94. FSource.Text:=Source;
  95. end;
  96. destructor TJSONScanner.Destroy;
  97. begin
  98. FreeAndNil(FSource);
  99. Inherited;
  100. end;
  101. function TJSONScanner.FetchToken: TJSONToken;
  102. begin
  103. Result:=DoFetchToken;
  104. end;
  105. procedure TJSONScanner.Error(const Msg: string);
  106. begin
  107. raise EScannerError.Create(Msg);
  108. end;
  109. procedure TJSONScanner.Error(const Msg: string; Args: array of Const);
  110. begin
  111. raise EScannerError.CreateFmt(Msg, Args);
  112. end;
  113. function TJSONScanner.DoFetchToken: TJSONToken;
  114. function FetchLine: Boolean;
  115. begin
  116. Result:=FCurRow<FSource.Count;
  117. if Result then
  118. begin
  119. FCurLine:=FSource[FCurRow];
  120. TokenStr:=PChar(FCurLine);
  121. Inc(FCurRow);
  122. end
  123. else
  124. begin
  125. FCurLine:='';
  126. TokenStr:=nil;
  127. end;
  128. end;
  129. var
  130. TokenStart, CurPos: PChar;
  131. it : TJSONToken;
  132. I : Integer;
  133. OldLength, SectionLength, Index: Integer;
  134. C : char;
  135. S : String;
  136. begin
  137. if TokenStr = nil then
  138. if not FetchLine then
  139. begin
  140. Result := tkEOF;
  141. FCurToken := Result;
  142. exit;
  143. end;
  144. FCurTokenString := '';
  145. case TokenStr[0] of
  146. #0: // Empty line
  147. begin
  148. FetchLine;
  149. Result := tkWhitespace;
  150. end;
  151. #9, ' ':
  152. begin
  153. Result := tkWhitespace;
  154. repeat
  155. Inc(TokenStr);
  156. if TokenStr[0] = #0 then
  157. if not FetchLine then
  158. begin
  159. FCurToken := Result;
  160. exit;
  161. end;
  162. until not (TokenStr[0] in [#9, ' ']);
  163. end;
  164. '"','''':
  165. begin
  166. C:=TokenStr[0];
  167. If (C='''') and Strict then
  168. Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
  169. Inc(TokenStr);
  170. TokenStart := TokenStr;
  171. OldLength := 0;
  172. FCurTokenString := '';
  173. while not (TokenStr[0] in [#0,C]) do
  174. begin
  175. if (TokenStr[0]='\') then
  176. begin
  177. // Save length
  178. SectionLength := TokenStr - TokenStart;
  179. Inc(TokenStr);
  180. // Read escaped token
  181. Case TokenStr[0] of
  182. '"' : S:='"';
  183. '''' : S:='''';
  184. 't' : S:=#9;
  185. 'b' : S:=#8;
  186. 'n' : S:=#10;
  187. 'r' : S:=#13;
  188. 'f' : S:=#12;
  189. '\' : S:='\';
  190. '/' : S:='/';
  191. 'u' : begin
  192. S:='0000';
  193. For I:=1 to 4 do
  194. begin
  195. Inc(TokenStr);
  196. Case TokenStr[0] of
  197. '0'..'9','A'..'F','a'..'f' :
  198. S[i]:=Upcase(TokenStr[0]);
  199. else
  200. Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
  201. end;
  202. end;
  203. // Takes care of conversion...
  204. S:=WideChar(StrToInt('$'+S));
  205. end;
  206. #0 : Error(SErrOpenString);
  207. else
  208. Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
  209. end;
  210. SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S));
  211. if SectionLength > 0 then
  212. Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
  213. Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S));
  214. Inc(OldLength, SectionLength+Length(S));
  215. // Next char
  216. // Inc(TokenStr);
  217. TokenStart := TokenStr+1;
  218. end;
  219. if TokenStr[0] = #0 then
  220. Error(SErrOpenString);
  221. Inc(TokenStr);
  222. end;
  223. if TokenStr[0] = #0 then
  224. Error(SErrOpenString);
  225. SectionLength := TokenStr - TokenStart;
  226. SetLength(FCurTokenString, OldLength + SectionLength);
  227. if SectionLength > 0 then
  228. Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
  229. Inc(TokenStr);
  230. Result := tkString;
  231. end;
  232. ',':
  233. begin
  234. Inc(TokenStr);
  235. Result := tkComma;
  236. end;
  237. '0'..'9','.','-':
  238. begin
  239. TokenStart := TokenStr;
  240. while true do
  241. begin
  242. Inc(TokenStr);
  243. case TokenStr[0] of
  244. '.':
  245. begin
  246. if TokenStr[1] in ['0'..'9', 'e', 'E'] then
  247. begin
  248. Inc(TokenStr);
  249. repeat
  250. Inc(TokenStr);
  251. until not (TokenStr[0] in ['0'..'9', 'e', 'E','-','+']);
  252. end;
  253. break;
  254. end;
  255. '0'..'9': ;
  256. 'e', 'E':
  257. begin
  258. Inc(TokenStr);
  259. if TokenStr[0] in ['-','+'] then
  260. Inc(TokenStr);
  261. while TokenStr[0] in ['0'..'9'] do
  262. Inc(TokenStr);
  263. break;
  264. end;
  265. else
  266. break;
  267. end;
  268. end;
  269. SectionLength := TokenStr - TokenStart;
  270. SetLength(FCurTokenString, SectionLength);
  271. if SectionLength > 0 then
  272. Move(TokenStart^, FCurTokenString[1], SectionLength);
  273. If (FCurTokenString[1]='.') then
  274. FCurTokenString:='0'+FCurTokenString;
  275. Result := tkNumber;
  276. end;
  277. ':':
  278. begin
  279. Inc(TokenStr);
  280. Result := tkColon;
  281. end;
  282. '{':
  283. begin
  284. Inc(TokenStr);
  285. Result := tkCurlyBraceOpen;
  286. end;
  287. '}':
  288. begin
  289. Inc(TokenStr);
  290. Result := tkCurlyBraceClose;
  291. end;
  292. '[':
  293. begin
  294. Inc(TokenStr);
  295. Result := tkSquaredBraceOpen;
  296. end;
  297. ']':
  298. begin
  299. Inc(TokenStr);
  300. Result := tkSquaredBraceClose;
  301. end;
  302. 'a'..'z','_':
  303. begin
  304. TokenStart := TokenStr;
  305. repeat
  306. Inc(TokenStr);
  307. until not (TokenStr[0] in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
  308. SectionLength := TokenStr - TokenStart;
  309. SetLength(FCurTokenString, SectionLength);
  310. if SectionLength > 0 then
  311. Move(TokenStart^, FCurTokenString[1], SectionLength);
  312. for it := tkTrue to tkNull do
  313. if CompareText(CurTokenString, TokenInfos[it]) = 0 then
  314. begin
  315. Result := it;
  316. FCurToken := Result;
  317. exit;
  318. end;
  319. if Strict then
  320. Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]])
  321. else
  322. Result:=tkIdentifier;
  323. end;
  324. else
  325. Error(SErrInvalidCharacter, [CurRow,CurCOlumn,TokenStr[0]]);
  326. end;
  327. FCurToken := Result;
  328. end;
  329. function TJSONScanner.GetCurColumn: Integer;
  330. begin
  331. Result := TokenStr - PChar(CurLine);
  332. end;
  333. end.