jsonscanner.pp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. {
  2. This file is part of the Free Component Library
  3. JSON source lexical scanner
  4. Copyright (c) 2007 by Michael Van Canneyt [email protected]
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$mode objfpc}
  12. {$h+}
  13. unit jsonscanner;
  14. interface
  15. uses SysUtils, Classes;
  16. resourcestring
  17. SErrInvalidCharacter = 'Invalid character ''%s''';
  18. SErrOpenString = 'string exceeds end of line';
  19. type
  20. TJSONToken = (
  21. tkEOF,
  22. tkWhitespace,
  23. tkString,
  24. tkNumber,
  25. tkTrue,
  26. tkFalse,
  27. tkNull,
  28. // Simple (one-character) tokens
  29. tkComma, // ','
  30. tkColon, // ':'
  31. tkCurlyBraceOpen, // '{'
  32. tkCurlyBraceClose, // '}'
  33. tkSquaredBraceOpen, // '['
  34. tkSquaredBraceClose, // ']'
  35. tkUnknown
  36. );
  37. EScannerError = class(Exception);
  38. TJSONScanner = class
  39. private
  40. FSource : TStringList;
  41. FCurRow: Integer;
  42. FCurToken: TJSONToken;
  43. FCurTokenString: string;
  44. FCurLine: string;
  45. TokenStr: PChar;
  46. function GetCurColumn: Integer;
  47. protected
  48. procedure Error(const Msg: string);overload;
  49. procedure Error(const Msg: string; Args: array of Const);overload;
  50. function DoFetchToken: TJSONToken;
  51. public
  52. constructor Create(Source : TStream); overload;
  53. constructor Create(Source : String); overload;
  54. destructor Destroy; override;
  55. function FetchToken: TJSONToken;
  56. property CurLine: string read FCurLine;
  57. property CurRow: Integer read FCurRow;
  58. property CurColumn: Integer read GetCurColumn;
  59. property CurToken: TJSONToken read FCurToken;
  60. property CurTokenString: string read FCurTokenString;
  61. end;
  62. const
  63. TokenInfos: array[TJSONToken] of string = (
  64. 'EOF',
  65. 'Whitespace',
  66. 'String',
  67. 'Number',
  68. 'True',
  69. 'False',
  70. 'Null',
  71. ',',
  72. ':',
  73. '{',
  74. '}',
  75. '[',
  76. ']',
  77. ''
  78. );
  79. implementation
  80. constructor TJSONScanner.Create(Source : TStream);
  81. begin
  82. FSource:=TStringList.Create;
  83. FSource.LoadFromStream(Source);
  84. end;
  85. constructor TJSONScanner.Create(Source : String);
  86. begin
  87. FSource:=TStringList.Create;
  88. FSource.Text:=Source;
  89. end;
  90. destructor TJSONScanner.Destroy;
  91. begin
  92. FreeAndNil(FSource);
  93. Inherited;
  94. end;
  95. function TJSONScanner.FetchToken: TJSONToken;
  96. begin
  97. Result:=DoFetchToken;
  98. end;
  99. procedure TJSONScanner.Error(const Msg: string);
  100. begin
  101. raise EScannerError.Create(Msg);
  102. end;
  103. procedure TJSONScanner.Error(const Msg: string; Args: array of Const);
  104. begin
  105. raise EScannerError.CreateFmt(Msg, Args);
  106. end;
  107. function TJSONScanner.DoFetchToken: TJSONToken;
  108. function FetchLine: Boolean;
  109. begin
  110. Result:=FCurRow<FSource.Count;
  111. if Result then
  112. begin
  113. FCurLine:=FSource[FCurRow];
  114. TokenStr:=PChar(FCurLine);
  115. Inc(FCurRow);
  116. end
  117. else
  118. begin
  119. FCurLine:='';
  120. TokenStr:=nil;
  121. end;
  122. end;
  123. var
  124. TokenStart, CurPos: PChar;
  125. it : TJSONToken;
  126. I : Integer;
  127. OldLength, SectionLength, Index: Integer;
  128. S : String;
  129. begin
  130. if TokenStr = nil then
  131. if not FetchLine then
  132. begin
  133. Result := tkEOF;
  134. FCurToken := Result;
  135. exit;
  136. end;
  137. FCurTokenString := '';
  138. case TokenStr[0] of
  139. #0: // Empty line
  140. begin
  141. FetchLine;
  142. Result := tkWhitespace;
  143. end;
  144. #9, ' ':
  145. begin
  146. Result := tkWhitespace;
  147. repeat
  148. Inc(TokenStr);
  149. if TokenStr[0] = #0 then
  150. if not FetchLine then
  151. begin
  152. FCurToken := Result;
  153. exit;
  154. end;
  155. until not (TokenStr[0] in [#9, ' ']);
  156. end;
  157. '"':
  158. begin
  159. Inc(TokenStr);
  160. TokenStart := TokenStr;
  161. OldLength := 0;
  162. FCurTokenString := '';
  163. while not (TokenStr[0] in [#0,'"']) do
  164. begin
  165. if (TokenStr[0]='\') then
  166. begin
  167. // Save length
  168. SectionLength := TokenStr - TokenStart;
  169. Inc(TokenStr);
  170. // Read escaped token
  171. Case TokenStr[0] of
  172. '"' : S:='"';
  173. 't' : S:=#9;
  174. 'b' : S:=#8;
  175. 'n' : S:=#10;
  176. 'r' : S:=#13;
  177. 'f' : S:=#12;
  178. '\' : S:='\';
  179. '/' : S:='/';
  180. 'u' : begin
  181. S:='0000';
  182. For I:=1 to 4 do
  183. begin
  184. Inc(TokenStr);
  185. Case TokenStr[0] of
  186. '0'..'9','A'..'F','a'..'f' :
  187. S[i]:=Upcase(TokenStr[0]);
  188. else
  189. Error(SErrInvalidCharacter, [TokenStr[0]]);
  190. end;
  191. end;
  192. // Takes care of conversion...
  193. S:=WideChar(StrToInt('$'+S));
  194. end;
  195. #0 : Error(SErrOpenString);
  196. else
  197. Error(SErrInvalidCharacter, [TokenStr[0]]);
  198. end;
  199. SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S));
  200. if SectionLength > 0 then
  201. Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
  202. Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S));
  203. Inc(OldLength, SectionLength+Length(S));
  204. // Next char
  205. // Inc(TokenStr);
  206. TokenStart := TokenStr+1;
  207. end;
  208. if TokenStr[0] = #0 then
  209. Error(SErrOpenString);
  210. Inc(TokenStr);
  211. end;
  212. if TokenStr[0] = #0 then
  213. Error(SErrOpenString);
  214. SectionLength := TokenStr - TokenStart;
  215. SetLength(FCurTokenString, OldLength + SectionLength);
  216. if SectionLength > 0 then
  217. Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
  218. Inc(TokenStr);
  219. Result := tkString;
  220. end;
  221. ',':
  222. begin
  223. Inc(TokenStr);
  224. Result := tkComma;
  225. end;
  226. '0'..'9','-':
  227. begin
  228. TokenStart := TokenStr;
  229. while true do
  230. begin
  231. Inc(TokenStr);
  232. case TokenStr[0] of
  233. '.':
  234. begin
  235. if TokenStr[1] in ['0'..'9', 'e', 'E'] then
  236. begin
  237. Inc(TokenStr);
  238. repeat
  239. Inc(TokenStr);
  240. until not (TokenStr[0] in ['0'..'9', 'e', 'E','-','+']);
  241. end;
  242. break;
  243. end;
  244. '0'..'9': ;
  245. 'e', 'E':
  246. begin
  247. Inc(TokenStr);
  248. if TokenStr[0] in ['-','+'] then
  249. Inc(TokenStr);
  250. while TokenStr[0] in ['0'..'9'] do
  251. Inc(TokenStr);
  252. break;
  253. end;
  254. else
  255. break;
  256. end;
  257. end;
  258. SectionLength := TokenStr - TokenStart;
  259. SetLength(FCurTokenString, SectionLength);
  260. if SectionLength > 0 then
  261. Move(TokenStart^, FCurTokenString[1], SectionLength);
  262. Result := tkNumber;
  263. end;
  264. ':':
  265. begin
  266. Inc(TokenStr);
  267. Result := tkColon;
  268. end;
  269. '{':
  270. begin
  271. Inc(TokenStr);
  272. Result := tkCurlyBraceOpen;
  273. end;
  274. '}':
  275. begin
  276. Inc(TokenStr);
  277. Result := tkCurlyBraceClose;
  278. end;
  279. '[':
  280. begin
  281. Inc(TokenStr);
  282. Result := tkSquaredBraceOpen;
  283. end;
  284. ']':
  285. begin
  286. Inc(TokenStr);
  287. Result := tkSquaredBraceClose;
  288. end;
  289. 'T','t','F','f','N','n' :
  290. begin
  291. TokenStart := TokenStr;
  292. repeat
  293. Inc(TokenStr);
  294. until not (TokenStr[0] in ['A'..'Z', 'a'..'z', '0'..'9', '_']);
  295. SectionLength := TokenStr - TokenStart;
  296. SetLength(FCurTokenString, SectionLength);
  297. if SectionLength > 0 then
  298. Move(TokenStart^, FCurTokenString[1], SectionLength);
  299. for it := tkTrue to tkNull do
  300. if CompareText(CurTokenString, TokenInfos[it]) = 0 then
  301. begin
  302. Result := it;
  303. FCurToken := Result;
  304. exit;
  305. end;
  306. Error(SErrInvalidCharacter, [TokenStart[0]]);
  307. end;
  308. else
  309. Error(SErrInvalidCharacter, [TokenStr[0]]);
  310. end;
  311. FCurToken := Result;
  312. end;
  313. function TJSONScanner.GetCurColumn: Integer;
  314. begin
  315. Result := TokenStr - PChar(CurLine);
  316. end;
  317. end.