cldrtxt.pas 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
  1. { Parser of the CLDR collation tailoring files.
  2. This parser handle the textual syntax for CLDR version > 23
  3. Copyright (c) 2014,2015 by Inoussa OUEDRAOGO
  4. The source code is distributed under the Library GNU
  5. General Public License with the following modification:
  6. - object files and libraries linked into an application may be
  7. distributed without source code.
  8. If you didn't receive a copy of the file COPYING, contact:
  9. Free Software Foundation
  10. 675 Mass Ave
  11. Cambridge, MA 02139
  12. USA
  13. This program is distributed in the hope that it will be useful,
  14. but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16. }
  17. unit cldrtxt;
  18. {$mode delphi}
  19. {$H+}
  20. {$SCOPEDENUMS ON}
  21. {$TypedAddress on}
  22. interface
  23. uses
  24. Classes, SysUtils,
  25. cldrhelper, helper;
  26. procedure ParseInitialDocument(
  27. ASequence : POrderedCharacters;
  28. ADoc : TCustomMemoryStream;
  29. ASettings : TSettingRecArray
  30. );overload;
  31. procedure ParseInitialDocument(
  32. ASequence : POrderedCharacters;
  33. AFileName : string;
  34. ASettings : TSettingRecArray
  35. );overload;
  36. const
  37. SETTING_WITH_UNICODESET = [
  38. TSettingOption.SuppressContractions, TSettingOption.Optimize
  39. ];
  40. SETTING_OPTION_STRINGS : // Lower case !
  41. array[Succ(TSettingOption.Unknown)..High(TSettingOption)] of UTF8String = (
  42. 'strength', 'alternate', 'backwards', 'normalization', 'caselevel', 'casefirst',
  43. 'hiraganaq', 'numericordering', 'reorder', 'maxvariable', 'import',
  44. 'suppresscontractions', 'optimize'
  45. );
  46. type
  47. TStatementKind = (Sequence, Setting);
  48. TParsedStatement = record
  49. Kind : TStatementKind;
  50. ReorderSequence : TReorderSequence;
  51. Setting : TSettingRec;
  52. end;
  53. PParsedStatement = ^TParsedStatement;
  54. function ParseStatement(
  55. AData : PAnsiChar;
  56. AStartPosition,
  57. AMaxLen : Integer;
  58. AStatement : PParsedStatement;
  59. var ANextPos,
  60. ALineCount : Integer
  61. ) : Boolean;
  62. procedure Clear(var AItem : TParsedStatement);
  63. procedure AddItem(var AList : TSettingRecArray; const AItem : PSettingRec);
  64. implementation
  65. uses
  66. unicodedata;
  67. const
  68. s_BEFORE = 'before';
  69. function String2UnicodeCodePointArray(const AValue : UTF8String): TUnicodeCodePointArray;
  70. var
  71. u4str : UCS4String;
  72. k : Integer;
  73. begin
  74. if (Length(AValue) = 0) then
  75. exit(nil);
  76. if (Length(AValue) = 1) then begin
  77. SetLength(Result,1);
  78. Result[0] := Ord(AValue[1])
  79. end else begin
  80. u4str := UnicodeStringToUCS4String(UTF8Decode(AValue));
  81. k := Length(u4str) - 1; // remove the last #0
  82. SetLength(Result,k);
  83. for k := 0 to k - 1 do
  84. Result[k] := u4str[k];
  85. end;
  86. end;
  87. function TryStringToReorderWeigthKind(
  88. const AStr : UTF8String;
  89. out AResult : TReorderWeigthKind
  90. ) : Boolean;
  91. begin
  92. Result := True;
  93. if (AStr = '=') then
  94. AResult := TReorderWeigthKind.Identity
  95. else if (AStr = '<') or (AStr = '>') then
  96. AResult := TReorderWeigthKind.Primary
  97. else if (AStr = '<<') or (AStr = '>>') then
  98. AResult := TReorderWeigthKind.Secondary
  99. else if (AStr = '<<<') or (AStr = '>>>') then
  100. AResult := TReorderWeigthKind.Tertiary
  101. else if (AStr = '<<<<') or (AStr = '>>>>') then
  102. {Quaternary level is treated as Identity !}
  103. AResult := TReorderWeigthKind.Identity
  104. else begin
  105. AResult := TReorderWeigthKind.Identity;
  106. Result := False;
  107. end;
  108. end;
  109. function StringToSettingOption(const AStr : UTF8String) : TSettingOption;
  110. var
  111. e : TSettingOption;
  112. s : UTF8String;
  113. begin
  114. s := LowerCase(AStr);
  115. for e := Succ(TSettingOption.Unknown) to High(TSettingOption) do begin
  116. if (s = SETTING_OPTION_STRINGS[e]) then
  117. exit(e);
  118. end;
  119. Result := TSettingOption.Unknown;
  120. end;
  121. procedure Clear(var AItem : TParsedStatement);
  122. begin
  123. AItem.Setting.Clear();
  124. AItem.ReorderSequence.Clear();
  125. AItem.Kind := TStatementKind(0);
  126. end;
  127. procedure AddItem(var AList : TSettingRecArray; const AItem : PSettingRec);
  128. var
  129. c : Integer;
  130. begin
  131. c := Length(AList);
  132. SetLength(AList,(c+1));
  133. AList[c].Assign(AItem);
  134. end;
  135. procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);
  136. begin
  137. AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
  138. ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
  139. end;
  140. function ParseStatement(
  141. AData : PAnsiChar;
  142. AStartPosition,
  143. AMaxLen : Integer;
  144. AStatement : PParsedStatement;
  145. var ANextPos,
  146. ALineCount : Integer
  147. ) : Boolean;
  148. const
  149. LINE_LENGTH = 1024;
  150. var
  151. p : PAnsiChar;
  152. bufferLength, bufferPos, lineLength, linePos, lineIndex : Integer;
  153. line : UTF8String;
  154. statement : PReorderSequence;
  155. elementActualCount : Integer;
  156. specialChararter : Boolean;
  157. historyItemIndex : Integer;
  158. historyItems : array[0..31] of record
  159. p : PAnsiChar;
  160. bufferLength,
  161. bufferPos,
  162. lineLength,
  163. linePos,
  164. lineIndex : Integer;
  165. line : UTF8String;
  166. end;
  167. procedure SaveState();
  168. begin
  169. if (historyItemIndex >= High(historyItems)) then
  170. raise Exception.Create('History buffer is full.');
  171. historyItemIndex := historyItemIndex+1;
  172. historyItems[historyItemIndex].p := p;
  173. historyItems[historyItemIndex].bufferLength := bufferLength;
  174. historyItems[historyItemIndex].bufferPos := bufferPos;
  175. historyItems[historyItemIndex].lineLength := lineLength;
  176. historyItems[historyItemIndex].linePos := linePos;
  177. historyItems[historyItemIndex].lineIndex := lineIndex;
  178. historyItems[historyItemIndex].line := line;
  179. end;
  180. procedure RestoreState();
  181. begin
  182. if (historyItemIndex < 0) then
  183. raise Exception.Create('History buffer is empty.');
  184. p := historyItems[historyItemIndex].p;
  185. bufferLength := historyItems[historyItemIndex].bufferLength;
  186. bufferPos := historyItems[historyItemIndex].bufferPos;
  187. lineLength := historyItems[historyItemIndex].lineLength;
  188. linePos := historyItems[historyItemIndex].linePos;
  189. lineIndex := historyItems[historyItemIndex].lineIndex;
  190. line := historyItems[historyItemIndex].line;
  191. historyItemIndex := historyItemIndex-1;
  192. end;
  193. procedure DiscardState();
  194. begin
  195. if (historyItemIndex < 0) then
  196. raise Exception.Create('History buffer is empty.');
  197. historyItemIndex := historyItemIndex-1;
  198. end;
  199. function CurrentLine() : UTF8String; inline;
  200. begin
  201. Result := Copy(line,1,lineLength);
  202. end;
  203. function NextLine() : Boolean;
  204. var
  205. locOldPos : Integer;
  206. locOldPointer : PAnsiChar;
  207. begin
  208. Result := False;
  209. if (p^ = #10) then begin
  210. Inc(p);
  211. Inc(bufferPos);
  212. end;
  213. locOldPos := bufferPos;
  214. locOldPointer := p;
  215. while (bufferPos < bufferLength) and (p^ <> #10) do begin
  216. Inc(p);
  217. Inc(bufferPos);
  218. end;
  219. if (locOldPos = bufferPos) and (p^ = #10) then begin
  220. lineLength := 0;
  221. Inc(p);
  222. Inc(bufferPos);
  223. linePos := 1;
  224. Result := True;
  225. end else if (locOldPos < bufferPos) then begin
  226. lineLength := (bufferPos - locOldPos);
  227. if (lineLength >= Length(line)) then
  228. SetLength(line,(2*lineLength));
  229. Move(locOldPointer^,line[1],lineLength);
  230. {if (p^ = #10) then begin
  231. //Dec(lineLength);
  232. Inc(p);
  233. Inc(bufferPos);
  234. end;}
  235. linePos := 1;
  236. Result := True;
  237. end;
  238. if Result and (locOldPos < bufferPos) then
  239. lineIndex := lineIndex+1;
  240. end;
  241. procedure CheckLineLength(const ALength : Integer);
  242. begin
  243. if (ALength > lineLength) then
  244. raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
  245. end;
  246. function ReadChar(out AResult : UTF8String) : Boolean;
  247. var
  248. k : Integer;
  249. us : UnicodeString;
  250. begin
  251. AResult := '';
  252. Result := False;
  253. if (linePos > lineLength) then
  254. exit;
  255. {if CharInSet(line[linePos],['#','=','&','[',']']) then begin
  256. AResult := line[linePos];
  257. Inc(linePos);
  258. exit(True);
  259. end;}
  260. if (line[linePos] <> '\') then begin
  261. AResult := line[linePos];
  262. Inc(linePos);
  263. exit(True);
  264. end;
  265. CheckLineLength(linePos+1);
  266. Inc(linePos);
  267. case line[linePos] of
  268. '''': begin
  269. AResult := '\';
  270. exit(True);
  271. end;
  272. '\' : begin
  273. AResult := '\';
  274. Inc(linePos);
  275. exit(True);
  276. end;
  277. 'u' : begin
  278. CheckLineLength(linePos+4);
  279. AResult := '$'+Copy(line,(linePos+1),4);
  280. if not TryStrToInt(AResult,k) then
  281. raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s", line = "%s".',[AResult,CurrentLine()]);
  282. SetLength(us,1);
  283. us[1] := UnicodeChar(k);
  284. AResult := UTF8Encode(us);
  285. Inc(linePos,5);
  286. exit(True);
  287. end;
  288. 'U' : begin
  289. CheckLineLength(linePos+8);
  290. AResult := '$'+Copy(line,(linePos+1),8);
  291. if not TryStrToInt(AResult,k) then
  292. raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s".',[AResult]);
  293. if (k > High(Word)) then begin
  294. SetLength(us,2);
  295. FromUCS4(k,us[1],us[2]);
  296. if (Ord(us[2]) = 0) then
  297. SetLength(us,1);
  298. end else begin
  299. SetLength(us,1);
  300. us[1] := UnicodeChar(k);
  301. end;
  302. AResult := UTF8Encode(us);
  303. Inc(linePos,9);
  304. exit(True);
  305. end;
  306. else
  307. raise Exception.CreateFmt('Invalide escaped string "%s", at %d position.',[CurrentLine(),linePos]);
  308. end;
  309. end;
  310. function ReadQuotedString() : UTF8String;
  311. var
  312. ks : UTF8String;
  313. begin
  314. if (line[linePos] <> '''') then
  315. raise Exception.CreateFmt('Unexpected character found "%s", a quote expected: "%s".',[line[linePos],CurrentLine()]);
  316. Inc(linePos);
  317. if (linePos > lineLength) then
  318. raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[CurrentLine()]);
  319. if (line[linePos] = '''') then begin
  320. Inc(linePos);
  321. Result := '''';
  322. exit;
  323. end;
  324. Result := '';
  325. while (linePos <= lineLength) and ReadChar(ks) do begin
  326. Result := Result + ks;
  327. if (line[linePos] = '''') then
  328. break;
  329. end;
  330. if (line[linePos] = '''') then begin
  331. Inc(linePos);
  332. exit;
  333. end;
  334. raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[line]);
  335. end;
  336. function ReadUnQuotedString() : UTF8String;
  337. var
  338. k : Integer;
  339. begin
  340. k := linePos;
  341. while (linePos <= lineLength) and
  342. not(CharInSet(line[linePos],[' ',#9,'#', '=','&','[',']','<','>','''','/','|']))
  343. do begin
  344. Inc(linePos);
  345. end;
  346. if (linePos > k) then begin
  347. if (line[linePos] in [' ',#9,'#', '=','&','[',']','<','>','''','/','|']) then
  348. Result := Copy(line,k,(linePos-k))
  349. else
  350. Result := Copy(line,k,(linePos-k)); //Result := Copy(line,k,(linePos-k+1));
  351. end else begin
  352. Result := '';
  353. end;
  354. end;
  355. function NextToken() : UTF8String; overload;
  356. var
  357. k : Integer;
  358. ks : UTF8String;
  359. begin
  360. specialChararter := False;
  361. while True do begin
  362. while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin
  363. Inc(linePos);
  364. end;
  365. if (linePos > lineLength) or (line[linePos] = '#') then begin
  366. if not NextLine() then begin
  367. if (line[linePos] = '#') then
  368. linePos := lineLength+1; // A comment terminates a line !
  369. exit('');
  370. end;
  371. Continue;
  372. end ;
  373. Break;
  374. end;
  375. if (linePos > lineLength) then
  376. exit('');
  377. if (line[linePos] = '*') then begin
  378. linePos := linePos+1;
  379. specialChararter := True;
  380. exit('*');
  381. end;
  382. k := linePos;
  383. if (linePos <= lineLength) and CharInSet(line[linePos],['<','>']) then begin
  384. ks := line[linePos];
  385. while (linePos <= lineLength) and (line[linePos] = ks) do begin
  386. Inc(linePos);
  387. end;
  388. Result := Copy(line,k,(linePos-k));
  389. exit;
  390. end;
  391. if (linePos <= lineLength) and
  392. CharInSet(line[linePos],['=','&','[',']','<','>','/','|'])
  393. then begin
  394. Inc(linePos);
  395. Result := Copy(line,k,(linePos-k));
  396. specialChararter := True;
  397. exit;
  398. end;
  399. {if (line[linePos] = '''') then
  400. exit(ReadQuotedString()); }
  401. Result := '';
  402. while (linePos <= lineLength) do begin
  403. if CharInSet(line[linePos],[' ',#9,#13,'#', '=','&','[',']','<','>','/','|']) then
  404. Break;
  405. if (line[linePos] <> '''') then
  406. ks := ReadUnQuotedString()
  407. else
  408. ks := ReadQuotedString();
  409. if (ks = '') then
  410. Break;
  411. Result := Result + ks;
  412. end;
  413. end;
  414. function NextToken(const AMustSucceed : Boolean) : UTF8String; overload;
  415. begin
  416. Result := NextToken();
  417. if (Result = '') and AMustSucceed then
  418. raise Exception.CreateFmt('Unexpected end of line(%d) : "%s".',[lineIndex,CurrentLine()]);
  419. end;
  420. procedure CheckToken(const AActual, AExpectedToken : UTF8String);
  421. begin
  422. if (AActual <> AExpectedToken) then
  423. raise Exception.CreateFmt(
  424. '"%s" expected but "%s" found at position %d, BufferPosition(%d), line(%d) = "%s".',
  425. [AExpectedToken,AActual,linePos,bufferPos,lineIndex,CurrentLine()]
  426. );
  427. end;
  428. function parse_reset() : Boolean;
  429. var
  430. s, s1 : UTF8String;
  431. logicalPos : TReorderLogicalReset;
  432. k : Integer;
  433. begin
  434. s := NextToken(True);
  435. if (s = '[') and specialChararter then begin
  436. s := NextToken();
  437. if (s = s_BEFORE) then begin
  438. s := NextToken();
  439. if not(TryStrToInt(s,k)) or (k < 1) or (k > 3) then
  440. CheckToken(s,'"1" or "2" or "3"');
  441. CheckToken(NextToken(True),']');
  442. statement^.Reset := String2UnicodeCodePointArray(NextToken(True));
  443. statement^.Before := True;
  444. end else begin
  445. while True do begin
  446. s1 := NextToken();
  447. if (s1 = '') or (s1 = ']') then
  448. break;
  449. s := s + Trim(s1)
  450. end;
  451. CheckToken(s1,']');
  452. if (s = '') then
  453. raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
  454. if not TryStrToLogicalReorder(s,logicalPos) then
  455. raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
  456. statement^.LogicalPosition := logicalPos;
  457. end;
  458. end else begin
  459. statement^.Reset := String2UnicodeCodePointArray(s);
  460. end;
  461. if (statement^.LogicalPosition = TReorderLogicalReset.None) and
  462. (Length(statement^.Reset) = 0)
  463. then
  464. raise Exception.Create(sInvalidResetClause);
  465. Result := True;
  466. end;
  467. procedure EnsureElementLength(const ALength : Integer);
  468. var
  469. k, d : Integer;
  470. begin
  471. k := Length(statement^.Elements);
  472. if (k < ALength) then begin
  473. k := ALength;
  474. if (k = 0) then begin
  475. k := 50;
  476. end else begin
  477. if (k < 10) then
  478. d := 10
  479. else
  480. d := 2;
  481. k := k * d;
  482. end;
  483. statement^.SetElementCount(k);
  484. end;
  485. end;
  486. procedure AddElement(
  487. const AChars : array of UCS4Char;
  488. const AWeigthKind : TReorderWeigthKind;
  489. const AContext : UTF8String
  490. );overload;
  491. var
  492. kp : PReorderUnit;
  493. kc, k : Integer;
  494. begin
  495. EnsureElementLength(elementActualCount+1);
  496. kp := @statement^.Elements[elementActualCount];
  497. kc := Length(AChars)-1;
  498. if (kc < 0) then
  499. kc := 0;
  500. SetLength(kp^.Characters,kc);
  501. for k := 0 to kc - 1 do
  502. kp^.Characters[k] := AChars[k];
  503. kp^.WeigthKind := AWeigthKind;
  504. elementActualCount := elementActualCount + 1;
  505. if (AContext <> '') then
  506. kp^.Context := String2UnicodeCodePointArray(AContext);
  507. end;
  508. procedure AddElement(
  509. const AChar : UCS4Char;
  510. const AWeigthKind : TReorderWeigthKind;
  511. const AContext : UTF8String
  512. );overload;
  513. var
  514. kp : PReorderUnit;
  515. kc, k : Integer;
  516. begin
  517. EnsureElementLength(elementActualCount+1);
  518. kp := @statement^.Elements[elementActualCount];
  519. SetLength(kp^.Characters,1);
  520. kp^.Characters[0] := AChar;
  521. kp^.WeigthKind := AWeigthKind;
  522. elementActualCount := elementActualCount + 1;
  523. if (AContext <> '') then
  524. kp^.Context := String2UnicodeCodePointArray(AContext);
  525. end;
  526. function ReadNextItem() : Boolean;
  527. var
  528. contextStr : UTF8String;
  529. w : TReorderWeigthKind;
  530. last : PReorderUnit;
  531. u4str : UCS4String;
  532. s, ts : UTF8String;
  533. expandStr : TUnicodeCodePointArray;
  534. k, kc, x : Integer;
  535. us : UnicodeString;
  536. begin
  537. contextStr := '';
  538. expandStr := nil;
  539. Result := False;
  540. SaveState();
  541. s := NextToken();
  542. if (s = '') then begin
  543. DiscardState();
  544. exit;
  545. end;
  546. if specialChararter and (s = '&') then begin
  547. RestoreState();
  548. exit;
  549. end;
  550. DiscardState();
  551. if not TryStringToReorderWeigthKind(s,w) then
  552. CheckToken(s,'Reorder Weigth');
  553. s := NextToken(True);
  554. if specialChararter then begin
  555. if (s = '[') then begin
  556. k := 1;
  557. while True do begin
  558. ts := NextToken(True);
  559. s := s + ts;
  560. if specialChararter then begin
  561. if (ts = '[') then
  562. k := k+1
  563. else if (ts = ']') then begin
  564. k := k-1;
  565. if (k = 0) then
  566. Break;
  567. end;
  568. end;
  569. end;
  570. if (Pos('variable',s) > 0) then
  571. exit(True);
  572. end else if (s = '*') then begin
  573. s := NextToken(True);
  574. us := UTF8Decode(s);
  575. u4str := UnicodeStringToUCS4String(us);
  576. kc := Length(u4str)-1;
  577. k := 0;
  578. while (k <= (kc-1)) do begin
  579. if (k > 0) and (u4str[k] = Ord('-')) then begin
  580. if (k = (kc-1)) then begin
  581. AddElement(u4str[k],w,contextStr);
  582. end else begin
  583. for x := (u4str[k-1]+1) to u4str[k+1] do
  584. AddElement(x,w,contextStr);
  585. k := k+1;
  586. end;
  587. end else begin
  588. AddElement(u4str[k],w,contextStr);
  589. end;
  590. k := k+1;
  591. end;
  592. exit(True);
  593. end;
  594. end;
  595. SaveState();
  596. ts := NextToken();
  597. if (ts = '') or not(specialChararter) then begin
  598. RestoreState();
  599. us := UTF8Decode(s);
  600. u4str := UnicodeStringToUCS4String(us);
  601. end else begin
  602. if (ts = '|') then begin
  603. DiscardState();
  604. contextStr := s;
  605. s := NextToken(True);
  606. SaveState();
  607. ts := NextToken();
  608. end;
  609. if specialChararter and (ts = '/') then begin
  610. expandStr := String2UnicodeCodePointArray(NextToken(True));
  611. DiscardState();
  612. end else begin
  613. RestoreState();
  614. end;
  615. u4str := UnicodeStringToUCS4String(UTF8Decode(s));
  616. end;
  617. AddElement(u4str,w,contextStr);
  618. if (Length(expandStr) > 0) then begin
  619. last := @statement^.Elements[elementActualCount-1];
  620. last^.ExpansionChars := expandStr;
  621. end;
  622. Result := True;
  623. end;
  624. function ReadUnicodeSet() : UTF8String;
  625. var
  626. k, c : Integer;
  627. ks : UTF8String;
  628. begin
  629. while True do begin
  630. while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin
  631. Inc(linePos);
  632. end;
  633. if (linePos > lineLength) or (line[linePos] = '#') then begin
  634. if not NextLine() then begin
  635. if (line[linePos] = '#') then
  636. linePos := lineLength+1; // A comment terminates a line !
  637. exit('');
  638. end;
  639. Continue;
  640. end ;
  641. Break;
  642. end;
  643. if (linePos > lineLength) then
  644. exit('');
  645. if (line[linePos] <> '[') then
  646. exit;
  647. k := linePos;
  648. c := 1;
  649. ks := '';
  650. linePos := linePos+1;
  651. while (linePos <= lineLength) do begin
  652. if (line[linePos] = '[') then
  653. c := c+1
  654. else if (line[linePos] = ']') then
  655. c := c-1;
  656. if (c = 0) then
  657. break;
  658. linePos := linePos+1;
  659. if (linePos > lineLength) then begin
  660. ks := ks+Copy(line,k,linePos);
  661. if not NextLine() then
  662. raise Exception.CreateFmt(sInvalidUnicodeSetExpression,[line]);
  663. k := linePos;
  664. end;
  665. end;
  666. if (line[linePos] <> ']') then
  667. raise Exception.CreateFmt(sInvalidUnicodeSetExpression,[line]);
  668. linePos := linePos+1;
  669. ks := ks+Copy(line,k,(linePos-k));
  670. Result := ks;
  671. end;
  672. function ParseSetting() : Boolean;
  673. var
  674. name, value : UTF8String;
  675. c, k : Integer;
  676. begin
  677. name := NextToken(True);
  678. if (name = ']') then
  679. raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
  680. AStatement^.Setting.Name := name;
  681. AStatement^.Setting.OptionValue := StringToSettingOption(AStatement^.Setting.Name);
  682. if (AStatement^.Setting.OptionValue in SETTING_WITH_UNICODESET) then begin
  683. value := ReadUnicodeSet();
  684. if (value = '') then
  685. raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
  686. CheckToken(NextToken(True),']');
  687. SetLength(AStatement^.Setting.Values,1);
  688. AStatement^.Setting.Values[0] := value;
  689. Result := True;
  690. end else begin
  691. c := 0;
  692. while True do begin
  693. value := NextToken((c = 0));
  694. if (value = '') or (specialChararter and (value = ']')) then begin
  695. if (c = 0) then
  696. raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
  697. break;
  698. end;
  699. k := Length(AStatement^.Setting.Values);
  700. SetLength(AStatement^.Setting.Values,(k+1));
  701. AStatement^.Setting.Values[k] := value;
  702. c := c+1;
  703. end;
  704. Result := (c > 0);
  705. end;
  706. end;
  707. var
  708. locToken : UTF8String;
  709. begin
  710. Result := False;
  711. elementActualCount := 0;
  712. if (AStartPosition >= AMaxLen) then
  713. exit;
  714. historyItemIndex := -1;
  715. lineIndex := ALineCount;
  716. bufferLength := AMaxLen;
  717. bufferPos := AStartPosition;
  718. p := AData+AStartPosition;
  719. SetLength(line,LINE_LENGTH);
  720. Clear(AStatement^);
  721. if not NextLine() then
  722. exit;
  723. locToken := NextToken();
  724. if (locToken = '') then
  725. exit;
  726. if not specialChararter then
  727. raise Exception.CreateFmt(sSpecialCharacterExpected,[locToken,CurrentLine()]);
  728. if (locToken = '&') then begin
  729. AStatement.Kind := TStatementKind.Sequence;
  730. statement := @AStatement.ReorderSequence;
  731. if not parse_reset() then
  732. exit;
  733. while ReadNextItem() do begin
  734. // All done in the condition
  735. end;
  736. statement^.SetElementCount(elementActualCount);
  737. end else if (locToken = '[') then begin
  738. if not ParseSetting() then
  739. exit;
  740. AStatement.Kind := TStatementKind.Setting;
  741. end;
  742. if (linePos > lineLength) then
  743. linePos := lineLength+1;
  744. ANextPos := bufferPos-lineLength+linePos-1;
  745. Result := (ANextPos > AStartPosition);
  746. ALineCount := lineIndex;
  747. end;
  748. procedure ParseInitialDocument(
  749. ASequence : POrderedCharacters;
  750. ADoc : TCustomMemoryStream;
  751. ASettings : TSettingRecArray
  752. );
  753. var
  754. buffer : PAnsiChar;
  755. bufferLength : Integer;
  756. i, nextPost : Integer;
  757. statement : TParsedStatement;
  758. p : PReorderUnit;
  759. lineCount : Integer;
  760. begin
  761. if (ADoc.Size < 1) then
  762. exit;
  763. buffer := ADoc.Memory; //0xEF,0xBB,0xBF
  764. bufferLength := ADoc.Size;
  765. if (bufferLength >= 3) and
  766. (Byte(buffer[0]) = $EF) and
  767. (Byte(buffer[1]) = $BB) and
  768. (Byte(buffer[2]) = $BF)
  769. then begin
  770. Inc(buffer,3);
  771. Dec(bufferLength,3);
  772. end;
  773. lineCount := 0;
  774. ASequence^.Clear();
  775. SetLength(ASequence^.Data,50000);
  776. nextPost := 0;
  777. i := 0;
  778. while (i < bufferLength) do begin
  779. Clear(statement);
  780. if not ParseStatement(buffer,i,bufferLength,@statement,nextPost,lineCount) then
  781. Break;
  782. i := nextPost;
  783. try
  784. if (statement.Kind = TStatementKind.Sequence) then
  785. ASequence^.ApplyStatement(@statement.ReorderSequence)
  786. else
  787. AddItem(ASettings,@statement.Setting);
  788. except
  789. on e : Exception do begin
  790. e.Message := Format('%s Position = %d',[e.Message,i]);
  791. raise;
  792. end;
  793. end;
  794. end;
  795. if (ASequence^.ActualLength > 0) then begin
  796. p := @ASequence^.Data[0];
  797. for i := 0 to ASequence^.ActualLength - 1 do begin
  798. p^.Changed := False;
  799. Inc(p);
  800. end;
  801. end;
  802. end;
  803. procedure ParseInitialDocument(
  804. ASequence : POrderedCharacters;
  805. AFileName : string;
  806. ASettings : TSettingRecArray
  807. );
  808. var
  809. doc : TMemoryStream;
  810. begin
  811. doc := TMemoryStream.Create();
  812. try
  813. doc.LoadFromFile(AFileName);
  814. doc.Position := 0;
  815. ParseInitialDocument(ASequence,doc,ASettings);
  816. finally
  817. doc.Free();
  818. end;
  819. end;
  820. end.