File Text.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. /******************************************************************************/
  2. #include "stdafx.h"
  3. namespace EE{
  4. /******************************************************************************/
// Byte order mark constants used by 'LoadEncoding' and 'SaveEncoding'.
  5. #define BOM_UTF_8 0xBBEF // 16-bit value compared against the first 'getUShort' of a file, together with 'BOM_UTF_8_3' it forms the 3-byte UTF-8 mark (presumably bytes EF BB read as little-endian - TODO confirm)
  6. #define BOM_UTF_8_3 0xBF // third byte of the UTF-8 mark, checked only after 'BOM_UTF_8' matched
  7. #define BOM_UTF_16 0xFEFF // UTF-16 mark, read and written as a single 16-bit value
  8. /******************************************************************************/
  9. static ENCODING LoadEncoding(File &f)
  10. {
  11. if(f.size()>=2) // encoding can be present only if there are at least 2 bytes
  12. {
  13. switch(f.getUShort())
  14. {
  15. case BOM_UTF_16: return UTF_16;
  16. case BOM_UTF_8 : if(f.getByte()==BOM_UTF_8_3)return UTF_8; break;
  17. }
  18. f.pos(0); // if there was no byte order mark found then reset position to the start
  19. }
  20. return ANSI; // default encoding
  21. }
  22. static void SaveEncoding(File &f, ENCODING encoding)
  23. {
  24. if(!f.size())switch(encoding) // we can save encoding only at the start of the file
  25. {
  26. case UTF_16: f.putUShort(BOM_UTF_16); break;
  27. case UTF_8 : f.putMulti (U16(BOM_UTF_8), Byte(BOM_UTF_8_3)); break;
  28. }
  29. }
  30. /******************************************************************************/
  31. void FileText::zero () {fix_new_line=true; indent=INDENT_TABS; depth=0; _code=ANSI;}
  32. FileText& FileText::del () {_f.del(); zero(); return T;}
  33. FileText::FileText() {zero();}
  34. FileText& FileText::writeMem(ENCODING encoding, Cipher *cipher)
  35. {
  36. del();
  37. _f.writeMem(65536, cipher);
  38. SaveEncoding(_f, T._code=encoding);
  39. return T;
  40. }
  41. Bool FileText::write(C Str &name, ENCODING encoding, Cipher *cipher)
  42. {
  43. del();
  44. if(_f.writeTry(name, cipher))
  45. {
  46. SaveEncoding(_f, T._code=encoding);
  47. return true;
  48. }
  49. return false;
  50. }
  51. Bool FileText::append(C Str &name, ENCODING encoding, Cipher *cipher)
  52. {
  53. del();
  54. if(_f.readStdTry(name, cipher))if(_f.size())encoding=LoadEncoding(_f); // take encoding from the file if it has some data
  55. if(_f. appendTry(name, cipher))
  56. {
  57. SaveEncoding(_f, T._code=encoding);
  58. return true;
  59. }
  60. return false;
  61. }
  62. Bool FileText::read(C Str &name, Cipher *cipher)
  63. {
  64. del();
  65. if(_f.readTry(name, cipher))
  66. {
  67. _code=LoadEncoding(_f);
  68. return true;
  69. }
  70. return false;
  71. }
  72. Bool FileText::read(C UID &id, Cipher *cipher)
  73. {
  74. del();
  75. if(_f.readTry(id, cipher))
  76. {
  77. _code=LoadEncoding(_f);
  78. return true;
  79. }
  80. return false;
  81. }
  82. Bool FileText::read(C Str &name, Pak &pak)
  83. {
  84. del();
  85. if(_f.readTry(name, pak))
  86. {
  87. _code=LoadEncoding(_f);
  88. return true;
  89. }
  90. return false;
  91. }
  92. Bool FileText::read(C UID &id, Pak &pak)
  93. {
  94. del();
  95. if(_f.readTry(id, pak))
  96. {
  97. _code=LoadEncoding(_f);
  98. return true;
  99. }
  100. return false;
  101. }
  102. FileText& FileText::readMem(CPtr data, Int size, Int encoding, Cipher *cipher)
  103. {
  104. del();
  105. _f.readMem(data, size, cipher);
  106. _code=LoadEncoding(_f); // load encoding always to skip potential BOM
  107. if(encoding>=0)_code=(ENCODING)encoding; // override encoding if it was specified
  108. return T;
  109. }
  110. /******************************************************************************/
  111. FileText& FileText::startLine()
  112. {
  113. if(indent)REP(depth)if(indent==INDENT_TABS)putChar('\t');else putText(" ");
  114. return T;
  115. }
  116. FileText& FileText::endLine()
  117. {
  118. switch(_code)
  119. {
  120. case ANSI :
  121. case UTF_8 :
  122. case UTF_8_NAKED: if(fix_new_line)_f.putUShort(0x0A0D);else _f.putByte(0x0A); break;
  123. case UTF_16: if(fix_new_line)_f.putUInt(0x000A000D);else _f.putUShort(0x000A); break;
  124. }
  125. return T;
  126. }
  127. FileText& FileText::putChar(Char8 c)
  128. {
  129. switch(c)
  130. {
  131. case '\n': endLine(); break;
  132. case '\r': if(fix_new_line)break; // if we're fixing new line, then never write '\r' manually, otherwise continue, that's why there's no break on purpose
  133. default : switch(_code)
  134. {
  135. case ANSI : _f.putByte ( c ); break;
  136. case UTF_16: _f.putUShort(Char8To16Fast(c)); break; // we can assume that Str was already initialized
  137. case UTF_8 :
  138. case UTF_8_NAKED:
  139. {
  140. U8 u=c;
  141. if(u<=0x7F)_f.putByte (u);
  142. else _f.putMulti(Byte(0xC0 | (u>>6)), Byte(0x80 | (u&0x3F)));
  143. }break;
  144. }break;
  145. }
  146. return T;
  147. }
  148. FileText& FileText::putChar(Char c)
  149. {
  150. switch(c)
  151. {
  152. case '\n': endLine(); break;
  153. case '\r': if(fix_new_line)break; // if we're fixing new line, then never write '\r' manually, otherwise continue, that's why there's no break on purpose
  154. default : switch(_code)
  155. {
  156. case ANSI : _f.putByte (Char16To8Fast(c)); break; // we can assume that Str was already initialized
  157. case UTF_16: _f.putUShort( c ); break;
  158. case UTF_8 :
  159. case UTF_8_NAKED:
  160. {
  161. U16 u=c;
  162. if(u<=0x07F)_f.putByte (u);else
  163. if(u<=0x7FF)_f.putMulti(Byte(0xC0 | (u>> 6)), Byte(0x80 | ( u &0x3F)));else
  164. _f.putMulti(Byte(0xE0 | (u>>12)), Byte(0x80 | ((u>>6)&0x3F)), Byte(0x80 | (u&0x3F)));
  165. }break;
  166. }break;
  167. }
  168. return T;
  169. }
  170. FileText& FileText::putText(C Str &text)
  171. {
  172. Char temp[65536/SIZE(Char)]; // use Char to force alignment
  173. switch(_code)
  174. {
  175. case ANSI: if(!fix_new_line){Str8 t=text; _f.putN(t(), t.length());}else
  176. {
  177. Str8 t=FixNewLine(text); _f.putN(t(), t.length());
  178. }break;
  179. case UTF_16: if(!fix_new_line){_f.putN(text(), text.length());}else
  180. {
  181. #if 0 // slower
  182. Str t=FixNewLine(text); _f.putN(t(), t.length());
  183. #else // faster
  184. const Int buf_elms=Elms(temp)-1; // use size -1 because we may be writing 2 characters in one step for "\r\n"
  185. for(Int i=0, buf_pos=0; ; )
  186. {
  187. Bool end=(i>=text.length());
  188. if( end || !InRange(buf_pos, buf_elms)) // if finished, or there's no more room in the buffer
  189. {
  190. if(buf_pos){_f.putN(temp, buf_pos); buf_pos=0;} // flush
  191. if(end)break;
  192. }
  193. U16 c=text()[i++]; // () avoids range checks
  194. if( c!='\r')
  195. if( c!='\n')temp[buf_pos++]=c;else
  196. {
  197. temp[buf_pos++]='\r';
  198. temp[buf_pos++]='\n';
  199. }
  200. }
  201. #endif
  202. }break;
  203. case UTF_8 :
  204. case UTF_8_NAKED:
  205. {
  206. #if 0 // slower
  207. FREPA(text)putChar(text[i]);
  208. #else // faster
  209. Char8 *C buf =(Char8*)temp;
  210. const Int buf_elms=SIZE(temp)/SIZE(Char8)-2; // use size -2 because we may be writing 3 characters in one step
  211. for(Int i=0, buf_pos=0; ; )
  212. {
  213. Bool end=(i>=text.length());
  214. if( end || !InRange(buf_pos, buf_elms)) // if finished, or there's no more room in the buffer
  215. {
  216. if(buf_pos){_f.putN(buf, buf_pos); buf_pos=0;} // flush
  217. if(end)break;
  218. }
  219. U16 c=text()[i++]; // () avoids range checks
  220. if(fix_new_line)switch(c)
  221. {
  222. case '\r': continue; // use 'continue' to don't store this character
  223. case '\n':
  224. {
  225. buf[buf_pos++]='\r';
  226. buf[buf_pos++]='\n';
  227. }continue; // use 'continue' because we've already stored this character
  228. }
  229. if(c<=0x07F) buf[buf_pos++]=c;else
  230. if(c<=0x7FF){buf[buf_pos++]=(0xC0 | (c>> 6)); buf[buf_pos++]=(0x80 | ( c &0x3F));}else
  231. {buf[buf_pos++]=(0xE0 | (c>>12)); buf[buf_pos++]=(0x80 | ((c>>6)&0x3F)); buf[buf_pos++]=(0x80 | (c&0x3F));}
  232. }
  233. #endif
  234. }break;
  235. }
  236. return T;
  237. }
  238. FileText& FileText::putLine(C Str &text)
  239. {
  240. return startLine().putText(text).endLine();
  241. }
  242. /******************************************************************************/
  243. Char FileText::getChar()
  244. {
  245. switch(_code)
  246. {
  247. default : return 0;
  248. case UTF_16: return _f.getUShort();
  249. case ANSI:
  250. {
  251. Char8 c[2]; c[0]=_f.getByte();
  252. #if WINDOWS // Code Pages are used on Windows
  253. if((c[0]&0x80) && !_f.end()) // this may be a 2-character multi-byte wide char
  254. {
  255. c[1]=_f.getByte();
  256. wchar_t w[2]; Int size=MultiByteToWideChar(CP_ACP, 0, c, Elms(c), w, Elms(w));
  257. if(size==1)return w[0]; // if 2 bytes generated 1 wide char, then return it
  258. _f.skip(-1); // otherwise, go back the extra byte that we've read, because we want to get only 1 char at this time
  259. }
  260. #endif
  261. return Char8To16Fast(c[0]); // we can assume that Str was already initialized
  262. }break;
  263. case UTF_8 :
  264. case UTF_8_NAKED:
  265. {
  266. Byte b0=_f.getByte();
  267. if(b0&(1<<7))
  268. {
  269. Byte b1=(_f.getByte()&0x3F);
  270. if((b0&(1<<6)) && (b0&(1<<5)))
  271. {
  272. Byte b2=(_f.getByte()&0x3F);
  273. if(b0&(1<<4))
  274. {
  275. Byte b3=(_f.getByte()&0x3F);
  276. b0&=0x07;
  277. UInt u=(b3|(b2<<6)|(b1<<12)|(b0<<18));
  278. return (u<=0xFFFF) ? u : '?';
  279. }else
  280. {
  281. b0&=0x0F;
  282. return b2|(b1<<6)|(b0<<12);
  283. }
  284. }else
  285. {
  286. b0&=0x1F;
  287. return b1|(b0<<6);
  288. }
  289. }else
  290. {
  291. return b0;
  292. }
  293. }break;
  294. }
  295. }
  296. FileText& FileText::skipLine()
  297. {
  298. for(; !end(); )
  299. {
  300. if(getChar()=='\n')break;
  301. }
  302. return T;
  303. }
  304. FileText& FileText::fullLine(Str &s)
  305. {
  306. s.clear();
  307. for(; !end(); )
  308. {
  309. Char c=getChar();
  310. if( c=='\n')break;
  311. if(U16(c)>=32 || c=='\t')s+=c;
  312. }
  313. return T;
  314. }
  315. FileText& FileText::getLine(Str &s)
  316. {
  317. s.clear();
  318. for(Bool start=true; !end(); )
  319. {
  320. Char c=getChar();
  321. if( c=='\n')break;
  322. if(U16(c)>=32 || c=='\t')
  323. {
  324. if(c!=' ' && c!='\t')start=false;
  325. if(!start)s+=c;
  326. }
  327. }
  328. return T;
  329. }
  330. FileText& FileText::getLine(Str8 &s)
  331. {
  332. s.clear();
  333. for(Bool start=true; !end(); )
  334. {
  335. Char c=getChar();
  336. if( c=='\n')break;
  337. if(U16(c)>=32 || c=='\t')
  338. {
  339. if(c!=' ' && c!='\t')start=false;
  340. if(!start)s+=c;
  341. }
  342. }
  343. return T;
  344. }
// Reads everything that is left in the file into 's', keeping only characters that are >=32, '\t' or '\n'.
// UTF-16 and ANSI data is read with a single call and filtered afterwards, all other encodings are decoded one character at a time through 'getChar'.
  345. FileText& FileText::getAll(Str &s)
  346. {
  347. s.clear();
// 'chars' = number of elements to reserve in 's': bytes left divided by the size of 'Char' for UTF-16, bytes left for all other encodings
  348. Int chars=((_code==UTF_16) ? _f.left()/SIZEI(Char) : _f.left()); if(chars>0)
  349. {
  350. s.reserve(chars); switch(_code)
  351. {
  352. case UTF_16:
  353. {
// read directly into the string memory, and set the file '_ok' flag to false if fewer characters were read than expected
  354. s._length=_f.getReturnSize(s._d.data(), chars*SIZEI(Char))/SIZEI(Char); if(s.length()!=chars)_f._ok=false;
// the ANSI case jumps here on Windows after it has converted the data to 16-bit characters inside 's'
  355. #if WINDOWS
  356. utf_16:
  357. #endif
  358. Int length=s.length(); CChar *t=s();
// scan for the first character that has to be removed, if there is none then 's' is left unchanged
  359. FREP(length)
  360. {
  361. Char c=t[i]; if(!(U16(c)>=32 || c=='\t' || c=='\n')) // found invalid char
  362. {
// build a filtered copy in 'temp', which later gets swapped with 's'
  363. Str temp; temp.reserve(length);
  364. CopyFastN(temp._d.data(), t, i); temp._length+=i; // copy everything up to this point
  365. #if 0 // simple iteration (slower)
  366. for(; ++i<length; ){Char c=t[i]; if(U16(c)>=32 || c=='\t' || c=='\n')temp._d[temp._length++]=c;}
  367. #else // batched copying
// 'last_ok' = index where the current run of characters not yet copied starts, each next invalid character flushes the run located before it
  368. Int last_ok=i+1; for(; ++i<length; )
  369. {
  370. Char c=t[i]; if(!(U16(c)>=32 || c=='\t' || c=='\n'))
  371. {
  372. Int copy=i-last_ok; CopyFastN(temp._d.data()+temp._length, t+last_ok, copy); temp._length+=copy;
  373. last_ok=i+1;
  374. }
  375. }
// copy the last run, which reaches up to the end of the data
  376. Int copy=length-last_ok; CopyFastN(temp._d.data()+temp._length, t+last_ok, copy); temp._length+=copy;
  377. #endif
  378. Swap(temp, s);
// the loop above has already processed all remaining characters, so stop the outer scan
  379. break;
  380. }
  381. }
  382. }break;
  383. case ANSI:
  384. {
// read the raw bytes into a temporary 8-bit string, and set the file '_ok' flag to false if fewer bytes were read than expected
  385. Str8 s8; s8.reserve(chars);
  386. Int length=s8._length=_f.getReturnSize(s8._d.data(), chars); if(length!=chars)_f._ok=false;
// 't' = write position inside 's', 't8' = source bytes
  387. Char *t=s._d.data(); CChar8 *t8=s8();
  388. FREP(length)
  389. {
  390. Char8 c=t8[i];
  391. if(U8(c)>=32 || c=='\t' || c=='\n')
  392. {
  393. #if WINDOWS // Code Pages are used on Windows
  394. if(c&0x80) // this may be a 2-character multi-byte wide char
  395. {
// convert all of the data from its start, what was stored in 's' so far gets overwritten
  396. s._length=MultiByteToWideChar(CP_ACP, 0, t8, length, WChar(s._d.data()), chars); // let OS handle multi-byte conversion
  397. goto utf_16; // we've converted to 16-bit so process as UTF-16
  398. }
  399. #endif
  400. *t++=Char8To16Fast(c); // we can assume that Str was already initialized
  401. }
  402. }
// the length is the number of characters that were kept
  403. s._length=t-s._d.data();
  404. }break;
// all remaining encodings: decode one character at a time and store only the ones that pass the filter
  405. default:
  406. {
  407. Char *t=s._d.data();
  408. for(; !end(); )
  409. {
  410. Char c=getChar();
  411. if(U16(c)>=32 || c=='\t' || c=='\n')*t++=c;
  412. }
  413. s._length=t-s._d.data();
  414. }break;
  415. }
// terminate the string at its final length
  416. s._d[s._length]='\0';
  417. }
  418. return T;
  419. }
  420. FileText& FileText::rewind()
  421. {
  422. depth=0;
  423. _f.pos(0);
  424. LoadEncoding(_f); // we already know the encoding, but we need to skip the byte order mark at start, don't set it to '_code' because 'UTF_8_NAKED' may had been used
  425. return T;
  426. }
  427. Bool FileText::copy(File &dest)
  428. {
  429. depth=0;
  430. if(!_f.pos(0))return false;
  431. return _f.copy(dest);
  432. }
  433. Char FileText::posInfo(Long pos, VecI2 &col_line)
  434. {
  435. rewind();
  436. Char last='\0'; VecI2 cl=0;
  437. if(pos>=T.pos())
  438. for(MIN(pos, size()); ; ) // this will allow reading right after the last character
  439. {
  440. last=getChar();
  441. if(T.pos()>pos || !ok())break;
  442. if(last=='\n'){cl.x=0; cl.y++;}else
  443. if(last!='\r') cl.x++;
  444. }
  445. col_line=cl; return last;
  446. }
  447. /******************************************************************************/
  448. }
  449. /******************************************************************************/
  450. void FileTextEx::get(Int &i) { i=getInt();}
  451. void FileTextEx::get(Flt &f) { f=getFlt();}
  452. void FileTextEx::get(Dbl &d) { d=getDbl();}
  453. void FileTextEx::get(Vec2 &v) {v.x=getFlt(); v.y=getFlt();}
  454. void FileTextEx::get(Vec &v) {v.x=getFlt(); v.y=getFlt(); v.z=getFlt();}
  455. void FileTextEx::get(Vec4 &v) {v.x=getFlt(); v.y=getFlt(); v.z=getFlt(); v.w=getFlt();}
  456. void FileTextEx::get(VecI2 &v) {v.x=getInt(); v.y=getInt();}
  457. void FileTextEx::get(VecI &v) {v.x=getInt(); v.y=getInt(); v.z=getInt();}
  458. void FileTextEx::get(VecI4 &v) {v.x=getInt(); v.y=getInt(); v.z=getInt(); v.w=getInt();}
  459. void FileTextEx::get(VecB4 &v) {v.x=getInt(); v.y=getInt(); v.z=getInt(); v.w=getInt();}
  460. Bool FileTextEx::getBool () {return TextBool(getWord());}
  461. Int FileTextEx::getInt () {return TextInt (getWord());}
  462. UInt FileTextEx::getUInt () {return TextUInt(getWord());}
  463. Flt FileTextEx::getFlt () {return TextFlt (getWord());}
  464. Dbl FileTextEx::getDbl () {return TextDbl (getWord());}
  465. Vec2 FileTextEx::getVec2 () {Vec2 v; get(v); return v;}
  466. Vec FileTextEx::getVec () {Vec v; get(v); return v;}
  467. Vec4 FileTextEx::getVec4 () {Vec4 v; get(v); return v;}
  468. VecI2 FileTextEx::getVecI2() {VecI2 v; get(v); return v;}
  469. VecI FileTextEx::getVecI () {VecI v; get(v); return v;}
  470. VecI4 FileTextEx::getVecI4() {VecI4 v; get(v); return v;}
  471. VecB4 FileTextEx::getVecB4() {VecB4 v; get(v); return v;}
  472. C Str& FileTextEx::getWord()
  473. {
  474. text.clear();
  475. for(Bool start=true; !end(); )
  476. {
  477. Char c=getChar();
  478. if( !c)break;
  479. if( c=='\t' || c=='\n' || c==' ')if(start)continue;else break;
  480. if(U16(c)>32)
  481. {
  482. start=false;
  483. text+=c;
  484. }
  485. }
  486. return text;
  487. }
  488. C Str& FileTextEx::getName()
  489. {
  490. text.clear();
  491. for(; !end(); )
  492. {
  493. Char c=getChar();
  494. if( !c || c=='\n')break;
  495. if( c=='"')
  496. {
  497. for(; !end(); )
  498. {
  499. Char c=getChar();
  500. if( !c || c=='\n' || c=='"')break;
  501. if(U16(c)>=32 || c=='\t')text+=c;
  502. }
  503. break;
  504. }
  505. }
  506. return text;
  507. }
  508. /******************************************************************************/
  509. Bool FileTextEx::getIn()
  510. {
  511. for(; !end(); )
  512. {
  513. getWord();
  514. if(text.first()=='{')return true;
  515. if(text.first()=='}')return false;
  516. }
  517. return false;
  518. }
  519. void FileTextEx::getOut()
  520. {
  521. for(Int depth=0; !end(); )
  522. {
  523. getWord();
  524. if(text.first()=='{')depth++;else
  525. if(text.first()=='}')if(--depth<0)break;
  526. }
  527. }
  528. Bool FileTextEx::level()
  529. {
  530. for(; !end(); )
  531. {
  532. getWord();
  533. if(text.first()=='}')break;
  534. if(text.first()=='{'){getOut(); continue;}
  535. return true;
  536. }
  537. return false;
  538. }
  539. /******************************************************************************/