//
// Copyright (c) 2008-2015 the Urho3D project.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
  22. #include "Precompiled.h"
  23. #include "../Container/Str.h"
  24. #include "../Container/Swap.h"
  25. #include <cstdio>
  26. #include "../DebugNew.h"
  27. namespace Atomic
  28. {
  29. char String::endZero = 0;
  30. const String String::EMPTY;
  31. String::String(const WString& str) :
  32. length_(0),
  33. capacity_(0),
  34. buffer_(&endZero)
  35. {
  36. SetUTF8FromWChar(str.CString());
  37. }
  38. String::String(int value) :
  39. length_(0),
  40. capacity_(0),
  41. buffer_(&endZero)
  42. {
  43. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  44. sprintf(tempBuffer, "%d", value);
  45. *this = tempBuffer;
  46. }
  47. String::String(short value) :
  48. length_(0),
  49. capacity_(0),
  50. buffer_(&endZero)
  51. {
  52. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  53. sprintf(tempBuffer, "%d", value);
  54. *this = tempBuffer;
  55. }
  56. String::String(long value) :
  57. length_(0),
  58. capacity_(0),
  59. buffer_(&endZero)
  60. {
  61. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  62. sprintf(tempBuffer, "%ld", value);
  63. *this = tempBuffer;
  64. }
  65. String::String(long long value) :
  66. length_(0),
  67. capacity_(0),
  68. buffer_(&endZero)
  69. {
  70. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  71. sprintf(tempBuffer, "%lld", value);
  72. *this = tempBuffer;
  73. }
  74. String::String(unsigned value) :
  75. length_(0),
  76. capacity_(0),
  77. buffer_(&endZero)
  78. {
  79. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  80. sprintf(tempBuffer, "%u", value);
  81. *this = tempBuffer;
  82. }
  83. String::String(unsigned short value) :
  84. length_(0),
  85. capacity_(0),
  86. buffer_(&endZero)
  87. {
  88. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  89. sprintf(tempBuffer, "%u", value);
  90. *this = tempBuffer;
  91. }
  92. String::String(unsigned long value) :
  93. length_(0),
  94. capacity_(0),
  95. buffer_(&endZero)
  96. {
  97. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  98. sprintf(tempBuffer, "%lu", value);
  99. *this = tempBuffer;
  100. }
  101. String::String(unsigned long long value) :
  102. length_(0),
  103. capacity_(0),
  104. buffer_(&endZero)
  105. {
  106. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  107. sprintf(tempBuffer, "%llu", value);
  108. *this = tempBuffer;
  109. }
  110. String::String(float value) :
  111. length_(0),
  112. capacity_(0),
  113. buffer_(&endZero)
  114. {
  115. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  116. sprintf(tempBuffer, "%g", value);
  117. *this = tempBuffer;
  118. }
  119. String::String(double value) :
  120. length_(0),
  121. capacity_(0),
  122. buffer_(&endZero)
  123. {
  124. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  125. sprintf(tempBuffer, "%g", value);
  126. *this = tempBuffer;
  127. }
  128. String::String(bool value) :
  129. length_(0),
  130. capacity_(0),
  131. buffer_(&endZero)
  132. {
  133. if (value)
  134. *this = "true";
  135. else
  136. *this = "false";
  137. }
  138. String::String(char value) :
  139. length_(0),
  140. capacity_(0),
  141. buffer_(&endZero)
  142. {
  143. Resize(1);
  144. buffer_[0] = value;
  145. }
  146. String::String(char value, unsigned length) :
  147. length_(0),
  148. capacity_(0),
  149. buffer_(&endZero)
  150. {
  151. Resize(length);
  152. for (unsigned i = 0; i < length; ++i)
  153. buffer_[i] = value;
  154. }
  155. String& String::operator += (int rhs)
  156. {
  157. return *this += String(rhs);
  158. }
  159. String& String::operator += (short rhs)
  160. {
  161. return *this += String(rhs);
  162. }
  163. String& String::operator += (long rhs)
  164. {
  165. return *this += String(rhs);
  166. }
  167. String& String::operator += (long long rhs)
  168. {
  169. return *this += String(rhs);
  170. }
  171. String& String::operator += (unsigned rhs)
  172. {
  173. return *this += String(rhs);
  174. }
  175. String& String::operator += (unsigned short rhs)
  176. {
  177. return *this += String(rhs);
  178. }
  179. String& String::operator += (unsigned long rhs)
  180. {
  181. return *this += String(rhs);
  182. }
  183. String& String::operator += (unsigned long long rhs)
  184. {
  185. return *this += String(rhs);
  186. }
  187. String& String::operator += (float rhs)
  188. {
  189. return *this += String(rhs);
  190. }
  191. String& String::operator += (bool rhs)
  192. {
  193. return *this += String(rhs);
  194. }
  195. void String::Replace(char replaceThis, char replaceWith, bool caseSensitive)
  196. {
  197. if (caseSensitive)
  198. {
  199. for (unsigned i = 0; i < length_; ++i)
  200. {
  201. if (buffer_[i] == replaceThis)
  202. buffer_[i] = replaceWith;
  203. }
  204. }
  205. else
  206. {
  207. replaceThis = tolower(replaceThis);
  208. for (unsigned i = 0; i < length_; ++i)
  209. {
  210. if (tolower(buffer_[i]) == replaceThis)
  211. buffer_[i] = replaceWith;
  212. }
  213. }
  214. }
  215. void String::Replace(const String& replaceThis, const String& replaceWith, bool caseSensitive)
  216. {
  217. unsigned nextPos = 0;
  218. while (nextPos < length_)
  219. {
  220. unsigned pos = Find(replaceThis, nextPos, caseSensitive);
  221. if (pos == NPOS)
  222. break;
  223. Replace(pos, replaceThis.length_, replaceWith);
  224. nextPos = pos + replaceWith.length_;
  225. }
  226. }
  227. void String::Replace(unsigned pos, unsigned length, const String& replaceWith)
  228. {
  229. // If substring is illegal, do nothing
  230. if (pos + length > length_)
  231. return;
  232. Replace(pos, length, replaceWith.buffer_, replaceWith.length_);
  233. }
  234. void String::Replace(unsigned pos, unsigned length, const char* replaceWith)
  235. {
  236. // If substring is illegal, do nothing
  237. if (pos + length > length_)
  238. return;
  239. Replace(pos, length, replaceWith, CStringLength(replaceWith));
  240. }
  241. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  242. {
  243. unsigned pos = start - Begin();
  244. if (pos >= length_)
  245. return End();
  246. unsigned length = end - start;
  247. Replace(pos, length, replaceWith);
  248. return Begin() + pos;
  249. }
  250. String String::Replaced(char replaceThis, char replaceWith, bool caseSensitive) const
  251. {
  252. String ret(*this);
  253. ret.Replace(replaceThis, replaceWith, caseSensitive);
  254. return ret;
  255. }
  256. String String::Replaced(const String& replaceThis, const String& replaceWith, bool caseSensitive) const
  257. {
  258. String ret(*this);
  259. ret.Replace(replaceThis, replaceWith, caseSensitive);
  260. return ret;
  261. }
  262. String& String::Append(const String& str)
  263. {
  264. return *this += str;
  265. }
  266. String& String::Append(const char* str)
  267. {
  268. return *this += str;
  269. }
  270. String& String::Append(char c)
  271. {
  272. return *this += c;
  273. }
  274. String& String::Append(const char* str, unsigned length)
  275. {
  276. if (str)
  277. {
  278. unsigned oldLength = length_;
  279. Resize(oldLength + length);
  280. CopyChars(&buffer_[oldLength], str, length);
  281. }
  282. return *this;
  283. }
  284. void String::Insert(unsigned pos, const String& str)
  285. {
  286. if (pos > length_)
  287. pos = length_;
  288. if (pos == length_)
  289. (*this) += str;
  290. else
  291. Replace(pos, 0, str);
  292. }
  293. void String::Insert(unsigned pos, char c)
  294. {
  295. if (pos > length_)
  296. pos = length_;
  297. if (pos == length_)
  298. (*this) += c;
  299. else
  300. {
  301. unsigned oldLength = length_;
  302. Resize(length_ + 1);
  303. MoveRange(pos + 1, pos, oldLength - pos);
  304. buffer_[pos] = c;
  305. }
  306. }
  307. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  308. {
  309. unsigned pos = dest - Begin();
  310. if (pos > length_)
  311. pos = length_;
  312. Insert(pos, str);
  313. return Begin() + pos;
  314. }
  315. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  316. {
  317. unsigned pos = dest - Begin();
  318. if (pos > length_)
  319. pos = length_;
  320. unsigned length = end - start;
  321. Replace(pos, 0, &(*start), length);
  322. return Begin() + pos;
  323. }
  324. String::Iterator String::Insert(const String::Iterator& dest, char c)
  325. {
  326. unsigned pos = dest - Begin();
  327. if (pos > length_)
  328. pos = length_;
  329. Insert(pos, c);
  330. return Begin() + pos;
  331. }
  332. void String::Erase(unsigned pos, unsigned length)
  333. {
  334. Replace(pos, length, String::EMPTY);
  335. }
  336. String::Iterator String::Erase(const String::Iterator& it)
  337. {
  338. unsigned pos = it - Begin();
  339. if (pos >= length_)
  340. return End();
  341. Erase(pos);
  342. return Begin() + pos;
  343. }
  344. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  345. {
  346. unsigned pos = start - Begin();
  347. if (pos >= length_)
  348. return End();
  349. unsigned length = end - start;
  350. Erase(pos, length);
  351. return Begin() + pos;
  352. }
  353. void String::Resize(unsigned newLength)
  354. {
  355. if (!capacity_)
  356. {
  357. // If zero length requested, do not allocate buffer yet
  358. if (!newLength)
  359. return;
  360. // Calculate initial capacity
  361. capacity_ = newLength + 1;
  362. if (capacity_ < MIN_CAPACITY)
  363. capacity_ = MIN_CAPACITY;
  364. buffer_ = new char[capacity_];
  365. }
  366. else
  367. {
  368. if (newLength && capacity_ < newLength + 1)
  369. {
  370. // Increase the capacity with half each time it is exceeded
  371. while (capacity_ < newLength + 1)
  372. capacity_ += (capacity_ + 1) >> 1;
  373. char* newBuffer = new char[capacity_];
  374. // Move the existing data to the new buffer, then delete the old buffer
  375. if (length_)
  376. CopyChars(newBuffer, buffer_, length_);
  377. delete[] buffer_;
  378. buffer_ = newBuffer;
  379. }
  380. }
  381. buffer_[newLength] = 0;
  382. length_ = newLength;
  383. }
  384. void String::Reserve(unsigned newCapacity)
  385. {
  386. if (newCapacity < length_ + 1)
  387. newCapacity = length_ + 1;
  388. if (newCapacity == capacity_)
  389. return;
  390. char* newBuffer = new char[newCapacity];
  391. // Move the existing data to the new buffer, then delete the old buffer
  392. CopyChars(newBuffer, buffer_, length_ + 1);
  393. if (capacity_)
  394. delete[] buffer_;
  395. capacity_ = newCapacity;
  396. buffer_ = newBuffer;
  397. }
  398. void String::Compact()
  399. {
  400. if (capacity_)
  401. Reserve(length_ + 1);
  402. }
  403. void String::Clear()
  404. {
  405. Resize(0);
  406. }
  407. void String::Swap(String& str)
  408. {
  409. Atomic::Swap(length_, str.length_);
  410. Atomic::Swap(capacity_, str.capacity_);
  411. Atomic::Swap(buffer_, str.buffer_);
  412. }
  413. String String::Substring(unsigned pos) const
  414. {
  415. if (pos < length_)
  416. {
  417. String ret;
  418. ret.Resize(length_ - pos);
  419. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  420. return ret;
  421. }
  422. else
  423. return String();
  424. }
  425. String String::Substring(unsigned pos, unsigned length) const
  426. {
  427. if (pos < length_)
  428. {
  429. String ret;
  430. if (pos + length > length_)
  431. length = length_ - pos;
  432. ret.Resize(length);
  433. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  434. return ret;
  435. }
  436. else
  437. return String();
  438. }
  439. String String::Trimmed() const
  440. {
  441. unsigned trimStart = 0;
  442. unsigned trimEnd = length_;
  443. while (trimStart < trimEnd)
  444. {
  445. char c = buffer_[trimStart];
  446. if (c != ' ' && c != 9)
  447. break;
  448. ++trimStart;
  449. }
  450. while (trimEnd > trimStart)
  451. {
  452. char c = buffer_[trimEnd - 1];
  453. if (c != ' ' && c != 9)
  454. break;
  455. --trimEnd;
  456. }
  457. return Substring(trimStart, trimEnd - trimStart);
  458. }
  459. String String::ToLower() const
  460. {
  461. String ret(*this);
  462. for (unsigned i = 0; i < ret.length_; ++i)
  463. ret[i] = tolower(buffer_[i]);
  464. return ret;
  465. }
  466. String String::ToUpper() const
  467. {
  468. String ret(*this);
  469. for (unsigned i = 0; i < ret.length_; ++i)
  470. ret[i] = toupper(buffer_[i]);
  471. return ret;
  472. }
  473. Vector<String> String::Split(char separator) const
  474. {
  475. return Split(CString(), separator);
  476. }
  477. void String::Join(const Vector<String>& subStrings, String glue)
  478. {
  479. *this = Joined(subStrings, glue);
  480. }
  481. unsigned String::Find(char c, unsigned startPos, bool caseSensitive) const
  482. {
  483. if (caseSensitive)
  484. {
  485. for (unsigned i = startPos; i < length_; ++i)
  486. {
  487. if (buffer_[i] == c)
  488. return i;
  489. }
  490. }
  491. else
  492. {
  493. c = tolower(c);
  494. for (unsigned i = startPos; i < length_; ++i)
  495. {
  496. if (tolower(buffer_[i]) == c)
  497. return i;
  498. }
  499. }
  500. return NPOS;
  501. }
  502. unsigned String::Find(const String& str, unsigned startPos, bool caseSensitive) const
  503. {
  504. if (!str.length_ || str.length_ > length_)
  505. return NPOS;
  506. char first = str.buffer_[0];
  507. if (!caseSensitive)
  508. first = tolower(first);
  509. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  510. {
  511. char c = buffer_[i];
  512. if (!caseSensitive)
  513. c = tolower(c);
  514. if (c == first)
  515. {
  516. unsigned skip = NPOS;
  517. bool found = true;
  518. for (unsigned j = 1; j < str.length_; ++j)
  519. {
  520. c = buffer_[i + j];
  521. char d = str.buffer_[j];
  522. if (!caseSensitive)
  523. {
  524. c = tolower(c);
  525. d = tolower(d);
  526. }
  527. if (skip == NPOS && c == first)
  528. skip = i + j - 1;
  529. if (c != d)
  530. {
  531. found = false;
  532. if (skip != NPOS)
  533. i = skip;
  534. break;
  535. }
  536. }
  537. if (found)
  538. return i;
  539. }
  540. }
  541. return NPOS;
  542. }
  543. unsigned String::FindLast(char c, unsigned startPos, bool caseSensitive) const
  544. {
  545. if (startPos >= length_)
  546. startPos = length_ - 1;
  547. if (caseSensitive)
  548. {
  549. for (unsigned i = startPos; i < length_; --i)
  550. {
  551. if (buffer_[i] == c)
  552. return i;
  553. }
  554. }
  555. else
  556. {
  557. c = tolower(c);
  558. for (unsigned i = startPos; i < length_; --i)
  559. {
  560. if (tolower(buffer_[i]) == c)
  561. return i;
  562. }
  563. }
  564. return NPOS;
  565. }
  566. unsigned String::FindLast(const String& str, unsigned startPos, bool caseSensitive) const
  567. {
  568. if (!str.length_ || str.length_ > length_)
  569. return NPOS;
  570. if (startPos > length_ - str.length_)
  571. startPos = length_ - str.length_;
  572. char first = str.buffer_[0];
  573. if (!caseSensitive)
  574. first = tolower(first);
  575. for (unsigned i = startPos; i < length_; --i)
  576. {
  577. char c = buffer_[i];
  578. if (!caseSensitive)
  579. c = tolower(c);
  580. if (c == first)
  581. {
  582. bool found = true;
  583. for (unsigned j = 1; j < str.length_; ++j)
  584. {
  585. c = buffer_[i + j];
  586. char d = str.buffer_[j];
  587. if (!caseSensitive)
  588. {
  589. c = tolower(c);
  590. d = tolower(d);
  591. }
  592. if (c != d)
  593. {
  594. found = false;
  595. break;
  596. }
  597. }
  598. if (found)
  599. return i;
  600. }
  601. }
  602. return NPOS;
  603. }
  604. bool String::StartsWith(const String& str, bool caseSensitive) const
  605. {
  606. return Find(str, 0, caseSensitive) == 0;
  607. }
  608. bool String::EndsWith(const String& str, bool caseSensitive) const
  609. {
  610. unsigned pos = FindLast(str, Length() - 1, caseSensitive);
  611. return pos != NPOS && pos == Length() - str.Length();
  612. }
  613. int String::Compare(const String& str, bool caseSensitive) const
  614. {
  615. return Compare(CString(), str.CString(), caseSensitive);
  616. }
  617. int String::Compare(const char* str, bool caseSensitive) const
  618. {
  619. return Compare(CString(), str, caseSensitive);
  620. }
  621. void String::SetUTF8FromLatin1(const char* str)
  622. {
  623. char temp[7];
  624. Clear();
  625. if (!str)
  626. return;
  627. while (*str)
  628. {
  629. char* dest = temp;
  630. EncodeUTF8(dest, *str++);
  631. *dest = 0;
  632. Append(temp);
  633. }
  634. }
  635. void String::SetUTF8FromWChar(const wchar_t* str)
  636. {
  637. char temp[7];
  638. Clear();
  639. if (!str)
  640. return;
  641. #ifdef WIN32
  642. while (*str)
  643. {
  644. unsigned unicodeChar = DecodeUTF16(str);
  645. char* dest = temp;
  646. EncodeUTF8(dest, unicodeChar);
  647. *dest = 0;
  648. Append(temp);
  649. }
  650. #else
  651. while (*str)
  652. {
  653. char* dest = temp;
  654. EncodeUTF8(dest, *str++);
  655. *dest = 0;
  656. Append(temp);
  657. }
  658. #endif
  659. }
  660. unsigned String::LengthUTF8() const
  661. {
  662. unsigned ret = 0;
  663. const char* src = buffer_;
  664. if (!src)
  665. return ret;
  666. const char* end = buffer_ + length_;
  667. while (src < end)
  668. {
  669. DecodeUTF8(src);
  670. ++ret;
  671. }
  672. return ret;
  673. }
  674. unsigned String::ByteOffsetUTF8(unsigned index) const
  675. {
  676. unsigned byteOffset = 0;
  677. unsigned utfPos = 0;
  678. while (utfPos < index && byteOffset < length_)
  679. {
  680. NextUTF8Char(byteOffset);
  681. ++utfPos;
  682. }
  683. return byteOffset;
  684. }
  685. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  686. {
  687. if (!buffer_)
  688. return 0;
  689. const char* src = buffer_ + byteOffset;
  690. unsigned ret = DecodeUTF8(src);
  691. byteOffset = src - buffer_;
  692. return ret;
  693. }
  694. unsigned String::AtUTF8(unsigned index) const
  695. {
  696. unsigned byteOffset = ByteOffsetUTF8(index);
  697. return NextUTF8Char(byteOffset);
  698. }
  699. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  700. {
  701. unsigned utfPos = 0;
  702. unsigned byteOffset = 0;
  703. while (utfPos < index && byteOffset < length_)
  704. {
  705. NextUTF8Char(byteOffset);
  706. ++utfPos;
  707. }
  708. if (utfPos < index)
  709. return;
  710. unsigned beginCharPos = byteOffset;
  711. NextUTF8Char(byteOffset);
  712. char temp[7];
  713. char* dest = temp;
  714. EncodeUTF8(dest, unicodeChar);
  715. *dest = 0;
  716. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  717. }
  718. String& String::AppendUTF8(unsigned unicodeChar)
  719. {
  720. char temp[7];
  721. char* dest = temp;
  722. EncodeUTF8(dest, unicodeChar);
  723. *dest = 0;
  724. return Append(temp);
  725. }
  726. String String::SubstringUTF8(unsigned pos) const
  727. {
  728. unsigned utf8Length = LengthUTF8();
  729. unsigned byteOffset = ByteOffsetUTF8(pos);
  730. String ret;
  731. while (pos < utf8Length)
  732. {
  733. ret.AppendUTF8(NextUTF8Char(byteOffset));
  734. ++pos;
  735. }
  736. return ret;
  737. }
  738. String String::SubstringUTF8(unsigned pos, unsigned length) const
  739. {
  740. unsigned utf8Length = LengthUTF8();
  741. unsigned byteOffset = ByteOffsetUTF8(pos);
  742. unsigned endPos = pos + length;
  743. String ret;
  744. while (pos < endPos && pos < utf8Length)
  745. {
  746. ret.AppendUTF8(NextUTF8Char(byteOffset));
  747. ++pos;
  748. }
  749. return ret;
  750. }
  751. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  752. {
  753. if (unicodeChar < 0x80)
  754. *dest++ = unicodeChar;
  755. else if (unicodeChar < 0x800)
  756. {
  757. dest[0] = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  758. dest[1] = 0x80 | (unicodeChar & 0x3f);
  759. dest += 2;
  760. }
  761. else if (unicodeChar < 0x10000)
  762. {
  763. dest[0] = 0xe0 | ((unicodeChar >> 12) & 0xf);
  764. dest[1] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  765. dest[2] = 0x80 | (unicodeChar & 0x3f);
  766. dest += 3;
  767. }
  768. else if (unicodeChar < 0x200000)
  769. {
  770. dest[0] = 0xf0 | ((unicodeChar >> 18) & 0x7);
  771. dest[1] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  772. dest[2] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  773. dest[3] = 0x80 | (unicodeChar & 0x3f);
  774. dest += 4;
  775. }
  776. else if (unicodeChar < 0x4000000)
  777. {
  778. dest[0] = 0xf8 | ((unicodeChar >> 24) & 0x3);
  779. dest[1] = 0x80 | ((unicodeChar >> 18) & 0x3f);
  780. dest[2] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  781. dest[3] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  782. dest[4] = 0x80 | (unicodeChar & 0x3f);
  783. dest += 5;
  784. }
  785. else
  786. {
  787. dest[0] = 0xfc | ((unicodeChar >> 30) & 0x1);
  788. dest[1] = 0x80 | ((unicodeChar >> 24) & 0x3f);
  789. dest[2] = 0x80 | ((unicodeChar >> 18) & 0x3f);
  790. dest[3] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  791. dest[4] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  792. dest[5] = 0x80 | (unicodeChar & 0x3f);
  793. dest += 6;
  794. }
  795. }
  796. #define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  797. unsigned String::DecodeUTF8(const char*& src)
  798. {
  799. if (src == 0)
  800. return 0;
  801. unsigned char char1 = *src++;
  802. // Check if we are in the middle of a UTF8 character
  803. if (char1 >= 0x80 && char1 < 0xc0)
  804. {
  805. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  806. ++src;
  807. return '?';
  808. }
  809. if (char1 < 0x80)
  810. return char1;
  811. else if (char1 < 0xe0)
  812. {
  813. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  814. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  815. }
  816. else if (char1 < 0xf0)
  817. {
  818. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  819. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  820. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  821. }
  822. else if (char1 < 0xf8)
  823. {
  824. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  825. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  826. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  827. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  828. }
  829. else if (char1 < 0xfc)
  830. {
  831. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  832. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  833. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  834. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  835. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  836. }
  837. else
  838. {
  839. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  840. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  841. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  842. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  843. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  844. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  845. ((char1 & 0x1) << 30);
  846. }
  847. }
  848. #ifdef WIN32
  849. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  850. {
  851. if (unicodeChar < 0x10000)
  852. *dest++ = unicodeChar;
  853. else
  854. {
  855. unicodeChar -= 0x10000;
  856. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  857. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  858. }
  859. }
  860. unsigned String::DecodeUTF16(const wchar_t*& src)
  861. {
  862. if (src == 0)
  863. return 0;
  864. unsigned short word1 = *src;
  865. // Check if we are at a low surrogate
  866. word1 = *src++;
  867. if (word1 >= 0xdc00 && word1 < 0xe000)
  868. {
  869. while (*src >= 0xdc00 && *src < 0xe000)
  870. ++src;
  871. return '?';
  872. }
  873. if (word1 < 0xd800 || word1 >= 0xe00)
  874. return word1;
  875. else
  876. {
  877. unsigned short word2 = *src++;
  878. if (word2 < 0xdc00 || word2 >= 0xe000)
  879. {
  880. --src;
  881. return '?';
  882. }
  883. else
  884. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  885. }
  886. }
  887. #endif
  888. Vector<String> String::Split(const char* str, char separator)
  889. {
  890. Vector<String> ret;
  891. unsigned pos = 0;
  892. unsigned length = CStringLength(str);
  893. while (pos < length)
  894. {
  895. if (str[pos] != separator)
  896. break;
  897. ++pos;
  898. }
  899. while (pos < length)
  900. {
  901. unsigned start = pos;
  902. while (start < length)
  903. {
  904. if (str[start] == separator)
  905. break;
  906. ++start;
  907. }
  908. if (start == length)
  909. {
  910. ret.Push(String(&str[pos]));
  911. break;
  912. }
  913. unsigned end = start;
  914. while (end < length)
  915. {
  916. if (str[end] != separator)
  917. break;
  918. ++end;
  919. }
  920. ret.Push(String(&str[pos], start - pos));
  921. pos = end;
  922. }
  923. return ret;
  924. }
  925. String String::Joined(const Vector<String>& subStrings, String glue)
  926. {
  927. if (subStrings.Empty())
  928. return String();
  929. String joinedString(subStrings[0]);
  930. for (unsigned i = 1; i < subStrings.Size(); ++i)
  931. joinedString.Append(glue).Append(subStrings[i]);
  932. return joinedString;
  933. }
  934. String& String::AppendWithFormat(const char* formatString, ... )
  935. {
  936. va_list args;
  937. va_start(args, formatString);
  938. AppendWithFormatArgs(formatString, args);
  939. va_end(args);
  940. return *this;
  941. }
  942. String& String::AppendWithFormatArgs(const char* formatString, va_list args)
  943. {
  944. int pos = 0, lastPos = 0;
  945. int length = strlen(formatString);
  946. while (true)
  947. {
  948. // Scan the format string and find %a argument where a is one of d, f, s ...
  949. while (pos < length && formatString[pos] != '%') pos++;
  950. Append(formatString + lastPos, pos - lastPos);
  951. if (pos >= length)
  952. return *this;
  953. char arg = formatString[pos + 1];
  954. pos += 2;
  955. lastPos = pos;
  956. switch (arg)
  957. {
  958. // Integer
  959. case 'd':
  960. case 'i':
  961. {
  962. int arg = va_arg(args, int);
  963. Append(String(arg));
  964. break;
  965. }
  966. // Unsigned
  967. case 'u':
  968. {
  969. unsigned arg = va_arg(args, unsigned);
  970. Append(String(arg));
  971. break;
  972. }
  973. // Real
  974. case 'f':
  975. {
  976. double arg = va_arg(args, double);
  977. Append(String(arg));
  978. break;
  979. }
  980. // Character
  981. case 'c':
  982. {
  983. int arg = va_arg(args, int);
  984. Append(arg);
  985. break;
  986. }
  987. // C string
  988. case 's':
  989. {
  990. char* arg = va_arg(args, char*);
  991. Append(arg);
  992. break;
  993. }
  994. // Hex
  995. case 'x':
  996. {
  997. char buf[CONVERSION_BUFFER_LENGTH];
  998. int arg = va_arg(args, int);
  999. int arglen = ::sprintf(buf, "%x", arg);
  1000. Append(buf, arglen);
  1001. break;
  1002. }
  1003. // Pointer
  1004. case 'p':
  1005. {
  1006. char buf[CONVERSION_BUFFER_LENGTH];
  1007. int arg = va_arg(args, int);
  1008. int arglen = ::sprintf(buf, "%p", reinterpret_cast<void*>(arg));
  1009. Append(buf, arglen);
  1010. break;
  1011. }
  1012. case '%':
  1013. {
  1014. Append("%", 1);
  1015. break;
  1016. }
  1017. }
  1018. }
  1019. return *this;
  1020. }
  1021. int String::Compare(const char* lhs, const char* rhs, bool caseSensitive)
  1022. {
  1023. if (!lhs || !rhs)
  1024. return lhs ? 1 : (rhs ? -1 : 0);
  1025. if (caseSensitive)
  1026. return strcmp(lhs, rhs);
  1027. else
  1028. {
  1029. for (;;)
  1030. {
  1031. char l = tolower(*lhs);
  1032. char r = tolower(*rhs);
  1033. if (!l || !r)
  1034. return l ? 1 : (r ? -1 : 0);
  1035. if (l < r)
  1036. return -1;
  1037. if (l > r)
  1038. return 1;
  1039. ++lhs;
  1040. ++rhs;
  1041. }
  1042. }
  1043. }
  1044. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  1045. {
  1046. int delta = (int)srcLength - (int)length;
  1047. if (pos + length < length_)
  1048. {
  1049. if (delta < 0)
  1050. {
  1051. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  1052. Resize(length_ + delta);
  1053. }
  1054. if (delta > 0)
  1055. {
  1056. Resize(length_ + delta);
  1057. MoveRange(pos + srcLength, pos + length, length_ - pos - length - delta);
  1058. }
  1059. }
  1060. else
  1061. Resize(length_ + delta);
  1062. CopyChars(buffer_ + pos, srcStart, srcLength);
  1063. }
  1064. WString::WString() :
  1065. length_(0),
  1066. buffer_(0)
  1067. {
  1068. }
  1069. WString::WString(const String& str) :
  1070. length_(0),
  1071. buffer_(0)
  1072. {
  1073. #ifdef WIN32
  1074. unsigned neededSize = 0;
  1075. wchar_t temp[3];
  1076. unsigned byteOffset = 0;
  1077. while (byteOffset < str.Length())
  1078. {
  1079. wchar_t* dest = temp;
  1080. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1081. neededSize += dest - temp;
  1082. }
  1083. Resize(neededSize);
  1084. byteOffset = 0;
  1085. wchar_t* dest = buffer_;
  1086. while (byteOffset < str.Length())
  1087. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1088. #else
  1089. Resize(str.LengthUTF8());
  1090. unsigned byteOffset = 0;
  1091. wchar_t* dest = buffer_;
  1092. while (byteOffset < str.Length())
  1093. *dest++ = str.NextUTF8Char(byteOffset);
  1094. #endif
  1095. }
  1096. WString::~WString()
  1097. {
  1098. delete[] buffer_;
  1099. }
  1100. void WString::Resize(unsigned newLength)
  1101. {
  1102. if (!newLength)
  1103. {
  1104. delete[] buffer_;
  1105. buffer_ = 0;
  1106. length_ = 0;
  1107. }
  1108. else
  1109. {
  1110. wchar_t* newBuffer = new wchar_t[newLength + 1];
  1111. if (buffer_)
  1112. {
  1113. unsigned copyLength = length_ < newLength ? length_ : newLength;
  1114. memcpy(newBuffer, buffer_, copyLength * sizeof(wchar_t));
  1115. delete[] buffer_;
  1116. }
  1117. newBuffer[newLength] = 0;
  1118. buffer_ = newBuffer;
  1119. length_ = newLength;
  1120. }
  1121. }
  1122. }