Str.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. //
  2. // Copyright (c) 2008-2013 the Urho3D project.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. #include "Str.h"
  23. #include "Swap.h"
  24. #include <cstdio>
  25. #include "DebugNew.h"
  26. namespace Urho3D
  27. {
  28. char String::endZero = 0;
  29. const String String::EMPTY;
  30. String::String(const WString& str) :
  31. length_(0),
  32. capacity_(0),
  33. buffer_(&endZero)
  34. {
  35. SetUTF8FromWChar(str.CString());
  36. }
  37. String::String(int value) :
  38. length_(0),
  39. capacity_(0),
  40. buffer_(&endZero)
  41. {
  42. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  43. sprintf(tempBuffer, "%d", value);
  44. *this = tempBuffer;
  45. }
  46. String::String(short value) :
  47. length_(0),
  48. capacity_(0),
  49. buffer_(&endZero)
  50. {
  51. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  52. sprintf(tempBuffer, "%d", value);
  53. *this = tempBuffer;
  54. }
  55. String::String(unsigned value) :
  56. length_(0),
  57. capacity_(0),
  58. buffer_(&endZero)
  59. {
  60. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  61. sprintf(tempBuffer, "%u", value);
  62. *this = tempBuffer;
  63. }
  64. String::String(unsigned short value) :
  65. length_(0),
  66. capacity_(0),
  67. buffer_(&endZero)
  68. {
  69. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  70. sprintf(tempBuffer, "%u", value);
  71. *this = tempBuffer;
  72. }
  73. String::String(float value) :
  74. length_(0),
  75. capacity_(0),
  76. buffer_(&endZero)
  77. {
  78. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  79. sprintf(tempBuffer, "%g", value);
  80. *this = tempBuffer;
  81. }
  82. String::String(double value) :
  83. length_(0),
  84. capacity_(0),
  85. buffer_(&endZero)
  86. {
  87. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  88. sprintf(tempBuffer, "%g", value);
  89. *this = tempBuffer;
  90. }
  91. String::String(bool value) :
  92. length_(0),
  93. capacity_(0),
  94. buffer_(&endZero)
  95. {
  96. if (value)
  97. *this = "true";
  98. else
  99. *this = "false";
  100. }
  101. String::String(char value) :
  102. length_(0),
  103. capacity_(0),
  104. buffer_(&endZero)
  105. {
  106. Resize(1);
  107. buffer_[0] = value;
  108. }
  109. String::String(char value, unsigned length) :
  110. length_(0),
  111. capacity_(0),
  112. buffer_(&endZero)
  113. {
  114. Resize(length);
  115. for (unsigned i = 0; i < length; ++i)
  116. buffer_[i] = value;
  117. }
  118. String& String::operator += (int rhs)
  119. {
  120. return *this += String(rhs);
  121. }
  122. String& String::operator += (short rhs)
  123. {
  124. return *this += String(rhs);
  125. }
  126. String& String::operator += (unsigned rhs)
  127. {
  128. return *this += String(rhs);
  129. }
  130. String& String::operator += (unsigned short rhs)
  131. {
  132. return *this += String(rhs);
  133. }
  134. String& String::operator += (float rhs)
  135. {
  136. return *this += String(rhs);
  137. }
  138. String& String::operator += (bool rhs)
  139. {
  140. return *this += String(rhs);
  141. }
  142. void String::Replace(char replaceThis, char replaceWith)
  143. {
  144. for (unsigned i = 0; i < length_; ++i)
  145. {
  146. if (buffer_[i] == replaceThis)
  147. buffer_[i] = replaceWith;
  148. }
  149. }
  150. void String::Replace(const String& replaceThis, const String& replaceWith)
  151. {
  152. unsigned nextPos = 0;
  153. while (nextPos < length_)
  154. {
  155. unsigned pos = Find(replaceThis, nextPos);
  156. if (pos == NPOS)
  157. break;
  158. Replace(pos, replaceThis.length_, replaceWith);
  159. nextPos = pos + replaceWith.length_;
  160. }
  161. }
  162. void String::Replace(unsigned pos, unsigned length, const String& str)
  163. {
  164. // If substring is illegal, do nothing
  165. if (pos + length > length_)
  166. return;
  167. Replace(pos, length, str.buffer_, str.length_);
  168. }
  169. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  170. {
  171. unsigned pos = start - Begin();
  172. if (pos >= length_)
  173. return End();
  174. unsigned length = end - start;
  175. Replace(pos, length, replaceWith);
  176. return Begin() + pos;
  177. }
  178. String String::Replaced(char replaceThis, char replaceWith) const
  179. {
  180. String ret(*this);
  181. ret.Replace(replaceThis, replaceWith);
  182. return ret;
  183. }
  184. String String::Replaced(const String& replaceThis, const String& replaceWith) const
  185. {
  186. String ret(*this);
  187. ret.Replace(replaceThis, replaceWith);
  188. return ret;
  189. }
  190. String& String::Append(const String& str)
  191. {
  192. return *this += str;
  193. }
  194. String& String::Append(const char* str)
  195. {
  196. return *this += str;
  197. }
  198. String& String::Append(char c)
  199. {
  200. return *this += c;
  201. }
  202. String& String::Append(const char* str, unsigned length)
  203. {
  204. if (str)
  205. {
  206. unsigned oldLength = length_;
  207. Resize(oldLength + length);
  208. CopyChars(&buffer_[oldLength], str, length);
  209. }
  210. return *this;
  211. }
  212. void String::Insert(unsigned pos, const String& str)
  213. {
  214. if (pos > length_)
  215. pos = length_;
  216. if (pos == length_)
  217. (*this) += str;
  218. else
  219. Replace(pos, 0, str);
  220. }
  221. void String::Insert(unsigned pos, char c)
  222. {
  223. if (pos > length_)
  224. pos = length_;
  225. if (pos == length_)
  226. (*this) += c;
  227. else
  228. {
  229. unsigned oldLength = length_;
  230. Resize(length_ + 1);
  231. MoveRange(pos + 1, pos, oldLength - pos);
  232. buffer_[pos] = c;
  233. }
  234. }
  235. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  236. {
  237. unsigned pos = dest - Begin();
  238. if (pos > length_)
  239. pos = length_;
  240. Insert(pos, str);
  241. return Begin() + pos;
  242. }
  243. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  244. {
  245. unsigned pos = dest - Begin();
  246. if (pos > length_)
  247. pos = length_;
  248. unsigned length = end - start;
  249. Replace(pos, 0, &(*start), length);
  250. return Begin() + pos;
  251. }
  252. String::Iterator String::Insert(const String::Iterator& dest, char c)
  253. {
  254. unsigned pos = dest - Begin();
  255. if (pos > length_)
  256. pos = length_;
  257. Insert(pos, c);
  258. return Begin() + pos;
  259. }
  260. void String::Erase(unsigned pos, unsigned length)
  261. {
  262. Replace(pos, length, String::EMPTY);
  263. }
  264. String::Iterator String::Erase(const String::Iterator& it)
  265. {
  266. unsigned pos = it - Begin();
  267. if (pos >= length_)
  268. return End();
  269. Erase(pos);
  270. return Begin() + pos;
  271. }
  272. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  273. {
  274. unsigned pos = start - Begin();
  275. if (pos >= length_)
  276. return End();
  277. unsigned length = end - start;
  278. Erase(pos, length);
  279. return Begin() + pos;
  280. }
  281. void String::Resize(unsigned newLength)
  282. {
  283. if (!capacity_)
  284. {
  285. // Calculate initial capacity
  286. capacity_ = newLength + 1;
  287. if (capacity_ < MIN_CAPACITY)
  288. capacity_ = MIN_CAPACITY;
  289. buffer_ = new char[capacity_];
  290. }
  291. else
  292. {
  293. if (newLength && capacity_ < newLength + 1)
  294. {
  295. // Increase the capacity with half each time it is exceeded
  296. while (capacity_ < newLength + 1)
  297. capacity_ += (capacity_ + 1) >> 1;
  298. char* newBuffer = new char[capacity_];
  299. // Move the existing data to the new buffer, then delete the old buffer
  300. if (length_)
  301. CopyChars(newBuffer, buffer_, length_);
  302. delete[] buffer_;
  303. buffer_ = newBuffer;
  304. }
  305. }
  306. buffer_[newLength] = 0;
  307. length_ = newLength;
  308. }
  309. void String::Reserve(unsigned newCapacity)
  310. {
  311. if (newCapacity < length_ + 1)
  312. newCapacity = length_ + 1;
  313. if (newCapacity == capacity_)
  314. return;
  315. char* newBuffer = new char[newCapacity];
  316. // Move the existing data to the new buffer, then delete the old buffer
  317. CopyChars(newBuffer, buffer_, length_ + 1);
  318. if (capacity_)
  319. delete[] buffer_;
  320. capacity_ = newCapacity;
  321. buffer_ = newBuffer;
  322. }
  323. void String::Compact()
  324. {
  325. if (capacity_)
  326. Reserve(length_ + 1);
  327. }
  328. void String::Clear()
  329. {
  330. Resize(0);
  331. }
  332. void String::Swap(String& str)
  333. {
  334. Urho3D::Swap(length_, str.length_);
  335. Urho3D::Swap(capacity_, str.capacity_);
  336. Urho3D::Swap(buffer_, str.buffer_);
  337. }
  338. String String::Substring(unsigned pos) const
  339. {
  340. if (pos < length_)
  341. {
  342. String ret;
  343. ret.Resize(length_ - pos);
  344. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  345. return ret;
  346. }
  347. else
  348. return String();
  349. }
  350. String String::Substring(unsigned pos, unsigned length) const
  351. {
  352. if (pos < length_)
  353. {
  354. String ret;
  355. if (pos + length > length_)
  356. length = length_ - pos;
  357. ret.Resize(length);
  358. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  359. return ret;
  360. }
  361. else
  362. return String();
  363. }
  364. String String::Trimmed() const
  365. {
  366. unsigned trimStart = 0;
  367. unsigned trimEnd = length_;
  368. while (trimStart < trimEnd)
  369. {
  370. char c = buffer_[trimStart];
  371. if (c != ' ' && c != 9)
  372. break;
  373. ++trimStart;
  374. }
  375. while (trimEnd > trimStart)
  376. {
  377. char c = buffer_[trimEnd - 1];
  378. if (c != ' ' && c != 9)
  379. break;
  380. --trimEnd;
  381. }
  382. return Substring(trimStart, trimEnd - trimStart);
  383. }
  384. String String::ToLower() const
  385. {
  386. String ret(*this);
  387. for (unsigned i = 0; i < ret.length_; ++i)
  388. ret[i] = tolower(buffer_[i]);
  389. return ret;
  390. }
  391. String String::ToUpper() const
  392. {
  393. String ret(*this);
  394. for (unsigned i = 0; i < ret.length_; ++i)
  395. ret[i] = toupper(buffer_[i]);
  396. return ret;
  397. }
  398. Vector<String> String::Split(char separator) const
  399. {
  400. return Split(CString(), separator);
  401. }
  402. void String::Join(const Vector<String>& subStrings, String glue)
  403. {
  404. *this = Joined(subStrings, glue);
  405. }
  406. unsigned String::Find(char c, unsigned startPos) const
  407. {
  408. for (unsigned i = startPos; i < length_; ++i)
  409. {
  410. if (buffer_[i] == c)
  411. return i;
  412. }
  413. return NPOS;
  414. }
  415. unsigned String::Find(const String& str, unsigned startPos) const
  416. {
  417. if (!str.length_ || str.length_ > length_)
  418. return NPOS;
  419. char first = str.buffer_[0];
  420. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  421. {
  422. if (buffer_[i] == first)
  423. {
  424. unsigned skip = NPOS;
  425. bool found = true;
  426. for (unsigned j = 1; j < str.length_; ++j)
  427. {
  428. char c = buffer_[i + j];
  429. if (skip == NPOS && c == first)
  430. skip = i + j - 1;
  431. if (c != str.buffer_[j])
  432. {
  433. found = false;
  434. if (skip != NPOS)
  435. i = skip;
  436. break;
  437. }
  438. }
  439. if (found)
  440. return i;
  441. }
  442. }
  443. return NPOS;
  444. }
  445. unsigned String::FindLast(char c, unsigned startPos) const
  446. {
  447. if (startPos >= length_)
  448. startPos = length_ - 1;
  449. for (unsigned i = startPos; i < length_; --i)
  450. {
  451. if (buffer_[i] == c)
  452. return i;
  453. }
  454. return NPOS;
  455. }
  456. unsigned String::FindLast(const String& str, unsigned startPos) const
  457. {
  458. if (!str.length_ || str.length_ > length_)
  459. return NPOS;
  460. if (startPos > length_ - str.length_)
  461. startPos = length_ - str.length_;
  462. char first = str.buffer_[0];
  463. for (unsigned i = startPos; i < length_; --i)
  464. {
  465. if (buffer_[i] == first)
  466. {
  467. bool found = true;
  468. for (unsigned j = 1; j < str.length_; ++j)
  469. {
  470. char c = buffer_[i + j];
  471. if (c != str.buffer_[j])
  472. {
  473. found = false;
  474. break;
  475. }
  476. }
  477. if (found)
  478. return i;
  479. }
  480. }
  481. return NPOS;
  482. }
  483. bool String::StartsWith(const String& str) const
  484. {
  485. return Find(str) == 0;
  486. }
  487. bool String::EndsWith(const String& str) const
  488. {
  489. return FindLast(str) == Length() - str.Length();
  490. }
  491. int String::Compare(const String& str, bool caseSensitive) const
  492. {
  493. return Compare(CString(), str.CString(), caseSensitive);
  494. }
  495. int String::Compare(const char* str, bool caseSensitive) const
  496. {
  497. return Compare(CString(), str, caseSensitive);
  498. }
  499. void String::SetUTF8FromLatin1(const char* str)
  500. {
  501. char temp[7];
  502. Clear();
  503. if (!str)
  504. return;
  505. while (*str)
  506. {
  507. char* dest = temp;
  508. EncodeUTF8(dest, *str++);
  509. *dest = 0;
  510. Append(temp);
  511. }
  512. }
  513. void String::SetUTF8FromWChar(const wchar_t* str)
  514. {
  515. char temp[7];
  516. Clear();
  517. if (!str)
  518. return;
  519. #ifdef WIN32
  520. while (*str)
  521. {
  522. unsigned unicodeChar = DecodeUTF16(str);
  523. char* dest = temp;
  524. EncodeUTF8(dest, unicodeChar);
  525. *dest = 0;
  526. Append(temp);
  527. }
  528. #else
  529. while (*str)
  530. {
  531. char* dest = temp;
  532. EncodeUTF8(dest, *str++);
  533. *dest = 0;
  534. Append(temp);
  535. }
  536. #endif
  537. }
  538. unsigned String::LengthUTF8() const
  539. {
  540. unsigned ret = 0;
  541. const char* src = buffer_;
  542. if (!src)
  543. return ret;
  544. const char* end = buffer_ + length_;
  545. while (src < end)
  546. {
  547. DecodeUTF8(src);
  548. ++ret;
  549. }
  550. return ret;
  551. }
  552. unsigned String::ByteOffsetUTF8(unsigned index) const
  553. {
  554. unsigned byteOffset = 0;
  555. unsigned utfPos = 0;
  556. while (utfPos < index && byteOffset < length_)
  557. {
  558. NextUTF8Char(byteOffset);
  559. ++utfPos;
  560. }
  561. return byteOffset;
  562. }
  563. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  564. {
  565. if (!buffer_)
  566. return 0;
  567. const char* src = buffer_ + byteOffset;
  568. unsigned ret = DecodeUTF8(src);
  569. byteOffset = src - buffer_;
  570. return ret;
  571. }
  572. unsigned String::AtUTF8(unsigned index) const
  573. {
  574. unsigned byteOffset = ByteOffsetUTF8(index);
  575. return NextUTF8Char(byteOffset);
  576. }
  577. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  578. {
  579. unsigned utfPos = 0;
  580. unsigned byteOffset = 0;
  581. while (utfPos < index && byteOffset < length_)
  582. {
  583. NextUTF8Char(byteOffset);
  584. ++utfPos;
  585. }
  586. if (utfPos < index)
  587. return;
  588. unsigned beginCharPos = byteOffset;
  589. NextUTF8Char(byteOffset);
  590. char temp[7];
  591. char* dest = temp;
  592. EncodeUTF8(dest, unicodeChar);
  593. *dest = 0;
  594. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  595. }
  596. String& String::AppendUTF8(unsigned unicodeChar)
  597. {
  598. char temp[7];
  599. char* dest = temp;
  600. EncodeUTF8(dest, unicodeChar);
  601. *dest = 0;
  602. return Append(temp);
  603. }
  604. String String::SubstringUTF8(unsigned pos) const
  605. {
  606. unsigned utf8Length = LengthUTF8();
  607. unsigned byteOffset = ByteOffsetUTF8(pos);
  608. String ret;
  609. while (pos < utf8Length)
  610. {
  611. ret.AppendUTF8(NextUTF8Char(byteOffset));
  612. ++pos;
  613. }
  614. return ret;
  615. }
  616. String String::SubstringUTF8(unsigned pos, unsigned length) const
  617. {
  618. unsigned utf8Length = LengthUTF8();
  619. unsigned byteOffset = ByteOffsetUTF8(pos);
  620. unsigned endPos = pos + length;
  621. String ret;
  622. while (pos < endPos && pos < utf8Length)
  623. {
  624. ret.AppendUTF8(NextUTF8Char(byteOffset));
  625. ++pos;
  626. }
  627. return ret;
  628. }
  629. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  630. {
  631. if (unicodeChar < 0x80)
  632. *dest++ = unicodeChar;
  633. else if (unicodeChar < 0x800)
  634. {
  635. *dest++ = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  636. *dest++ = 0x80 | (unicodeChar & 0x3f);
  637. }
  638. else if (unicodeChar < 0x10000)
  639. {
  640. *dest++ = 0xe0 | ((unicodeChar >> 12) & 0xf);
  641. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  642. *dest++ = 0x80 | (unicodeChar & 0x3f);
  643. }
  644. else if (unicodeChar < 0x200000)
  645. {
  646. *dest++ = 0xf0 | ((unicodeChar >> 18) & 0x7);
  647. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  648. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  649. *dest++ = 0x80 | (unicodeChar & 0x3f);
  650. }
  651. else if (unicodeChar < 0x4000000)
  652. {
  653. *dest++ = 0xf8 | ((unicodeChar >> 24) & 0x3);
  654. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  655. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  656. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  657. *dest++ = 0x80 | (unicodeChar & 0x3f);
  658. }
  659. else
  660. {
  661. *dest++ = 0xfc | ((unicodeChar >> 30) & 0x1);
  662. *dest++ = 0x80 | ((unicodeChar >> 24) & 0x3f);
  663. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  664. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  665. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  666. *dest++ = 0x80 | (unicodeChar & 0x3f);
  667. }
  668. }
  669. #define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  670. unsigned String::DecodeUTF8(const char*& src)
  671. {
  672. if (src == 0)
  673. return 0;
  674. unsigned char char1 = *src++;
  675. // Check if we are in the middle of a UTF8 character
  676. if (char1 >= 0x80 && char1 < 0xc0)
  677. {
  678. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  679. ++src;
  680. return '?';
  681. }
  682. if (char1 < 0x80)
  683. return char1;
  684. else if (char1 < 0xe0)
  685. {
  686. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  687. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  688. }
  689. else if (char1 < 0xf0)
  690. {
  691. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  692. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  693. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  694. }
  695. else if (char1 < 0xf8)
  696. {
  697. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  698. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  699. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  700. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  701. }
  702. else if (char1 < 0xfc)
  703. {
  704. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  705. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  706. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  707. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  708. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  709. }
  710. else
  711. {
  712. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  713. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  714. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  715. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  716. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  717. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  718. ((char1 & 0x1) << 30);
  719. }
  720. }
  721. #ifdef WIN32
  722. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  723. {
  724. if (unicodeChar < 0x10000)
  725. *dest++ = unicodeChar;
  726. else
  727. {
  728. unicodeChar -= 0x10000;
  729. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  730. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  731. }
  732. }
  733. unsigned String::DecodeUTF16(const wchar_t*& src)
  734. {
  735. if (src == 0)
  736. return 0;
  737. unsigned short word1 = *src;
  738. // Check if we are at a low surrogate
  739. word1 = *src++;
  740. if (word1 >= 0xdc00 && word1 < 0xe000)
  741. {
  742. while (*src >= 0xdc00 && *src < 0xe000)
  743. ++src;
  744. return '?';
  745. }
  746. if (word1 < 0xd800 || word1 >= 0xe00)
  747. return word1;
  748. else
  749. {
  750. unsigned short word2 = *src++;
  751. if (word2 < 0xdc00 || word2 >= 0xe000)
  752. {
  753. --src;
  754. return '?';
  755. }
  756. else
  757. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  758. }
  759. }
  760. #endif
  761. Vector<String> String::Split(const char* str, char separator)
  762. {
  763. Vector<String> ret;
  764. unsigned pos = 0;
  765. unsigned length = CStringLength(str);
  766. while (pos < length)
  767. {
  768. if (str[pos] != separator)
  769. break;
  770. ++pos;
  771. }
  772. while (pos < length)
  773. {
  774. unsigned start = pos;
  775. while (start < length)
  776. {
  777. if (str[start] == separator)
  778. break;
  779. ++start;
  780. }
  781. if (start == length)
  782. {
  783. ret.Push(String(&str[pos]));
  784. break;
  785. }
  786. unsigned end = start;
  787. while (end < length)
  788. {
  789. if (str[end] != separator)
  790. break;
  791. ++end;
  792. }
  793. ret.Push(String(&str[pos], start - pos));
  794. pos = end;
  795. }
  796. return ret;
  797. }
  798. String String::Joined(const Vector<String>& subStrings, String glue)
  799. {
  800. if (subStrings.Empty())
  801. return String();
  802. String joinedString(subStrings[0]);
  803. for (unsigned i = 1; i < subStrings.Size(); ++i)
  804. joinedString.Append(glue).Append(subStrings[i]);
  805. return joinedString;
  806. }
  807. String& String::AppendWithFormat(const char* formatString, ... )
  808. {
  809. va_list args;
  810. va_start(args, formatString);
  811. AppendWithFormatArgs(formatString, args);
  812. va_end(args);
  813. return *this;
  814. }
  815. String& String::AppendWithFormatArgs(const char* formatString, va_list args)
  816. {
  817. int pos = 0, lastPos = 0;
  818. int length = strlen(formatString);
  819. while (true)
  820. {
  821. // Scan the format string and find %a argument where a is one of d, f, s ...
  822. while (pos < length && formatString[pos] != '%') pos++;
  823. Append(formatString + lastPos, pos - lastPos);
  824. if (pos >= length)
  825. return *this;
  826. char arg = formatString[pos + 1];
  827. pos += 2;
  828. lastPos = pos;
  829. switch (arg)
  830. {
  831. // Integer
  832. case 'd':
  833. case 'i':
  834. {
  835. int arg = va_arg(args, int);
  836. Append(String(arg));
  837. break;
  838. }
  839. // Unsigned
  840. case 'u':
  841. {
  842. unsigned arg = va_arg(args, unsigned);
  843. Append(String(arg));
  844. break;
  845. }
  846. // Real
  847. case 'f':
  848. {
  849. double arg = va_arg(args, double);
  850. Append(String(arg));
  851. break;
  852. }
  853. // Character
  854. case 'c':
  855. {
  856. int arg = va_arg(args, int);
  857. Append(arg);
  858. break;
  859. }
  860. // C string
  861. case 's':
  862. {
  863. char* arg = va_arg(args, char*);
  864. Append(arg);
  865. break;
  866. }
  867. // Hex
  868. case 'x':
  869. {
  870. char buf[CONVERSION_BUFFER_LENGTH];
  871. int arg = va_arg(args, int);
  872. int arglen = ::sprintf(buf, "%x", arg);
  873. Append(buf, arglen);
  874. break;
  875. }
  876. // Pointer
  877. case 'p':
  878. {
  879. char buf[CONVERSION_BUFFER_LENGTH];
  880. int arg = va_arg(args, int);
  881. int arglen = ::sprintf(buf, "%p", reinterpret_cast<void*>(arg));
  882. Append(buf, arglen);
  883. break;
  884. }
  885. case '%':
  886. {
  887. Append("%", 1);
  888. break;
  889. }
  890. }
  891. }
  892. return *this;
  893. }
  894. int String::Compare(const char* lhs, const char* rhs, bool caseSensitive)
  895. {
  896. if (!lhs || !rhs)
  897. return lhs ? 1 : (rhs ? -1 : 0);
  898. if (caseSensitive)
  899. return strcmp(lhs, rhs);
  900. else
  901. {
  902. for (;;)
  903. {
  904. char l = tolower(*lhs);
  905. char r = tolower(*rhs);
  906. if (!l || !r)
  907. return l ? 1 : (r ? -1 : 0);
  908. if (l < r)
  909. return -1;
  910. if (l > r)
  911. return 1;
  912. ++lhs;
  913. ++rhs;
  914. }
  915. }
  916. }
  917. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  918. {
  919. int delta = (int)srcLength - (int)length;
  920. if (pos + length < length_)
  921. {
  922. if (delta < 0)
  923. {
  924. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  925. Resize(length_ + delta);
  926. }
  927. if (delta > 0)
  928. {
  929. Resize(length_ + delta);
  930. MoveRange(pos + srcLength, pos + length, length_ - pos - length - delta);
  931. }
  932. }
  933. else
  934. Resize(length_ + delta);
  935. CopyChars(buffer_ + pos, srcStart, srcLength);
  936. }
  937. WString::WString() :
  938. length_(0),
  939. buffer_(0)
  940. {
  941. }
  942. WString::WString(const String& str) :
  943. length_(0),
  944. buffer_(0)
  945. {
  946. #ifdef WIN32
  947. unsigned neededSize = 0;
  948. wchar_t temp[3];
  949. unsigned byteOffset = 0;
  950. while (byteOffset < str.Length())
  951. {
  952. wchar_t* dest = temp;
  953. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  954. neededSize += dest - temp;
  955. }
  956. Resize(neededSize);
  957. byteOffset = 0;
  958. wchar_t* dest = buffer_;
  959. while (byteOffset < str.Length())
  960. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  961. #else
  962. Resize(str.LengthUTF8());
  963. unsigned byteOffset = 0;
  964. wchar_t* dest = buffer_;
  965. while (byteOffset < str.Length())
  966. *dest++ = str.NextUTF8Char(byteOffset);
  967. #endif
  968. }
  969. WString::~WString()
  970. {
  971. delete[] buffer_;
  972. }
  973. void WString::Resize(unsigned newSize)
  974. {
  975. if (!newSize)
  976. {
  977. delete[] buffer_;
  978. buffer_ = 0;
  979. length_ = 0;
  980. }
  981. else
  982. {
  983. wchar_t* newBuffer = new wchar_t[newSize + 1];
  984. if (buffer_)
  985. memcpy(newBuffer, buffer_, length_ * sizeof(wchar_t));
  986. newBuffer[newSize] = 0;
  987. buffer_ = newBuffer;
  988. length_ = newSize;
  989. }
  990. }
  991. }