Str.cpp 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281
  1. //
  2. // Copyright (c) 2008-2013 the Urho3D project.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. #include "Precompiled.h"
  23. #include "Str.h"
  24. #include "Swap.h"
  25. #include <cstdio>
  26. #include "DebugNew.h"
  27. namespace Urho3D
  28. {
  29. char String::endZero = 0;
  30. const String String::EMPTY;
  31. String::String(const WString& str) :
  32. length_(0),
  33. capacity_(0),
  34. buffer_(&endZero)
  35. {
  36. SetUTF8FromWChar(str.CString());
  37. }
  38. String::String(int value) :
  39. length_(0),
  40. capacity_(0),
  41. buffer_(&endZero)
  42. {
  43. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  44. sprintf(tempBuffer, "%d", value);
  45. *this = tempBuffer;
  46. }
  47. String::String(short value) :
  48. length_(0),
  49. capacity_(0),
  50. buffer_(&endZero)
  51. {
  52. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  53. sprintf(tempBuffer, "%d", value);
  54. *this = tempBuffer;
  55. }
  56. String::String(long value) :
  57. length_(0),
  58. capacity_(0),
  59. buffer_(&endZero)
  60. {
  61. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  62. sprintf(tempBuffer, "%ld", value);
  63. *this = tempBuffer;
  64. }
  65. String::String(long long value) :
  66. length_(0),
  67. capacity_(0),
  68. buffer_(&endZero)
  69. {
  70. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  71. sprintf(tempBuffer, "%lld", value);
  72. *this = tempBuffer;
  73. }
  74. String::String(unsigned value) :
  75. length_(0),
  76. capacity_(0),
  77. buffer_(&endZero)
  78. {
  79. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  80. sprintf(tempBuffer, "%u", value);
  81. *this = tempBuffer;
  82. }
  83. String::String(unsigned short value) :
  84. length_(0),
  85. capacity_(0),
  86. buffer_(&endZero)
  87. {
  88. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  89. sprintf(tempBuffer, "%u", value);
  90. *this = tempBuffer;
  91. }
  92. String::String(unsigned long value) :
  93. length_(0),
  94. capacity_(0),
  95. buffer_(&endZero)
  96. {
  97. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  98. sprintf(tempBuffer, "%lu", value);
  99. *this = tempBuffer;
  100. }
  101. String::String(unsigned long long value) :
  102. length_(0),
  103. capacity_(0),
  104. buffer_(&endZero)
  105. {
  106. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  107. sprintf(tempBuffer, "%llu", value);
  108. *this = tempBuffer;
  109. }
  110. String::String(float value) :
  111. length_(0),
  112. capacity_(0),
  113. buffer_(&endZero)
  114. {
  115. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  116. sprintf(tempBuffer, "%g", value);
  117. *this = tempBuffer;
  118. }
  119. String::String(double value) :
  120. length_(0),
  121. capacity_(0),
  122. buffer_(&endZero)
  123. {
  124. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  125. sprintf(tempBuffer, "%g", value);
  126. *this = tempBuffer;
  127. }
  128. String::String(bool value) :
  129. length_(0),
  130. capacity_(0),
  131. buffer_(&endZero)
  132. {
  133. if (value)
  134. *this = "true";
  135. else
  136. *this = "false";
  137. }
  138. String::String(char value) :
  139. length_(0),
  140. capacity_(0),
  141. buffer_(&endZero)
  142. {
  143. Resize(1);
  144. buffer_[0] = value;
  145. }
  146. String::String(char value, unsigned length) :
  147. length_(0),
  148. capacity_(0),
  149. buffer_(&endZero)
  150. {
  151. Resize(length);
  152. for (unsigned i = 0; i < length; ++i)
  153. buffer_[i] = value;
  154. }
  155. String& String::operator += (int rhs)
  156. {
  157. return *this += String(rhs);
  158. }
  159. String& String::operator += (short rhs)
  160. {
  161. return *this += String(rhs);
  162. }
  163. String& String::operator += (unsigned rhs)
  164. {
  165. return *this += String(rhs);
  166. }
  167. String& String::operator += (unsigned short rhs)
  168. {
  169. return *this += String(rhs);
  170. }
  171. String& String::operator += (float rhs)
  172. {
  173. return *this += String(rhs);
  174. }
  175. String& String::operator += (bool rhs)
  176. {
  177. return *this += String(rhs);
  178. }
  179. void String::Replace(char replaceThis, char replaceWith, bool caseSensitive)
  180. {
  181. if (caseSensitive)
  182. {
  183. for (unsigned i = 0; i < length_; ++i)
  184. {
  185. if (buffer_[i] == replaceThis)
  186. buffer_[i] = replaceWith;
  187. }
  188. }
  189. else
  190. {
  191. replaceThis = tolower(replaceThis);
  192. for (unsigned i = 0; i < length_; ++i)
  193. {
  194. if (tolower(buffer_[i]) == replaceThis)
  195. buffer_[i] = replaceWith;
  196. }
  197. }
  198. }
  199. void String::Replace(const String& replaceThis, const String& replaceWith, bool caseSensitive)
  200. {
  201. unsigned nextPos = 0;
  202. while (nextPos < length_)
  203. {
  204. unsigned pos = Find(replaceThis, nextPos, caseSensitive);
  205. if (pos == NPOS)
  206. break;
  207. Replace(pos, replaceThis.length_, replaceWith);
  208. nextPos = pos + replaceWith.length_;
  209. }
  210. }
  211. void String::Replace(unsigned pos, unsigned length, const String& replaceWith)
  212. {
  213. // If substring is illegal, do nothing
  214. if (pos + length > length_)
  215. return;
  216. Replace(pos, length, replaceWith.buffer_, replaceWith.length_);
  217. }
  218. void String::Replace(unsigned pos, unsigned length, const char* replaceWith)
  219. {
  220. // If substring is illegal, do nothing
  221. if (pos + length > length_)
  222. return;
  223. Replace(pos, length, replaceWith, CStringLength(replaceWith));
  224. }
  225. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  226. {
  227. unsigned pos = start - Begin();
  228. if (pos >= length_)
  229. return End();
  230. unsigned length = end - start;
  231. Replace(pos, length, replaceWith);
  232. return Begin() + pos;
  233. }
  234. String String::Replaced(char replaceThis, char replaceWith, bool caseSensitive) const
  235. {
  236. String ret(*this);
  237. ret.Replace(replaceThis, replaceWith, caseSensitive);
  238. return ret;
  239. }
  240. String String::Replaced(const String& replaceThis, const String& replaceWith, bool caseSensitive) const
  241. {
  242. String ret(*this);
  243. ret.Replace(replaceThis, replaceWith, caseSensitive);
  244. return ret;
  245. }
  246. String& String::Append(const String& str)
  247. {
  248. return *this += str;
  249. }
  250. String& String::Append(const char* str)
  251. {
  252. return *this += str;
  253. }
  254. String& String::Append(char c)
  255. {
  256. return *this += c;
  257. }
  258. String& String::Append(const char* str, unsigned length)
  259. {
  260. if (str)
  261. {
  262. unsigned oldLength = length_;
  263. Resize(oldLength + length);
  264. CopyChars(&buffer_[oldLength], str, length);
  265. }
  266. return *this;
  267. }
  268. void String::Insert(unsigned pos, const String& str)
  269. {
  270. if (pos > length_)
  271. pos = length_;
  272. if (pos == length_)
  273. (*this) += str;
  274. else
  275. Replace(pos, 0, str);
  276. }
  277. void String::Insert(unsigned pos, char c)
  278. {
  279. if (pos > length_)
  280. pos = length_;
  281. if (pos == length_)
  282. (*this) += c;
  283. else
  284. {
  285. unsigned oldLength = length_;
  286. Resize(length_ + 1);
  287. MoveRange(pos + 1, pos, oldLength - pos);
  288. buffer_[pos] = c;
  289. }
  290. }
  291. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  292. {
  293. unsigned pos = dest - Begin();
  294. if (pos > length_)
  295. pos = length_;
  296. Insert(pos, str);
  297. return Begin() + pos;
  298. }
  299. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  300. {
  301. unsigned pos = dest - Begin();
  302. if (pos > length_)
  303. pos = length_;
  304. unsigned length = end - start;
  305. Replace(pos, 0, &(*start), length);
  306. return Begin() + pos;
  307. }
  308. String::Iterator String::Insert(const String::Iterator& dest, char c)
  309. {
  310. unsigned pos = dest - Begin();
  311. if (pos > length_)
  312. pos = length_;
  313. Insert(pos, c);
  314. return Begin() + pos;
  315. }
  316. void String::Erase(unsigned pos, unsigned length)
  317. {
  318. Replace(pos, length, String::EMPTY);
  319. }
  320. String::Iterator String::Erase(const String::Iterator& it)
  321. {
  322. unsigned pos = it - Begin();
  323. if (pos >= length_)
  324. return End();
  325. Erase(pos);
  326. return Begin() + pos;
  327. }
  328. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  329. {
  330. unsigned pos = start - Begin();
  331. if (pos >= length_)
  332. return End();
  333. unsigned length = end - start;
  334. Erase(pos, length);
  335. return Begin() + pos;
  336. }
  337. void String::Resize(unsigned newLength)
  338. {
  339. if (!capacity_)
  340. {
  341. // Calculate initial capacity
  342. capacity_ = newLength + 1;
  343. if (capacity_ < MIN_CAPACITY)
  344. capacity_ = MIN_CAPACITY;
  345. buffer_ = new char[capacity_];
  346. }
  347. else
  348. {
  349. if (newLength && capacity_ < newLength + 1)
  350. {
  351. // Increase the capacity with half each time it is exceeded
  352. while (capacity_ < newLength + 1)
  353. capacity_ += (capacity_ + 1) >> 1;
  354. char* newBuffer = new char[capacity_];
  355. // Move the existing data to the new buffer, then delete the old buffer
  356. if (length_)
  357. CopyChars(newBuffer, buffer_, length_);
  358. delete[] buffer_;
  359. buffer_ = newBuffer;
  360. }
  361. }
  362. buffer_[newLength] = 0;
  363. length_ = newLength;
  364. }
  365. void String::Reserve(unsigned newCapacity)
  366. {
  367. if (newCapacity < length_ + 1)
  368. newCapacity = length_ + 1;
  369. if (newCapacity == capacity_)
  370. return;
  371. char* newBuffer = new char[newCapacity];
  372. // Move the existing data to the new buffer, then delete the old buffer
  373. CopyChars(newBuffer, buffer_, length_ + 1);
  374. if (capacity_)
  375. delete[] buffer_;
  376. capacity_ = newCapacity;
  377. buffer_ = newBuffer;
  378. }
  379. void String::Compact()
  380. {
  381. if (capacity_)
  382. Reserve(length_ + 1);
  383. }
  384. void String::Clear()
  385. {
  386. Resize(0);
  387. }
  388. void String::Swap(String& str)
  389. {
  390. Urho3D::Swap(length_, str.length_);
  391. Urho3D::Swap(capacity_, str.capacity_);
  392. Urho3D::Swap(buffer_, str.buffer_);
  393. }
  394. String String::Substring(unsigned pos) const
  395. {
  396. if (pos < length_)
  397. {
  398. String ret;
  399. ret.Resize(length_ - pos);
  400. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  401. return ret;
  402. }
  403. else
  404. return String();
  405. }
  406. String String::Substring(unsigned pos, unsigned length) const
  407. {
  408. if (pos < length_)
  409. {
  410. String ret;
  411. if (pos + length > length_)
  412. length = length_ - pos;
  413. ret.Resize(length);
  414. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  415. return ret;
  416. }
  417. else
  418. return String();
  419. }
  420. String String::Trimmed() const
  421. {
  422. unsigned trimStart = 0;
  423. unsigned trimEnd = length_;
  424. while (trimStart < trimEnd)
  425. {
  426. char c = buffer_[trimStart];
  427. if (c != ' ' && c != 9)
  428. break;
  429. ++trimStart;
  430. }
  431. while (trimEnd > trimStart)
  432. {
  433. char c = buffer_[trimEnd - 1];
  434. if (c != ' ' && c != 9)
  435. break;
  436. --trimEnd;
  437. }
  438. return Substring(trimStart, trimEnd - trimStart);
  439. }
  440. String String::ToLower() const
  441. {
  442. String ret(*this);
  443. for (unsigned i = 0; i < ret.length_; ++i)
  444. ret[i] = tolower(buffer_[i]);
  445. return ret;
  446. }
  447. String String::ToUpper() const
  448. {
  449. String ret(*this);
  450. for (unsigned i = 0; i < ret.length_; ++i)
  451. ret[i] = toupper(buffer_[i]);
  452. return ret;
  453. }
  454. Vector<String> String::Split(char separator) const
  455. {
  456. return Split(CString(), separator);
  457. }
  458. void String::Join(const Vector<String>& subStrings, String glue)
  459. {
  460. *this = Joined(subStrings, glue);
  461. }
  462. unsigned String::Find(char c, unsigned startPos, bool caseSensitive) const
  463. {
  464. if (caseSensitive)
  465. {
  466. for (unsigned i = startPos; i < length_; ++i)
  467. {
  468. if (buffer_[i] == c)
  469. return i;
  470. }
  471. }
  472. else
  473. {
  474. c = tolower(c);
  475. for (unsigned i = startPos; i < length_; ++i)
  476. {
  477. if (tolower(buffer_[i]) == c)
  478. return i;
  479. }
  480. }
  481. return NPOS;
  482. }
  483. unsigned String::Find(const String& str, unsigned startPos, bool caseSensitive) const
  484. {
  485. if (!str.length_ || str.length_ > length_)
  486. return NPOS;
  487. char first = str.buffer_[0];
  488. if (!caseSensitive)
  489. first = tolower(first);
  490. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  491. {
  492. char c = buffer_[i];
  493. if (!caseSensitive)
  494. c = tolower(c);
  495. if (c == first)
  496. {
  497. unsigned skip = NPOS;
  498. bool found = true;
  499. for (unsigned j = 1; j < str.length_; ++j)
  500. {
  501. c = buffer_[i + j];
  502. char d = str.buffer_[j];
  503. if (!caseSensitive)
  504. {
  505. c = tolower(c);
  506. d = tolower(d);
  507. }
  508. if (skip == NPOS && c == first)
  509. skip = i + j - 1;
  510. if (c != d)
  511. {
  512. found = false;
  513. if (skip != NPOS)
  514. i = skip;
  515. break;
  516. }
  517. }
  518. if (found)
  519. return i;
  520. }
  521. }
  522. return NPOS;
  523. }
  524. unsigned String::FindLast(char c, unsigned startPos, bool caseSensitive) const
  525. {
  526. if (startPos >= length_)
  527. startPos = length_ - 1;
  528. if (caseSensitive)
  529. {
  530. for (unsigned i = startPos; i < length_; --i)
  531. {
  532. if (buffer_[i] == c)
  533. return i;
  534. }
  535. }
  536. else
  537. {
  538. c = tolower(c);
  539. for (unsigned i = startPos; i < length_; --i)
  540. {
  541. if (tolower(buffer_[i]) == c)
  542. return i;
  543. }
  544. }
  545. return NPOS;
  546. }
  547. unsigned String::FindLast(const String& str, unsigned startPos, bool caseSensitive) const
  548. {
  549. if (!str.length_ || str.length_ > length_)
  550. return NPOS;
  551. if (startPos > length_ - str.length_)
  552. startPos = length_ - str.length_;
  553. char first = str.buffer_[0];
  554. if (!caseSensitive)
  555. first = tolower(first);
  556. for (unsigned i = startPos; i < length_; --i)
  557. {
  558. char c = buffer_[i];
  559. if (!caseSensitive)
  560. c = tolower(c);
  561. if (c == first)
  562. {
  563. bool found = true;
  564. for (unsigned j = 1; j < str.length_; ++j)
  565. {
  566. c = buffer_[i + j];
  567. char d = str.buffer_[j];
  568. if (!caseSensitive)
  569. {
  570. c = tolower(c);
  571. d = tolower(d);
  572. }
  573. if (c != d)
  574. {
  575. found = false;
  576. break;
  577. }
  578. }
  579. if (found)
  580. return i;
  581. }
  582. }
  583. return NPOS;
  584. }
  585. bool String::StartsWith(const String& str, bool caseSensitive) const
  586. {
  587. return Find(str, 0, caseSensitive) == 0;
  588. }
  589. bool String::EndsWith(const String& str, bool caseSensitive) const
  590. {
  591. unsigned pos = FindLast(str, Length() - 1, caseSensitive);
  592. return pos != NPOS && pos == Length() - str.Length();
  593. }
  594. int String::Compare(const String& str, bool caseSensitive) const
  595. {
  596. return Compare(CString(), str.CString(), caseSensitive);
  597. }
  598. int String::Compare(const char* str, bool caseSensitive) const
  599. {
  600. return Compare(CString(), str, caseSensitive);
  601. }
  602. void String::SetUTF8FromLatin1(const char* str)
  603. {
  604. char temp[7];
  605. Clear();
  606. if (!str)
  607. return;
  608. while (*str)
  609. {
  610. char* dest = temp;
  611. EncodeUTF8(dest, *str++);
  612. *dest = 0;
  613. Append(temp);
  614. }
  615. }
  616. void String::SetUTF8FromWChar(const wchar_t* str)
  617. {
  618. char temp[7];
  619. Clear();
  620. if (!str)
  621. return;
  622. #ifdef WIN32
  623. while (*str)
  624. {
  625. unsigned unicodeChar = DecodeUTF16(str);
  626. char* dest = temp;
  627. EncodeUTF8(dest, unicodeChar);
  628. *dest = 0;
  629. Append(temp);
  630. }
  631. #else
  632. while (*str)
  633. {
  634. char* dest = temp;
  635. EncodeUTF8(dest, *str++);
  636. *dest = 0;
  637. Append(temp);
  638. }
  639. #endif
  640. }
  641. unsigned String::LengthUTF8() const
  642. {
  643. unsigned ret = 0;
  644. const char* src = buffer_;
  645. if (!src)
  646. return ret;
  647. const char* end = buffer_ + length_;
  648. while (src < end)
  649. {
  650. DecodeUTF8(src);
  651. ++ret;
  652. }
  653. return ret;
  654. }
  655. unsigned String::ByteOffsetUTF8(unsigned index) const
  656. {
  657. unsigned byteOffset = 0;
  658. unsigned utfPos = 0;
  659. while (utfPos < index && byteOffset < length_)
  660. {
  661. NextUTF8Char(byteOffset);
  662. ++utfPos;
  663. }
  664. return byteOffset;
  665. }
  666. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  667. {
  668. if (!buffer_)
  669. return 0;
  670. const char* src = buffer_ + byteOffset;
  671. unsigned ret = DecodeUTF8(src);
  672. byteOffset = src - buffer_;
  673. return ret;
  674. }
  675. unsigned String::AtUTF8(unsigned index) const
  676. {
  677. unsigned byteOffset = ByteOffsetUTF8(index);
  678. return NextUTF8Char(byteOffset);
  679. }
  680. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  681. {
  682. unsigned utfPos = 0;
  683. unsigned byteOffset = 0;
  684. while (utfPos < index && byteOffset < length_)
  685. {
  686. NextUTF8Char(byteOffset);
  687. ++utfPos;
  688. }
  689. if (utfPos < index)
  690. return;
  691. unsigned beginCharPos = byteOffset;
  692. NextUTF8Char(byteOffset);
  693. char temp[7];
  694. char* dest = temp;
  695. EncodeUTF8(dest, unicodeChar);
  696. *dest = 0;
  697. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  698. }
  699. String& String::AppendUTF8(unsigned unicodeChar)
  700. {
  701. char temp[7];
  702. char* dest = temp;
  703. EncodeUTF8(dest, unicodeChar);
  704. *dest = 0;
  705. return Append(temp);
  706. }
  707. String String::SubstringUTF8(unsigned pos) const
  708. {
  709. unsigned utf8Length = LengthUTF8();
  710. unsigned byteOffset = ByteOffsetUTF8(pos);
  711. String ret;
  712. while (pos < utf8Length)
  713. {
  714. ret.AppendUTF8(NextUTF8Char(byteOffset));
  715. ++pos;
  716. }
  717. return ret;
  718. }
  719. String String::SubstringUTF8(unsigned pos, unsigned length) const
  720. {
  721. unsigned utf8Length = LengthUTF8();
  722. unsigned byteOffset = ByteOffsetUTF8(pos);
  723. unsigned endPos = pos + length;
  724. String ret;
  725. while (pos < endPos && pos < utf8Length)
  726. {
  727. ret.AppendUTF8(NextUTF8Char(byteOffset));
  728. ++pos;
  729. }
  730. return ret;
  731. }
  732. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  733. {
  734. if (unicodeChar < 0x80)
  735. *dest++ = unicodeChar;
  736. else if (unicodeChar < 0x800)
  737. {
  738. *dest++ = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  739. *dest++ = 0x80 | (unicodeChar & 0x3f);
  740. }
  741. else if (unicodeChar < 0x10000)
  742. {
  743. *dest++ = 0xe0 | ((unicodeChar >> 12) & 0xf);
  744. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  745. *dest++ = 0x80 | (unicodeChar & 0x3f);
  746. }
  747. else if (unicodeChar < 0x200000)
  748. {
  749. *dest++ = 0xf0 | ((unicodeChar >> 18) & 0x7);
  750. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  751. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  752. *dest++ = 0x80 | (unicodeChar & 0x3f);
  753. }
  754. else if (unicodeChar < 0x4000000)
  755. {
  756. *dest++ = 0xf8 | ((unicodeChar >> 24) & 0x3);
  757. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  758. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  759. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  760. *dest++ = 0x80 | (unicodeChar & 0x3f);
  761. }
  762. else
  763. {
  764. *dest++ = 0xfc | ((unicodeChar >> 30) & 0x1);
  765. *dest++ = 0x80 | ((unicodeChar >> 24) & 0x3f);
  766. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  767. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  768. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  769. *dest++ = 0x80 | (unicodeChar & 0x3f);
  770. }
  771. }
  772. #define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  773. unsigned String::DecodeUTF8(const char*& src)
  774. {
  775. if (src == 0)
  776. return 0;
  777. unsigned char char1 = *src++;
  778. // Check if we are in the middle of a UTF8 character
  779. if (char1 >= 0x80 && char1 < 0xc0)
  780. {
  781. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  782. ++src;
  783. return '?';
  784. }
  785. if (char1 < 0x80)
  786. return char1;
  787. else if (char1 < 0xe0)
  788. {
  789. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  790. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  791. }
  792. else if (char1 < 0xf0)
  793. {
  794. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  795. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  796. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  797. }
  798. else if (char1 < 0xf8)
  799. {
  800. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  801. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  802. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  803. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  804. }
  805. else if (char1 < 0xfc)
  806. {
  807. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  808. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  809. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  810. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  811. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  812. }
  813. else
  814. {
  815. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  816. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  817. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  818. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  819. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  820. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  821. ((char1 & 0x1) << 30);
  822. }
  823. }
  824. #ifdef WIN32
  825. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  826. {
  827. if (unicodeChar < 0x10000)
  828. *dest++ = unicodeChar;
  829. else
  830. {
  831. unicodeChar -= 0x10000;
  832. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  833. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  834. }
  835. }
  836. unsigned String::DecodeUTF16(const wchar_t*& src)
  837. {
  838. if (src == 0)
  839. return 0;
  840. unsigned short word1 = *src;
  841. // Check if we are at a low surrogate
  842. word1 = *src++;
  843. if (word1 >= 0xdc00 && word1 < 0xe000)
  844. {
  845. while (*src >= 0xdc00 && *src < 0xe000)
  846. ++src;
  847. return '?';
  848. }
  849. if (word1 < 0xd800 || word1 >= 0xe00)
  850. return word1;
  851. else
  852. {
  853. unsigned short word2 = *src++;
  854. if (word2 < 0xdc00 || word2 >= 0xe000)
  855. {
  856. --src;
  857. return '?';
  858. }
  859. else
  860. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  861. }
  862. }
  863. #endif
  864. Vector<String> String::Split(const char* str, char separator)
  865. {
  866. Vector<String> ret;
  867. unsigned pos = 0;
  868. unsigned length = CStringLength(str);
  869. while (pos < length)
  870. {
  871. if (str[pos] != separator)
  872. break;
  873. ++pos;
  874. }
  875. while (pos < length)
  876. {
  877. unsigned start = pos;
  878. while (start < length)
  879. {
  880. if (str[start] == separator)
  881. break;
  882. ++start;
  883. }
  884. if (start == length)
  885. {
  886. ret.Push(String(&str[pos]));
  887. break;
  888. }
  889. unsigned end = start;
  890. while (end < length)
  891. {
  892. if (str[end] != separator)
  893. break;
  894. ++end;
  895. }
  896. ret.Push(String(&str[pos], start - pos));
  897. pos = end;
  898. }
  899. return ret;
  900. }
  901. String String::Joined(const Vector<String>& subStrings, String glue)
  902. {
  903. if (subStrings.Empty())
  904. return String();
  905. String joinedString(subStrings[0]);
  906. for (unsigned i = 1; i < subStrings.Size(); ++i)
  907. joinedString.Append(glue).Append(subStrings[i]);
  908. return joinedString;
  909. }
  910. String& String::AppendWithFormat(const char* formatString, ... )
  911. {
  912. va_list args;
  913. va_start(args, formatString);
  914. AppendWithFormatArgs(formatString, args);
  915. va_end(args);
  916. return *this;
  917. }
  918. String& String::AppendWithFormatArgs(const char* formatString, va_list args)
  919. {
  920. int pos = 0, lastPos = 0;
  921. int length = strlen(formatString);
  922. while (true)
  923. {
  924. // Scan the format string and find %a argument where a is one of d, f, s ...
  925. while (pos < length && formatString[pos] != '%') pos++;
  926. Append(formatString + lastPos, pos - lastPos);
  927. if (pos >= length)
  928. return *this;
  929. char arg = formatString[pos + 1];
  930. pos += 2;
  931. lastPos = pos;
  932. switch (arg)
  933. {
  934. // Integer
  935. case 'd':
  936. case 'i':
  937. {
  938. int arg = va_arg(args, int);
  939. Append(String(arg));
  940. break;
  941. }
  942. // Unsigned
  943. case 'u':
  944. {
  945. unsigned arg = va_arg(args, unsigned);
  946. Append(String(arg));
  947. break;
  948. }
  949. // Real
  950. case 'f':
  951. {
  952. double arg = va_arg(args, double);
  953. Append(String(arg));
  954. break;
  955. }
  956. // Character
  957. case 'c':
  958. {
  959. int arg = va_arg(args, int);
  960. Append(arg);
  961. break;
  962. }
  963. // C string
  964. case 's':
  965. {
  966. char* arg = va_arg(args, char*);
  967. Append(arg);
  968. break;
  969. }
  970. // Hex
  971. case 'x':
  972. {
  973. char buf[CONVERSION_BUFFER_LENGTH];
  974. int arg = va_arg(args, int);
  975. int arglen = ::sprintf(buf, "%x", arg);
  976. Append(buf, arglen);
  977. break;
  978. }
  979. // Pointer
  980. case 'p':
  981. {
  982. char buf[CONVERSION_BUFFER_LENGTH];
  983. int arg = va_arg(args, int);
  984. int arglen = ::sprintf(buf, "%p", reinterpret_cast<void*>(arg));
  985. Append(buf, arglen);
  986. break;
  987. }
  988. case '%':
  989. {
  990. Append("%", 1);
  991. break;
  992. }
  993. }
  994. }
  995. return *this;
  996. }
  997. int String::Compare(const char* lhs, const char* rhs, bool caseSensitive)
  998. {
  999. if (!lhs || !rhs)
  1000. return lhs ? 1 : (rhs ? -1 : 0);
  1001. if (caseSensitive)
  1002. return strcmp(lhs, rhs);
  1003. else
  1004. {
  1005. for (;;)
  1006. {
  1007. char l = tolower(*lhs);
  1008. char r = tolower(*rhs);
  1009. if (!l || !r)
  1010. return l ? 1 : (r ? -1 : 0);
  1011. if (l < r)
  1012. return -1;
  1013. if (l > r)
  1014. return 1;
  1015. ++lhs;
  1016. ++rhs;
  1017. }
  1018. }
  1019. }
  1020. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  1021. {
  1022. int delta = (int)srcLength - (int)length;
  1023. if (pos + length < length_)
  1024. {
  1025. if (delta < 0)
  1026. {
  1027. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  1028. Resize(length_ + delta);
  1029. }
  1030. if (delta > 0)
  1031. {
  1032. Resize(length_ + delta);
  1033. MoveRange(pos + srcLength, pos + length, length_ - pos - length - delta);
  1034. }
  1035. }
  1036. else
  1037. Resize(length_ + delta);
  1038. CopyChars(buffer_ + pos, srcStart, srcLength);
  1039. }
  1040. WString::WString() :
  1041. length_(0),
  1042. buffer_(0)
  1043. {
  1044. }
  1045. WString::WString(const String& str) :
  1046. length_(0),
  1047. buffer_(0)
  1048. {
  1049. #ifdef WIN32
  1050. unsigned neededSize = 0;
  1051. wchar_t temp[3];
  1052. unsigned byteOffset = 0;
  1053. while (byteOffset < str.Length())
  1054. {
  1055. wchar_t* dest = temp;
  1056. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1057. neededSize += dest - temp;
  1058. }
  1059. Resize(neededSize);
  1060. byteOffset = 0;
  1061. wchar_t* dest = buffer_;
  1062. while (byteOffset < str.Length())
  1063. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1064. #else
  1065. Resize(str.LengthUTF8());
  1066. unsigned byteOffset = 0;
  1067. wchar_t* dest = buffer_;
  1068. while (byteOffset < str.Length())
  1069. *dest++ = str.NextUTF8Char(byteOffset);
  1070. #endif
  1071. }
  1072. WString::~WString()
  1073. {
  1074. delete[] buffer_;
  1075. }
  1076. void WString::Resize(unsigned newSize)
  1077. {
  1078. if (!newSize)
  1079. {
  1080. delete[] buffer_;
  1081. buffer_ = 0;
  1082. length_ = 0;
  1083. }
  1084. else
  1085. {
  1086. wchar_t* newBuffer = new wchar_t[newSize + 1];
  1087. if (buffer_)
  1088. memcpy(newBuffer, buffer_, length_ * sizeof(wchar_t));
  1089. newBuffer[newSize] = 0;
  1090. buffer_ = newBuffer;
  1091. length_ = newSize;
  1092. }
  1093. }
  1094. }