Str.cpp 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294
  1. //
  2. // Copyright (c) 2008-2014 the Urho3D project.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. #include "../Precompiled.h"
  23. #include "../Container/Str.h"
  24. #include "../Container/Swap.h"
  25. #include <cstdio>
  26. #include "../Container/DebugNew.h"
  27. namespace Urho3D
  28. {
  29. char String::endZero = 0;
  30. const String String::EMPTY;
  31. String::String(const WString& str) :
  32. length_(0),
  33. capacity_(0),
  34. buffer_(&endZero)
  35. {
  36. SetUTF8FromWChar(str.CString());
  37. }
  38. String::String(int value) :
  39. length_(0),
  40. capacity_(0),
  41. buffer_(&endZero)
  42. {
  43. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  44. sprintf(tempBuffer, "%d", value);
  45. *this = tempBuffer;
  46. }
  47. String::String(short value) :
  48. length_(0),
  49. capacity_(0),
  50. buffer_(&endZero)
  51. {
  52. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  53. sprintf(tempBuffer, "%d", value);
  54. *this = tempBuffer;
  55. }
  56. String::String(long value) :
  57. length_(0),
  58. capacity_(0),
  59. buffer_(&endZero)
  60. {
  61. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  62. sprintf(tempBuffer, "%ld", value);
  63. *this = tempBuffer;
  64. }
  65. String::String(long long value) :
  66. length_(0),
  67. capacity_(0),
  68. buffer_(&endZero)
  69. {
  70. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  71. sprintf(tempBuffer, "%lld", value);
  72. *this = tempBuffer;
  73. }
  74. String::String(unsigned value) :
  75. length_(0),
  76. capacity_(0),
  77. buffer_(&endZero)
  78. {
  79. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  80. sprintf(tempBuffer, "%u", value);
  81. *this = tempBuffer;
  82. }
  83. String::String(unsigned short value) :
  84. length_(0),
  85. capacity_(0),
  86. buffer_(&endZero)
  87. {
  88. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  89. sprintf(tempBuffer, "%u", value);
  90. *this = tempBuffer;
  91. }
  92. String::String(unsigned long value) :
  93. length_(0),
  94. capacity_(0),
  95. buffer_(&endZero)
  96. {
  97. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  98. sprintf(tempBuffer, "%lu", value);
  99. *this = tempBuffer;
  100. }
  101. String::String(unsigned long long value) :
  102. length_(0),
  103. capacity_(0),
  104. buffer_(&endZero)
  105. {
  106. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  107. sprintf(tempBuffer, "%llu", value);
  108. *this = tempBuffer;
  109. }
  110. String::String(float value) :
  111. length_(0),
  112. capacity_(0),
  113. buffer_(&endZero)
  114. {
  115. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  116. sprintf(tempBuffer, "%g", value);
  117. *this = tempBuffer;
  118. }
  119. String::String(double value) :
  120. length_(0),
  121. capacity_(0),
  122. buffer_(&endZero)
  123. {
  124. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  125. sprintf(tempBuffer, "%g", value);
  126. *this = tempBuffer;
  127. }
  128. String::String(bool value) :
  129. length_(0),
  130. capacity_(0),
  131. buffer_(&endZero)
  132. {
  133. if (value)
  134. *this = "true";
  135. else
  136. *this = "false";
  137. }
  138. String::String(char value) :
  139. length_(0),
  140. capacity_(0),
  141. buffer_(&endZero)
  142. {
  143. Resize(1);
  144. buffer_[0] = value;
  145. }
  146. String::String(char value, unsigned length) :
  147. length_(0),
  148. capacity_(0),
  149. buffer_(&endZero)
  150. {
  151. Resize(length);
  152. for (unsigned i = 0; i < length; ++i)
  153. buffer_[i] = value;
  154. }
  155. String& String::operator += (int rhs)
  156. {
  157. return *this += String(rhs);
  158. }
  159. String& String::operator += (short rhs)
  160. {
  161. return *this += String(rhs);
  162. }
  163. String& String::operator += (unsigned rhs)
  164. {
  165. return *this += String(rhs);
  166. }
  167. String& String::operator += (unsigned short rhs)
  168. {
  169. return *this += String(rhs);
  170. }
  171. String& String::operator += (float rhs)
  172. {
  173. return *this += String(rhs);
  174. }
  175. String& String::operator += (bool rhs)
  176. {
  177. return *this += String(rhs);
  178. }
  179. void String::Replace(char replaceThis, char replaceWith, bool caseSensitive)
  180. {
  181. if (caseSensitive)
  182. {
  183. for (unsigned i = 0; i < length_; ++i)
  184. {
  185. if (buffer_[i] == replaceThis)
  186. buffer_[i] = replaceWith;
  187. }
  188. }
  189. else
  190. {
  191. replaceThis = tolower(replaceThis);
  192. for (unsigned i = 0; i < length_; ++i)
  193. {
  194. if (tolower(buffer_[i]) == replaceThis)
  195. buffer_[i] = replaceWith;
  196. }
  197. }
  198. }
  199. void String::Replace(const String& replaceThis, const String& replaceWith, bool caseSensitive)
  200. {
  201. unsigned nextPos = 0;
  202. while (nextPos < length_)
  203. {
  204. unsigned pos = Find(replaceThis, nextPos, caseSensitive);
  205. if (pos == NPOS)
  206. break;
  207. Replace(pos, replaceThis.length_, replaceWith);
  208. nextPos = pos + replaceWith.length_;
  209. }
  210. }
  211. void String::Replace(unsigned pos, unsigned length, const String& replaceWith)
  212. {
  213. // If substring is illegal, do nothing
  214. if (pos + length > length_)
  215. return;
  216. Replace(pos, length, replaceWith.buffer_, replaceWith.length_);
  217. }
  218. void String::Replace(unsigned pos, unsigned length, const char* replaceWith)
  219. {
  220. // If substring is illegal, do nothing
  221. if (pos + length > length_)
  222. return;
  223. Replace(pos, length, replaceWith, CStringLength(replaceWith));
  224. }
  225. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  226. {
  227. unsigned pos = start - Begin();
  228. if (pos >= length_)
  229. return End();
  230. unsigned length = end - start;
  231. Replace(pos, length, replaceWith);
  232. return Begin() + pos;
  233. }
  234. String String::Replaced(char replaceThis, char replaceWith, bool caseSensitive) const
  235. {
  236. String ret(*this);
  237. ret.Replace(replaceThis, replaceWith, caseSensitive);
  238. return ret;
  239. }
  240. String String::Replaced(const String& replaceThis, const String& replaceWith, bool caseSensitive) const
  241. {
  242. String ret(*this);
  243. ret.Replace(replaceThis, replaceWith, caseSensitive);
  244. return ret;
  245. }
  246. String& String::Append(const String& str)
  247. {
  248. return *this += str;
  249. }
  250. String& String::Append(const char* str)
  251. {
  252. return *this += str;
  253. }
  254. String& String::Append(char c)
  255. {
  256. return *this += c;
  257. }
  258. String& String::Append(const char* str, unsigned length)
  259. {
  260. if (str)
  261. {
  262. unsigned oldLength = length_;
  263. Resize(oldLength + length);
  264. CopyChars(&buffer_[oldLength], str, length);
  265. }
  266. return *this;
  267. }
  268. void String::Insert(unsigned pos, const String& str)
  269. {
  270. if (pos > length_)
  271. pos = length_;
  272. if (pos == length_)
  273. (*this) += str;
  274. else
  275. Replace(pos, 0, str);
  276. }
  277. void String::Insert(unsigned pos, char c)
  278. {
  279. if (pos > length_)
  280. pos = length_;
  281. if (pos == length_)
  282. (*this) += c;
  283. else
  284. {
  285. unsigned oldLength = length_;
  286. Resize(length_ + 1);
  287. MoveRange(pos + 1, pos, oldLength - pos);
  288. buffer_[pos] = c;
  289. }
  290. }
  291. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  292. {
  293. unsigned pos = dest - Begin();
  294. if (pos > length_)
  295. pos = length_;
  296. Insert(pos, str);
  297. return Begin() + pos;
  298. }
  299. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  300. {
  301. unsigned pos = dest - Begin();
  302. if (pos > length_)
  303. pos = length_;
  304. unsigned length = end - start;
  305. Replace(pos, 0, &(*start), length);
  306. return Begin() + pos;
  307. }
  308. String::Iterator String::Insert(const String::Iterator& dest, char c)
  309. {
  310. unsigned pos = dest - Begin();
  311. if (pos > length_)
  312. pos = length_;
  313. Insert(pos, c);
  314. return Begin() + pos;
  315. }
  316. void String::Erase(unsigned pos, unsigned length)
  317. {
  318. Replace(pos, length, String::EMPTY);
  319. }
  320. String::Iterator String::Erase(const String::Iterator& it)
  321. {
  322. unsigned pos = it - Begin();
  323. if (pos >= length_)
  324. return End();
  325. Erase(pos);
  326. return Begin() + pos;
  327. }
  328. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  329. {
  330. unsigned pos = start - Begin();
  331. if (pos >= length_)
  332. return End();
  333. unsigned length = end - start;
  334. Erase(pos, length);
  335. return Begin() + pos;
  336. }
  337. void String::Resize(unsigned newLength)
  338. {
  339. if (!capacity_)
  340. {
  341. // If zero length requested, do not allocate buffer yet
  342. if (!newLength)
  343. return;
  344. // Calculate initial capacity
  345. capacity_ = newLength + 1;
  346. if (capacity_ < MIN_CAPACITY)
  347. capacity_ = MIN_CAPACITY;
  348. buffer_ = new char[capacity_];
  349. }
  350. else
  351. {
  352. if (newLength && capacity_ < newLength + 1)
  353. {
  354. // Increase the capacity with half each time it is exceeded
  355. while (capacity_ < newLength + 1)
  356. capacity_ += (capacity_ + 1) >> 1;
  357. char* newBuffer = new char[capacity_];
  358. // Move the existing data to the new buffer, then delete the old buffer
  359. if (length_)
  360. CopyChars(newBuffer, buffer_, length_);
  361. delete[] buffer_;
  362. buffer_ = newBuffer;
  363. }
  364. }
  365. buffer_[newLength] = 0;
  366. length_ = newLength;
  367. }
  368. void String::Reserve(unsigned newCapacity)
  369. {
  370. if (newCapacity < length_ + 1)
  371. newCapacity = length_ + 1;
  372. if (newCapacity == capacity_)
  373. return;
  374. char* newBuffer = new char[newCapacity];
  375. // Move the existing data to the new buffer, then delete the old buffer
  376. CopyChars(newBuffer, buffer_, length_ + 1);
  377. if (capacity_)
  378. delete[] buffer_;
  379. capacity_ = newCapacity;
  380. buffer_ = newBuffer;
  381. }
  382. void String::Compact()
  383. {
  384. if (capacity_)
  385. Reserve(length_ + 1);
  386. }
  387. void String::Clear()
  388. {
  389. Resize(0);
  390. }
  391. void String::Swap(String& str)
  392. {
  393. Urho3D::Swap(length_, str.length_);
  394. Urho3D::Swap(capacity_, str.capacity_);
  395. Urho3D::Swap(buffer_, str.buffer_);
  396. }
  397. String String::Substring(unsigned pos) const
  398. {
  399. if (pos < length_)
  400. {
  401. String ret;
  402. ret.Resize(length_ - pos);
  403. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  404. return ret;
  405. }
  406. else
  407. return String();
  408. }
  409. String String::Substring(unsigned pos, unsigned length) const
  410. {
  411. if (pos < length_)
  412. {
  413. String ret;
  414. if (pos + length > length_)
  415. length = length_ - pos;
  416. ret.Resize(length);
  417. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  418. return ret;
  419. }
  420. else
  421. return String();
  422. }
  423. String String::Trimmed() const
  424. {
  425. unsigned trimStart = 0;
  426. unsigned trimEnd = length_;
  427. while (trimStart < trimEnd)
  428. {
  429. char c = buffer_[trimStart];
  430. if (c != ' ' && c != 9)
  431. break;
  432. ++trimStart;
  433. }
  434. while (trimEnd > trimStart)
  435. {
  436. char c = buffer_[trimEnd - 1];
  437. if (c != ' ' && c != 9)
  438. break;
  439. --trimEnd;
  440. }
  441. return Substring(trimStart, trimEnd - trimStart);
  442. }
  443. String String::ToLower() const
  444. {
  445. String ret(*this);
  446. for (unsigned i = 0; i < ret.length_; ++i)
  447. ret[i] = tolower(buffer_[i]);
  448. return ret;
  449. }
  450. String String::ToUpper() const
  451. {
  452. String ret(*this);
  453. for (unsigned i = 0; i < ret.length_; ++i)
  454. ret[i] = toupper(buffer_[i]);
  455. return ret;
  456. }
  457. Vector<String> String::Split(char separator) const
  458. {
  459. return Split(CString(), separator);
  460. }
  461. void String::Join(const Vector<String>& subStrings, String glue)
  462. {
  463. *this = Joined(subStrings, glue);
  464. }
  465. unsigned String::Find(char c, unsigned startPos, bool caseSensitive) const
  466. {
  467. if (caseSensitive)
  468. {
  469. for (unsigned i = startPos; i < length_; ++i)
  470. {
  471. if (buffer_[i] == c)
  472. return i;
  473. }
  474. }
  475. else
  476. {
  477. c = tolower(c);
  478. for (unsigned i = startPos; i < length_; ++i)
  479. {
  480. if (tolower(buffer_[i]) == c)
  481. return i;
  482. }
  483. }
  484. return NPOS;
  485. }
  486. unsigned String::Find(const String& str, unsigned startPos, bool caseSensitive) const
  487. {
  488. if (!str.length_ || str.length_ > length_)
  489. return NPOS;
  490. char first = str.buffer_[0];
  491. if (!caseSensitive)
  492. first = tolower(first);
  493. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  494. {
  495. char c = buffer_[i];
  496. if (!caseSensitive)
  497. c = tolower(c);
  498. if (c == first)
  499. {
  500. unsigned skip = NPOS;
  501. bool found = true;
  502. for (unsigned j = 1; j < str.length_; ++j)
  503. {
  504. c = buffer_[i + j];
  505. char d = str.buffer_[j];
  506. if (!caseSensitive)
  507. {
  508. c = tolower(c);
  509. d = tolower(d);
  510. }
  511. if (skip == NPOS && c == first)
  512. skip = i + j - 1;
  513. if (c != d)
  514. {
  515. found = false;
  516. if (skip != NPOS)
  517. i = skip;
  518. break;
  519. }
  520. }
  521. if (found)
  522. return i;
  523. }
  524. }
  525. return NPOS;
  526. }
  527. unsigned String::FindLast(char c, unsigned startPos, bool caseSensitive) const
  528. {
  529. if (startPos >= length_)
  530. startPos = length_ - 1;
  531. if (caseSensitive)
  532. {
  533. for (unsigned i = startPos; i < length_; --i)
  534. {
  535. if (buffer_[i] == c)
  536. return i;
  537. }
  538. }
  539. else
  540. {
  541. c = tolower(c);
  542. for (unsigned i = startPos; i < length_; --i)
  543. {
  544. if (tolower(buffer_[i]) == c)
  545. return i;
  546. }
  547. }
  548. return NPOS;
  549. }
  550. unsigned String::FindLast(const String& str, unsigned startPos, bool caseSensitive) const
  551. {
  552. if (!str.length_ || str.length_ > length_)
  553. return NPOS;
  554. if (startPos > length_ - str.length_)
  555. startPos = length_ - str.length_;
  556. char first = str.buffer_[0];
  557. if (!caseSensitive)
  558. first = tolower(first);
  559. for (unsigned i = startPos; i < length_; --i)
  560. {
  561. char c = buffer_[i];
  562. if (!caseSensitive)
  563. c = tolower(c);
  564. if (c == first)
  565. {
  566. bool found = true;
  567. for (unsigned j = 1; j < str.length_; ++j)
  568. {
  569. c = buffer_[i + j];
  570. char d = str.buffer_[j];
  571. if (!caseSensitive)
  572. {
  573. c = tolower(c);
  574. d = tolower(d);
  575. }
  576. if (c != d)
  577. {
  578. found = false;
  579. break;
  580. }
  581. }
  582. if (found)
  583. return i;
  584. }
  585. }
  586. return NPOS;
  587. }
  588. bool String::StartsWith(const String& str, bool caseSensitive) const
  589. {
  590. return Find(str, 0, caseSensitive) == 0;
  591. }
  592. bool String::EndsWith(const String& str, bool caseSensitive) const
  593. {
  594. unsigned pos = FindLast(str, Length() - 1, caseSensitive);
  595. return pos != NPOS && pos == Length() - str.Length();
  596. }
  597. int String::Compare(const String& str, bool caseSensitive) const
  598. {
  599. return Compare(CString(), str.CString(), caseSensitive);
  600. }
  601. int String::Compare(const char* str, bool caseSensitive) const
  602. {
  603. return Compare(CString(), str, caseSensitive);
  604. }
  605. void String::SetUTF8FromLatin1(const char* str)
  606. {
  607. char temp[7];
  608. Clear();
  609. if (!str)
  610. return;
  611. while (*str)
  612. {
  613. char* dest = temp;
  614. EncodeUTF8(dest, *str++);
  615. *dest = 0;
  616. Append(temp);
  617. }
  618. }
  619. void String::SetUTF8FromWChar(const wchar_t* str)
  620. {
  621. char temp[7];
  622. Clear();
  623. if (!str)
  624. return;
  625. #ifdef WIN32
  626. while (*str)
  627. {
  628. unsigned unicodeChar = DecodeUTF16(str);
  629. char* dest = temp;
  630. EncodeUTF8(dest, unicodeChar);
  631. *dest = 0;
  632. Append(temp);
  633. }
  634. #else
  635. while (*str)
  636. {
  637. char* dest = temp;
  638. EncodeUTF8(dest, *str++);
  639. *dest = 0;
  640. Append(temp);
  641. }
  642. #endif
  643. }
  644. unsigned String::LengthUTF8() const
  645. {
  646. unsigned ret = 0;
  647. const char* src = buffer_;
  648. if (!src)
  649. return ret;
  650. const char* end = buffer_ + length_;
  651. while (src < end)
  652. {
  653. DecodeUTF8(src);
  654. ++ret;
  655. }
  656. return ret;
  657. }
  658. unsigned String::ByteOffsetUTF8(unsigned index) const
  659. {
  660. unsigned byteOffset = 0;
  661. unsigned utfPos = 0;
  662. while (utfPos < index && byteOffset < length_)
  663. {
  664. NextUTF8Char(byteOffset);
  665. ++utfPos;
  666. }
  667. return byteOffset;
  668. }
  669. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  670. {
  671. if (!buffer_)
  672. return 0;
  673. const char* src = buffer_ + byteOffset;
  674. unsigned ret = DecodeUTF8(src);
  675. byteOffset = src - buffer_;
  676. return ret;
  677. }
  678. unsigned String::AtUTF8(unsigned index) const
  679. {
  680. unsigned byteOffset = ByteOffsetUTF8(index);
  681. return NextUTF8Char(byteOffset);
  682. }
  683. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  684. {
  685. unsigned utfPos = 0;
  686. unsigned byteOffset = 0;
  687. while (utfPos < index && byteOffset < length_)
  688. {
  689. NextUTF8Char(byteOffset);
  690. ++utfPos;
  691. }
  692. if (utfPos < index)
  693. return;
  694. unsigned beginCharPos = byteOffset;
  695. NextUTF8Char(byteOffset);
  696. char temp[7];
  697. char* dest = temp;
  698. EncodeUTF8(dest, unicodeChar);
  699. *dest = 0;
  700. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  701. }
  702. String& String::AppendUTF8(unsigned unicodeChar)
  703. {
  704. char temp[7];
  705. char* dest = temp;
  706. EncodeUTF8(dest, unicodeChar);
  707. *dest = 0;
  708. return Append(temp);
  709. }
  710. String String::SubstringUTF8(unsigned pos) const
  711. {
  712. unsigned utf8Length = LengthUTF8();
  713. unsigned byteOffset = ByteOffsetUTF8(pos);
  714. String ret;
  715. while (pos < utf8Length)
  716. {
  717. ret.AppendUTF8(NextUTF8Char(byteOffset));
  718. ++pos;
  719. }
  720. return ret;
  721. }
  722. String String::SubstringUTF8(unsigned pos, unsigned length) const
  723. {
  724. unsigned utf8Length = LengthUTF8();
  725. unsigned byteOffset = ByteOffsetUTF8(pos);
  726. unsigned endPos = pos + length;
  727. String ret;
  728. while (pos < endPos && pos < utf8Length)
  729. {
  730. ret.AppendUTF8(NextUTF8Char(byteOffset));
  731. ++pos;
  732. }
  733. return ret;
  734. }
  735. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  736. {
  737. if (unicodeChar < 0x80)
  738. *dest++ = unicodeChar;
  739. else if (unicodeChar < 0x800)
  740. {
  741. dest[0] = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  742. dest[1] = 0x80 | (unicodeChar & 0x3f);
  743. dest += 2;
  744. }
  745. else if (unicodeChar < 0x10000)
  746. {
  747. dest[0] = 0xe0 | ((unicodeChar >> 12) & 0xf);
  748. dest[1] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  749. dest[2] = 0x80 | (unicodeChar & 0x3f);
  750. dest += 3;
  751. }
  752. else if (unicodeChar < 0x200000)
  753. {
  754. dest[0] = 0xf0 | ((unicodeChar >> 18) & 0x7);
  755. dest[1] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  756. dest[2] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  757. dest[3] = 0x80 | (unicodeChar & 0x3f);
  758. dest += 4;
  759. }
  760. else if (unicodeChar < 0x4000000)
  761. {
  762. dest[0] = 0xf8 | ((unicodeChar >> 24) & 0x3);
  763. dest[1] = 0x80 | ((unicodeChar >> 18) & 0x3f);
  764. dest[2] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  765. dest[3] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  766. dest[4] = 0x80 | (unicodeChar & 0x3f);
  767. dest += 5;
  768. }
  769. else
  770. {
  771. dest[0] = 0xfc | ((unicodeChar >> 30) & 0x1);
  772. dest[1] = 0x80 | ((unicodeChar >> 24) & 0x3f);
  773. dest[2] = 0x80 | ((unicodeChar >> 18) & 0x3f);
  774. dest[3] = 0x80 | ((unicodeChar >> 12) & 0x3f);
  775. dest[4] = 0x80 | ((unicodeChar >> 6) & 0x3f);
  776. dest[5] = 0x80 | (unicodeChar & 0x3f);
  777. dest += 6;
  778. }
  779. }
  780. #define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  781. unsigned String::DecodeUTF8(const char*& src)
  782. {
  783. if (src == 0)
  784. return 0;
  785. unsigned char char1 = *src++;
  786. // Check if we are in the middle of a UTF8 character
  787. if (char1 >= 0x80 && char1 < 0xc0)
  788. {
  789. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  790. ++src;
  791. return '?';
  792. }
  793. if (char1 < 0x80)
  794. return char1;
  795. else if (char1 < 0xe0)
  796. {
  797. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  798. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  799. }
  800. else if (char1 < 0xf0)
  801. {
  802. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  803. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  804. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  805. }
  806. else if (char1 < 0xf8)
  807. {
  808. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  809. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  810. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  811. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  812. }
  813. else if (char1 < 0xfc)
  814. {
  815. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  816. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  817. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  818. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  819. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  820. }
  821. else
  822. {
  823. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  824. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  825. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  826. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  827. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  828. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  829. ((char1 & 0x1) << 30);
  830. }
  831. }
  832. #ifdef WIN32
  833. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  834. {
  835. if (unicodeChar < 0x10000)
  836. *dest++ = unicodeChar;
  837. else
  838. {
  839. unicodeChar -= 0x10000;
  840. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  841. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  842. }
  843. }
  844. unsigned String::DecodeUTF16(const wchar_t*& src)
  845. {
  846. if (src == 0)
  847. return 0;
  848. unsigned short word1 = *src;
  849. // Check if we are at a low surrogate
  850. word1 = *src++;
  851. if (word1 >= 0xdc00 && word1 < 0xe000)
  852. {
  853. while (*src >= 0xdc00 && *src < 0xe000)
  854. ++src;
  855. return '?';
  856. }
  857. if (word1 < 0xd800 || word1 >= 0xe00)
  858. return word1;
  859. else
  860. {
  861. unsigned short word2 = *src++;
  862. if (word2 < 0xdc00 || word2 >= 0xe000)
  863. {
  864. --src;
  865. return '?';
  866. }
  867. else
  868. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  869. }
  870. }
  871. #endif
  872. Vector<String> String::Split(const char* str, char separator)
  873. {
  874. Vector<String> ret;
  875. unsigned pos = 0;
  876. unsigned length = CStringLength(str);
  877. while (pos < length)
  878. {
  879. if (str[pos] != separator)
  880. break;
  881. ++pos;
  882. }
  883. while (pos < length)
  884. {
  885. unsigned start = pos;
  886. while (start < length)
  887. {
  888. if (str[start] == separator)
  889. break;
  890. ++start;
  891. }
  892. if (start == length)
  893. {
  894. ret.Push(String(&str[pos]));
  895. break;
  896. }
  897. unsigned end = start;
  898. while (end < length)
  899. {
  900. if (str[end] != separator)
  901. break;
  902. ++end;
  903. }
  904. ret.Push(String(&str[pos], start - pos));
  905. pos = end;
  906. }
  907. return ret;
  908. }
  909. String String::Joined(const Vector<String>& subStrings, String glue)
  910. {
  911. if (subStrings.Empty())
  912. return String();
  913. String joinedString(subStrings[0]);
  914. for (unsigned i = 1; i < subStrings.Size(); ++i)
  915. joinedString.Append(glue).Append(subStrings[i]);
  916. return joinedString;
  917. }
  918. String& String::AppendWithFormat(const char* formatString, ... )
  919. {
  920. va_list args;
  921. va_start(args, formatString);
  922. AppendWithFormatArgs(formatString, args);
  923. va_end(args);
  924. return *this;
  925. }
  926. String& String::AppendWithFormatArgs(const char* formatString, va_list args)
  927. {
  928. int pos = 0, lastPos = 0;
  929. int length = strlen(formatString);
  930. while (true)
  931. {
  932. // Scan the format string and find %a argument where a is one of d, f, s ...
  933. while (pos < length && formatString[pos] != '%') pos++;
  934. Append(formatString + lastPos, pos - lastPos);
  935. if (pos >= length)
  936. return *this;
  937. char arg = formatString[pos + 1];
  938. pos += 2;
  939. lastPos = pos;
  940. switch (arg)
  941. {
  942. // Integer
  943. case 'd':
  944. case 'i':
  945. {
  946. int arg = va_arg(args, int);
  947. Append(String(arg));
  948. break;
  949. }
  950. // Unsigned
  951. case 'u':
  952. {
  953. unsigned arg = va_arg(args, unsigned);
  954. Append(String(arg));
  955. break;
  956. }
  957. // Real
  958. case 'f':
  959. {
  960. double arg = va_arg(args, double);
  961. Append(String(arg));
  962. break;
  963. }
  964. // Character
  965. case 'c':
  966. {
  967. int arg = va_arg(args, int);
  968. Append(arg);
  969. break;
  970. }
  971. // C string
  972. case 's':
  973. {
  974. char* arg = va_arg(args, char*);
  975. Append(arg);
  976. break;
  977. }
  978. // Hex
  979. case 'x':
  980. {
  981. char buf[CONVERSION_BUFFER_LENGTH];
  982. int arg = va_arg(args, int);
  983. int arglen = ::sprintf(buf, "%x", arg);
  984. Append(buf, arglen);
  985. break;
  986. }
  987. // Pointer
  988. case 'p':
  989. {
  990. char buf[CONVERSION_BUFFER_LENGTH];
  991. int arg = va_arg(args, int);
  992. int arglen = ::sprintf(buf, "%p", reinterpret_cast<void*>(arg));
  993. Append(buf, arglen);
  994. break;
  995. }
  996. case '%':
  997. {
  998. Append("%", 1);
  999. break;
  1000. }
  1001. }
  1002. }
  1003. return *this;
  1004. }
  1005. int String::Compare(const char* lhs, const char* rhs, bool caseSensitive)
  1006. {
  1007. if (!lhs || !rhs)
  1008. return lhs ? 1 : (rhs ? -1 : 0);
  1009. if (caseSensitive)
  1010. return strcmp(lhs, rhs);
  1011. else
  1012. {
  1013. for (;;)
  1014. {
  1015. char l = tolower(*lhs);
  1016. char r = tolower(*rhs);
  1017. if (!l || !r)
  1018. return l ? 1 : (r ? -1 : 0);
  1019. if (l < r)
  1020. return -1;
  1021. if (l > r)
  1022. return 1;
  1023. ++lhs;
  1024. ++rhs;
  1025. }
  1026. }
  1027. }
  1028. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  1029. {
  1030. int delta = (int)srcLength - (int)length;
  1031. if (pos + length < length_)
  1032. {
  1033. if (delta < 0)
  1034. {
  1035. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  1036. Resize(length_ + delta);
  1037. }
  1038. if (delta > 0)
  1039. {
  1040. Resize(length_ + delta);
  1041. MoveRange(pos + srcLength, pos + length, length_ - pos - length - delta);
  1042. }
  1043. }
  1044. else
  1045. Resize(length_ + delta);
  1046. CopyChars(buffer_ + pos, srcStart, srcLength);
  1047. }
  1048. WString::WString() :
  1049. length_(0),
  1050. buffer_(0)
  1051. {
  1052. }
  1053. WString::WString(const String& str) :
  1054. length_(0),
  1055. buffer_(0)
  1056. {
  1057. #ifdef WIN32
  1058. unsigned neededSize = 0;
  1059. wchar_t temp[3];
  1060. unsigned byteOffset = 0;
  1061. while (byteOffset < str.Length())
  1062. {
  1063. wchar_t* dest = temp;
  1064. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1065. neededSize += dest - temp;
  1066. }
  1067. Resize(neededSize);
  1068. byteOffset = 0;
  1069. wchar_t* dest = buffer_;
  1070. while (byteOffset < str.Length())
  1071. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  1072. #else
  1073. Resize(str.LengthUTF8());
  1074. unsigned byteOffset = 0;
  1075. wchar_t* dest = buffer_;
  1076. while (byteOffset < str.Length())
  1077. *dest++ = str.NextUTF8Char(byteOffset);
  1078. #endif
  1079. }
  1080. WString::~WString()
  1081. {
  1082. delete[] buffer_;
  1083. }
  1084. void WString::Resize(unsigned newLength)
  1085. {
  1086. if (!newLength)
  1087. {
  1088. delete[] buffer_;
  1089. buffer_ = 0;
  1090. length_ = 0;
  1091. }
  1092. else
  1093. {
  1094. wchar_t* newBuffer = new wchar_t[newLength + 1];
  1095. if (buffer_)
  1096. {
  1097. unsigned copyLength = length_ < newLength ? length_ : newLength;
  1098. memcpy(newBuffer, buffer_, copyLength * sizeof(wchar_t));
  1099. delete[] buffer_;
  1100. }
  1101. newBuffer[newLength] = 0;
  1102. buffer_ = newBuffer;
  1103. length_ = newLength;
  1104. }
  1105. }
  1106. }