Str.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. //
  2. // Urho3D Engine
  3. // Copyright (c) 2008-2012 Lasse Öörni
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy
  6. // of this software and associated documentation files (the "Software"), to deal
  7. // in the Software without restriction, including without limitation the rights
  8. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. // copies of the Software, and to permit persons to whom the Software is
  10. // furnished to do so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in
  13. // all copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. // THE SOFTWARE.
  22. //
  23. #include "Str.h"
  24. #include "Swap.h"
  25. #include <cstdio>
  // Shared terminator byte: the constructors in this file point buffer_ here while capacity_ is 0.
  26. char String::endZero = 0;
  27. String::String(const WString& str) :
  28. length_(0),
  29. capacity_(0),
  30. buffer_(&endZero)
  31. {
  32. SetUTF8FromWChar(str.CString());
  33. }
  34. String::String(int value) :
  35. length_(0),
  36. capacity_(0),
  37. buffer_(&endZero)
  38. {
  39. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  40. sprintf(tempBuffer, "%d", value);
  41. *this = tempBuffer;
  42. }
  43. String::String(short value) :
  44. length_(0),
  45. capacity_(0),
  46. buffer_(&endZero)
  47. {
  48. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  49. sprintf(tempBuffer, "%d", value);
  50. *this = tempBuffer;
  51. }
  52. String::String(unsigned value) :
  53. length_(0),
  54. capacity_(0),
  55. buffer_(&endZero)
  56. {
  57. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  58. sprintf(tempBuffer, "%u", value);
  59. *this = tempBuffer;
  60. }
  61. String::String(unsigned short value) :
  62. length_(0),
  63. capacity_(0),
  64. buffer_(&endZero)
  65. {
  66. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  67. sprintf(tempBuffer, "%u", value);
  68. *this = tempBuffer;
  69. }
  70. String::String(float value) :
  71. length_(0),
  72. capacity_(0),
  73. buffer_(&endZero)
  74. {
  75. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  76. sprintf(tempBuffer, "%g", value);
  77. *this = tempBuffer;
  78. }
  79. String::String(double value) :
  80. length_(0),
  81. capacity_(0),
  82. buffer_(&endZero)
  83. {
  84. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  85. sprintf(tempBuffer, "%g", value);
  86. *this = tempBuffer;
  87. }
  88. String::String(bool value) :
  89. length_(0),
  90. capacity_(0),
  91. buffer_(&endZero)
  92. {
  93. if (value)
  94. *this = "true";
  95. else
  96. *this = "false";
  97. }
  98. String::String(char value) :
  99. length_(0),
  100. capacity_(0),
  101. buffer_(&endZero)
  102. {
  103. Resize(1);
  104. buffer_[0] = value;
  105. }
  106. String::String(char value, unsigned length) :
  107. length_(0),
  108. capacity_(0),
  109. buffer_(&endZero)
  110. {
  111. Resize(length);
  112. for (unsigned i = 0; i < length; ++i)
  113. buffer_[i] = value;
  114. }
  115. String& String::operator += (int rhs)
  116. {
  117. return *this += String(rhs);
  118. }
  119. String& String::operator += (short rhs)
  120. {
  121. return *this += String(rhs);
  122. }
  123. String& String::operator += (unsigned rhs)
  124. {
  125. return *this += String(rhs);
  126. }
  127. String& String::operator += (unsigned short rhs)
  128. {
  129. return *this += String(rhs);
  130. }
  131. String& String::operator += (float rhs)
  132. {
  133. return *this += String(rhs);
  134. }
  135. String& String::operator += (bool rhs)
  136. {
  137. return *this += String(rhs);
  138. }
  139. void String::Replace(char replaceThis, char replaceWith)
  140. {
  141. for (unsigned i = 0; i < length_; ++i)
  142. {
  143. if (buffer_[i] == replaceThis)
  144. buffer_[i] = replaceWith;
  145. }
  146. }
  147. void String::Replace(const String& replaceThis, const String& replaceWith)
  148. {
  149. unsigned nextPos = 0;
  150. while (nextPos < length_)
  151. {
  152. unsigned pos = Find(replaceThis, nextPos);
  153. if (pos == NPOS)
  154. break;
  155. Replace(pos, replaceThis.length_, replaceWith);
  156. nextPos = pos + replaceWith.length_;
  157. }
  158. }
  159. void String::Replace(unsigned pos, unsigned length, const String& str)
  160. {
  161. // If substring is illegal, do nothing
  162. if (pos + length > length_)
  163. return;
  164. Replace(pos, length, str.buffer_, str.length_);
  165. }
  166. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  167. {
  168. unsigned pos = start - Begin();
  169. if (pos >= length_)
  170. return End();
  171. unsigned length = end - start;
  172. Replace(pos, length, replaceWith);
  173. return Begin() + pos;
  174. }
  175. String String::Replaced(char replaceThis, char replaceWith) const
  176. {
  177. String ret(*this);
  178. ret.Replace(replaceThis, replaceWith);
  179. return ret;
  180. }
  181. String String::Replaced(const String& replaceThis, const String& replaceWith) const
  182. {
  183. String ret(*this);
  184. ret.Replace(replaceThis, replaceWith);
  185. return ret;
  186. }
  187. void String::Append(const String& str)
  188. {
  189. *this += str;
  190. }
  191. void String::Append(const char* str)
  192. {
  193. *this += str;
  194. }
  195. void String::Append(char c)
  196. {
  197. *this += c;
  198. }
  199. void String::Append(const char* str, unsigned length)
  200. {
  201. if (!str)
  202. return;
  203. unsigned oldLength = length_;
  204. Resize(oldLength + length);
  205. CopyChars(&buffer_[oldLength], str, length);
  206. }
  207. void String::Insert(unsigned pos, const String& str)
  208. {
  209. if (pos > length_)
  210. pos = length_;
  211. if (pos == length_)
  212. (*this) += str;
  213. else
  214. Replace(pos, 0, str);
  215. }
  216. void String::Insert(unsigned pos, char c)
  217. {
  218. if (pos > length_)
  219. pos = length_;
  220. if (pos == length_)
  221. (*this) += c;
  222. else
  223. {
  224. unsigned oldLength = length_;
  225. Resize(length_ + 1);
  226. MoveRange(pos + 1, pos, oldLength - pos);
  227. buffer_[pos] = c;
  228. }
  229. }
  230. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  231. {
  232. unsigned pos = dest - Begin();
  233. if (pos > length_)
  234. pos = length_;
  235. Insert(pos, str);
  236. return Begin() + pos;
  237. }
  238. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  239. {
  240. unsigned pos = dest - Begin();
  241. if (pos > length_)
  242. pos = length_;
  243. unsigned length = end - start;
  244. Replace(pos, 0, &(*start), length);
  245. return Begin() + pos;
  246. }
  247. String::Iterator String::Insert(const String::Iterator& dest, char c)
  248. {
  249. unsigned pos = dest - Begin();
  250. if (pos > length_)
  251. pos = length_;
  252. Insert(pos, c);
  253. return Begin() + pos;
  254. }
  255. void String::Erase(unsigned pos, unsigned length)
  256. {
  257. Replace(pos, length, String());
  258. }
  259. String::Iterator String::Erase(const String::Iterator& it)
  260. {
  261. unsigned pos = it - Begin();
  262. if (pos >= length_)
  263. return End();
  264. Erase(pos);
  265. return Begin() + pos;
  266. }
  267. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  268. {
  269. unsigned pos = start - Begin();
  270. if (pos >= length_)
  271. return End();
  272. unsigned length = end - start;
  273. Erase(pos, length);
  274. return Begin() + pos;
  275. }
  276. void String::Resize(unsigned newLength)
  277. {
  278. if (!capacity_)
  279. {
  280. // Calculate initial capacity
  281. capacity_ = newLength + 1;
  282. if (capacity_ < MIN_CAPACITY)
  283. capacity_ = MIN_CAPACITY;
  284. buffer_ = new char[capacity_];
  285. }
  286. else
  287. {
  288. if (newLength && capacity_ < newLength + 1)
  289. {
  290. // Increase the capacity with half each time it is exceeded
  291. while (capacity_ < newLength + 1)
  292. capacity_ += (capacity_ + 1) >> 1;
  293. char* newBuffer = new char[capacity_];
  294. // Move the existing data to the new buffer, then delete the old buffer
  295. if (length_)
  296. CopyChars(newBuffer, buffer_, length_);
  297. delete[] buffer_;
  298. buffer_ = newBuffer;
  299. }
  300. }
  301. buffer_[newLength] = 0;
  302. length_ = newLength;
  303. }
  304. void String::Reserve(unsigned newCapacity)
  305. {
  306. if (newCapacity < length_ + 1)
  307. newCapacity = length_ + 1;
  308. if (newCapacity == capacity_)
  309. return;
  310. char* newBuffer = new char[newCapacity];
  311. // Move the existing data to the new buffer, then delete the old buffer
  312. CopyChars(newBuffer, buffer_, length_ + 1);
  313. if (capacity_)
  314. delete[] buffer_;
  315. capacity_ = newCapacity;
  316. buffer_ = newBuffer;
  317. }
  318. void String::Compact()
  319. {
  320. if (capacity_)
  321. Reserve(length_ + 1);
  322. }
  323. void String::Clear()
  324. {
  325. Resize(0);
  326. }
  327. void String::Swap(String& str)
  328. {
  329. ::Swap(length_, str.length_);
  330. ::Swap(capacity_, str.capacity_);
  331. ::Swap(buffer_, str.buffer_);
  332. }
  333. String String::Substring(unsigned pos) const
  334. {
  335. if (pos < length_)
  336. {
  337. String ret;
  338. ret.Resize(length_ - pos);
  339. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  340. return ret;
  341. }
  342. else
  343. return String();
  344. }
  345. String String::Substring(unsigned pos, unsigned length) const
  346. {
  347. if (pos < length_)
  348. {
  349. String ret;
  350. if (pos + length > length_)
  351. length = length_ - pos;
  352. ret.Resize(length);
  353. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  354. return ret;
  355. }
  356. else
  357. return String();
  358. }
  359. String String::Trimmed() const
  360. {
  361. unsigned trimStart = 0;
  362. unsigned trimEnd = length_;
  363. while (trimStart < trimEnd)
  364. {
  365. char c = buffer_[trimStart];
  366. if (c != ' ' && c != 9)
  367. break;
  368. ++trimStart;
  369. }
  370. while (trimEnd > trimStart)
  371. {
  372. char c = buffer_[trimEnd - 1];
  373. if (c != ' ' && c != 9)
  374. break;
  375. --trimEnd;
  376. }
  377. return Substring(trimStart, trimEnd - trimStart);
  378. }
  379. String String::ToLower() const
  380. {
  381. String ret(*this);
  382. for (unsigned i = 0; i < ret.length_; ++i)
  383. ret[i] = tolower(buffer_[i]);
  384. return ret;
  385. }
  386. String String::ToUpper() const
  387. {
  388. String ret(*this);
  389. for (unsigned i = 0; i < ret.length_; ++i)
  390. ret[i] = toupper(buffer_[i]);
  391. return ret;
  392. }
  393. Vector<String> String::Split(char separator) const
  394. {
  395. return Split(CString(), separator);
  396. }
  397. unsigned String::Find(char c, unsigned startPos) const
  398. {
  399. for (unsigned i = startPos; i < length_; ++i)
  400. {
  401. if (buffer_[i] == c)
  402. return i;
  403. }
  404. return NPOS;
  405. }
  406. unsigned String::Find(const String& str, unsigned startPos) const
  407. {
  408. if (!str.length_ || str.length_ > length_)
  409. return NPOS;
  410. char first = str.buffer_[0];
  411. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  412. {
  413. if (buffer_[i] == first)
  414. {
  415. unsigned skip = NPOS;
  416. bool found = true;
  417. for (unsigned j = 1; j < str.length_; ++j)
  418. {
  419. char c = buffer_[i + j];
  420. if (skip == NPOS && c == first)
  421. skip = i + j - 1;
  422. if (c != str.buffer_[j])
  423. {
  424. found = false;
  425. if (skip != NPOS)
  426. i = skip;
  427. break;
  428. }
  429. }
  430. if (found)
  431. return i;
  432. }
  433. }
  434. return NPOS;
  435. }
  436. unsigned String::FindLast(char c, unsigned startPos) const
  437. {
  438. if (startPos >= length_)
  439. startPos = length_ - 1;
  440. for (unsigned i = startPos; i < length_; --i)
  441. {
  442. if (buffer_[i] == c)
  443. return i;
  444. }
  445. return NPOS;
  446. }
  447. unsigned String::FindLast(const String& str, unsigned startPos) const
  448. {
  449. if (!str.length_ || str.length_ > length_)
  450. return NPOS;
  451. if (startPos > length_ - str.length_)
  452. startPos = length_ - str.length_;
  453. char first = str.buffer_[0];
  454. for (unsigned i = startPos; i < length_; --i)
  455. {
  456. if (buffer_[i] == first)
  457. {
  458. bool found = true;
  459. for (unsigned j = 1; j < str.length_; ++j)
  460. {
  461. char c = buffer_[i + j];
  462. if (c != str.buffer_[j])
  463. {
  464. found = false;
  465. break;
  466. }
  467. }
  468. if (found)
  469. return i;
  470. }
  471. }
  472. return NPOS;
  473. }
  474. bool String::StartsWith(const String& str) const
  475. {
  476. return Find(str) == 0;
  477. }
  478. bool String::EndsWith(const String& str) const
  479. {
  480. return FindLast(str) == Length() - str.Length();
  481. }
  482. int String::Compare(const String& str, bool caseSensitive) const
  483. {
  484. return Compare(str.CString(), caseSensitive);
  485. }
  486. int String::Compare(const char* str, bool caseSensitive) const
  487. {
  488. const char* lhs = CString();
  489. const char* rhs = str;
  490. if (caseSensitive)
  491. return strcmp(lhs, rhs);
  492. else
  493. {
  494. if (!lhs || !rhs)
  495. return lhs ? 1 : (rhs ? -1 : 0);
  496. for (;;)
  497. {
  498. char l = tolower(*lhs);
  499. char r = tolower(*rhs);
  500. if (!l || !r)
  501. return l ? 1 : (r ? -1 : 0);
  502. if (l < r)
  503. return -1;
  504. if (l > r)
  505. return 1;
  506. ++lhs;
  507. ++rhs;
  508. }
  509. }
  510. }
  511. void String::SetUTF8FromLatin1(const char* str)
  512. {
  513. char temp[7];
  514. Clear();
  515. if (!str)
  516. return;
  517. while (*str)
  518. {
  519. char* dest = temp;
  520. EncodeUTF8(dest, *str++);
  521. *dest = 0;
  522. Append(temp);
  523. }
  524. }
  525. void String::SetUTF8FromWChar(const wchar_t* str)
  526. {
  527. char temp[7];
  528. Clear();
  529. if (!str)
  530. return;
  531. #ifdef WIN32
  532. while (*str)
  533. {
  534. unsigned unicodeChar = DecodeUTF16(str);
  535. char* dest = temp;
  536. EncodeUTF8(dest, unicodeChar);
  537. *dest = 0;
  538. Append(temp);
  539. }
  540. #else
  541. while (*str)
  542. {
  543. char* dest = temp;
  544. EncodeUTF8(dest, *str++);
  545. *dest = 0;
  546. Append(temp);
  547. }
  548. #endif
  549. }
  550. unsigned String::LengthUTF8() const
  551. {
  552. unsigned ret = 0;
  553. const char* src = buffer_;
  554. if (!src)
  555. return ret;
  556. const char* end = buffer_ + length_;
  557. while (src < end)
  558. {
  559. DecodeUTF8(src);
  560. ++ret;
  561. }
  562. return ret;
  563. }
  564. unsigned String::ByteOffsetUTF8(unsigned index) const
  565. {
  566. unsigned byteOffset = 0;
  567. unsigned utfPos = 0;
  568. while (utfPos < index && byteOffset < length_)
  569. {
  570. NextUTF8Char(byteOffset);
  571. ++utfPos;
  572. }
  573. return byteOffset;
  574. }
  575. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  576. {
  577. if (!buffer_)
  578. return 0;
  579. const char* src = buffer_ + byteOffset;
  580. unsigned ret = DecodeUTF8(src);
  581. byteOffset = src - buffer_;
  582. return ret;
  583. }
  584. unsigned String::AtUTF8(unsigned index) const
  585. {
  586. unsigned byteOffset = ByteOffsetUTF8(index);
  587. return NextUTF8Char(byteOffset);
  588. }
  589. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  590. {
  591. unsigned utfPos = 0;
  592. unsigned byteOffset = 0;
  593. while (utfPos < index && byteOffset < length_)
  594. {
  595. NextUTF8Char(byteOffset);
  596. ++utfPos;
  597. }
  598. if (utfPos < index)
  599. return;
  600. unsigned beginCharPos = byteOffset;
  601. NextUTF8Char(byteOffset);
  602. char temp[7];
  603. char* dest = temp;
  604. EncodeUTF8(dest, unicodeChar);
  605. *dest = 0;
  606. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  607. }
  608. void String::AppendUTF8(unsigned unicodeChar)
  609. {
  610. char temp[7];
  611. char* dest = temp;
  612. EncodeUTF8(dest, unicodeChar);
  613. *dest = 0;
  614. Append(temp);
  615. }
  616. String String::SubstringUTF8(unsigned pos) const
  617. {
  618. unsigned utf8Length = LengthUTF8();
  619. unsigned byteOffset = ByteOffsetUTF8(pos);
  620. String ret;
  621. while (pos < utf8Length)
  622. {
  623. ret.AppendUTF8(NextUTF8Char(byteOffset));
  624. ++pos;
  625. }
  626. return ret;
  627. }
  628. String String::SubstringUTF8(unsigned pos, unsigned length) const
  629. {
  630. unsigned utf8Length = LengthUTF8();
  631. unsigned byteOffset = ByteOffsetUTF8(pos);
  632. unsigned endPos = pos + length;
  633. String ret;
  634. while (pos < endPos && pos < utf8Length)
  635. {
  636. ret.AppendUTF8(NextUTF8Char(byteOffset));
  637. ++pos;
  638. }
  639. return ret;
  640. }
  641. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  642. {
  643. if (unicodeChar < 0x80)
  644. *dest++ = unicodeChar;
  645. else if (unicodeChar < 0x800)
  646. {
  647. *dest++ = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  648. *dest++ = 0x80 | (unicodeChar & 0x3f);
  649. }
  650. else if (unicodeChar < 0x10000)
  651. {
  652. *dest++ = 0xe0 | ((unicodeChar >> 12) & 0xf);
  653. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  654. *dest++ = 0x80 | (unicodeChar & 0x3f);
  655. }
  656. else if (unicodeChar < 0x200000)
  657. {
  658. *dest++ = 0xf0 | ((unicodeChar >> 18) & 0x7);
  659. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  660. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  661. *dest++ = 0x80 | (unicodeChar & 0x3f);
  662. }
  663. else if (unicodeChar < 0x4000000)
  664. {
  665. *dest++ = 0xf8 | ((unicodeChar >> 24) & 0x3);
  666. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  667. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  668. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  669. *dest++ = 0x80 | (unicodeChar & 0x3f);
  670. }
  671. else
  672. {
  673. *dest++ = 0xfc | ((unicodeChar >> 30) & 0x1);
  674. *dest++ = 0x80 | ((unicodeChar >> 24) & 0x3f);
  675. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  676. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  677. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  678. *dest++ = 0x80 | (unicodeChar & 0x3f);
  679. }
  680. }
  681. #define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  682. unsigned String::DecodeUTF8(const char*& src)
  683. {
  684. if (src == 0)
  685. return 0;
  686. unsigned char char1 = *src++;
  687. // Check if we are in the middle of a UTF8 character
  688. if (char1 >= 0x80 && char1 < 0xc0)
  689. {
  690. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  691. ++src;
  692. return '?';
  693. }
  694. if (char1 < 0x80)
  695. return char1;
  696. else if (char1 < 0xe0)
  697. {
  698. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  699. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  700. }
  701. else if (char1 < 0xf0)
  702. {
  703. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  704. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  705. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  706. }
  707. else if (char1 < 0xf8)
  708. {
  709. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  710. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  711. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  712. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  713. }
  714. else if (char1 < 0xfc)
  715. {
  716. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  717. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  718. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  719. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  720. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  721. }
  722. else
  723. {
  724. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  725. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  726. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  727. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  728. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  729. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  730. ((char1 & 0x1) << 30);
  731. }
  732. }
  733. #ifdef WIN32
  734. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  735. {
  736. if (unicodeChar < 0x10000)
  737. *dest++ = unicodeChar;
  738. else
  739. {
  740. unicodeChar -= 0x10000;
  741. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  742. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  743. }
  744. }
  745. unsigned String::DecodeUTF16(const wchar_t*& src)
  746. {
  747. if (src == 0)
  748. return 0;
  749. unsigned short word1 = *src;
  750. // Check if we are at a low surrogate
  751. word1 = *src++;
  752. if (word1 >= 0xdc00 && word1 < 0xe000)
  753. {
  754. while (*src >= 0xdc00 && *src < 0xe000)
  755. ++src;
  756. return '?';
  757. }
  758. if (word1 < 0xd800 || word1 >= 0xe00)
  759. return word1;
  760. else
  761. {
  762. unsigned short word2 = *src++;
  763. if (word2 < 0xdc00 || word2 >= 0xe000)
  764. {
  765. --src;
  766. return '?';
  767. }
  768. else
  769. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  770. }
  771. }
  772. #endif
  773. Vector<String> String::Split(const char* str, char separator)
  774. {
  775. Vector<String> ret;
  776. unsigned pos = 0;
  777. unsigned length = CStringLength(str);
  778. while (pos < length)
  779. {
  780. if (str[pos] != separator)
  781. break;
  782. ++pos;
  783. }
  784. while (pos < length)
  785. {
  786. unsigned start = pos;
  787. while (start < length)
  788. {
  789. if (str[start] == separator)
  790. break;
  791. ++start;
  792. }
  793. if (start == length)
  794. {
  795. ret.Push(String(&str[pos]));
  796. break;
  797. }
  798. unsigned end = start;
  799. while (end < length)
  800. {
  801. if (str[end] != separator)
  802. break;
  803. ++end;
  804. }
  805. ret.Push(String(&str[pos], start - pos));
  806. pos = end;
  807. }
  808. return ret;
  809. }
  810. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  811. {
  812. int delta = (int)srcLength - (int)length;
  813. if (pos + length < length_)
  814. {
  815. if (delta < 0)
  816. {
  817. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  818. Resize(length_ + delta);
  819. }
  820. if (delta > 0)
  821. {
  822. Resize(length_ + delta);
  823. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  824. }
  825. }
  826. else
  827. Resize(length_ + delta);
  828. CopyChars(buffer_ + pos, srcStart, srcLength);
  829. }
  830. WString::WString() :
  831. length_(0),
  832. buffer_(0)
  833. {
  834. }
  835. WString::WString(const String& str) :
  836. length_(0),
  837. buffer_(0)
  838. {
  839. #ifdef WIN32
  840. unsigned neededSize = 0;
  841. wchar_t temp[3];
  842. unsigned byteOffset = 0;
  843. while (byteOffset < str.Length())
  844. {
  845. wchar_t* dest = temp;
  846. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  847. neededSize += dest - temp;
  848. }
  849. Resize(neededSize);
  850. byteOffset = 0;
  851. wchar_t* dest = buffer_;
  852. while (byteOffset < str.Length())
  853. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  854. #else
  855. Resize(str.LengthUTF8());
  856. unsigned byteOffset = 0;
  857. wchar_t* dest = buffer_;
  858. while (byteOffset < str.Length())
  859. *dest++ = str.NextUTF8Char(byteOffset);
  860. #endif
  861. }
  862. WString::~WString()
  863. {
  864. delete[] buffer_;
  865. }
  866. void WString::Resize(unsigned newSize)
  867. {
  868. if (!newSize)
  869. {
  870. delete[] buffer_;
  871. buffer_ = 0;
  872. length_ = 0;
  873. }
  874. else
  875. {
  876. wchar_t* newBuffer = new wchar_t[newSize + 1];
  877. if (buffer_)
  878. memcpy(newBuffer, buffer_, length_ * sizeof(wchar_t));
  879. newBuffer[newSize] = 0;
  880. buffer_ = newBuffer;
  881. length_ = newSize;
  882. }
  883. }