Str.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046
  1. //
  2. // Urho3D Engine
  3. // Copyright (c) 2008-2012 Lasse Öörni
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy
  6. // of this software and associated documentation files (the "Software"), to deal
  7. // in the Software without restriction, including without limitation the rights
  8. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. // copies of the Software, and to permit persons to whom the Software is
  10. // furnished to do so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in
  13. // all copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. // THE SOFTWARE.
  22. //
  23. #include "Str.h"
  24. #include "Swap.h"
  25. #include <cstdio>
  26. #include "DebugNew.h"
// Shared zero byte. The String constructors in this file point buffer_ at it (with capacity_ 0)
// until Resize() allocates real storage.
char String::endZero = 0;
// Default-constructed String instance.
const String String::EMPTY;
  29. String::String(const WString& str) :
  30. length_(0),
  31. capacity_(0),
  32. buffer_(&endZero)
  33. {
  34. SetUTF8FromWChar(str.CString());
  35. }
  36. String::String(int value) :
  37. length_(0),
  38. capacity_(0),
  39. buffer_(&endZero)
  40. {
  41. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  42. sprintf(tempBuffer, "%d", value);
  43. *this = tempBuffer;
  44. }
  45. String::String(short value) :
  46. length_(0),
  47. capacity_(0),
  48. buffer_(&endZero)
  49. {
  50. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  51. sprintf(tempBuffer, "%d", value);
  52. *this = tempBuffer;
  53. }
  54. String::String(unsigned value) :
  55. length_(0),
  56. capacity_(0),
  57. buffer_(&endZero)
  58. {
  59. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  60. sprintf(tempBuffer, "%u", value);
  61. *this = tempBuffer;
  62. }
  63. String::String(unsigned short value) :
  64. length_(0),
  65. capacity_(0),
  66. buffer_(&endZero)
  67. {
  68. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  69. sprintf(tempBuffer, "%u", value);
  70. *this = tempBuffer;
  71. }
  72. String::String(float value) :
  73. length_(0),
  74. capacity_(0),
  75. buffer_(&endZero)
  76. {
  77. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  78. sprintf(tempBuffer, "%g", value);
  79. *this = tempBuffer;
  80. }
  81. String::String(double value) :
  82. length_(0),
  83. capacity_(0),
  84. buffer_(&endZero)
  85. {
  86. char tempBuffer[CONVERSION_BUFFER_LENGTH];
  87. sprintf(tempBuffer, "%g", value);
  88. *this = tempBuffer;
  89. }
  90. String::String(bool value) :
  91. length_(0),
  92. capacity_(0),
  93. buffer_(&endZero)
  94. {
  95. if (value)
  96. *this = "true";
  97. else
  98. *this = "false";
  99. }
  100. String::String(char value) :
  101. length_(0),
  102. capacity_(0),
  103. buffer_(&endZero)
  104. {
  105. Resize(1);
  106. buffer_[0] = value;
  107. }
  108. String::String(char value, unsigned length) :
  109. length_(0),
  110. capacity_(0),
  111. buffer_(&endZero)
  112. {
  113. Resize(length);
  114. for (unsigned i = 0; i < length; ++i)
  115. buffer_[i] = value;
  116. }
  117. String& String::operator += (int rhs)
  118. {
  119. return *this += String(rhs);
  120. }
  121. String& String::operator += (short rhs)
  122. {
  123. return *this += String(rhs);
  124. }
  125. String& String::operator += (unsigned rhs)
  126. {
  127. return *this += String(rhs);
  128. }
  129. String& String::operator += (unsigned short rhs)
  130. {
  131. return *this += String(rhs);
  132. }
  133. String& String::operator += (float rhs)
  134. {
  135. return *this += String(rhs);
  136. }
  137. String& String::operator += (bool rhs)
  138. {
  139. return *this += String(rhs);
  140. }
  141. void String::Replace(char replaceThis, char replaceWith)
  142. {
  143. for (unsigned i = 0; i < length_; ++i)
  144. {
  145. if (buffer_[i] == replaceThis)
  146. buffer_[i] = replaceWith;
  147. }
  148. }
  149. void String::Replace(const String& replaceThis, const String& replaceWith)
  150. {
  151. unsigned nextPos = 0;
  152. while (nextPos < length_)
  153. {
  154. unsigned pos = Find(replaceThis, nextPos);
  155. if (pos == NPOS)
  156. break;
  157. Replace(pos, replaceThis.length_, replaceWith);
  158. nextPos = pos + replaceWith.length_;
  159. }
  160. }
  161. void String::Replace(unsigned pos, unsigned length, const String& str)
  162. {
  163. // If substring is illegal, do nothing
  164. if (pos + length > length_)
  165. return;
  166. Replace(pos, length, str.buffer_, str.length_);
  167. }
  168. String::Iterator String::Replace(const String::Iterator& start, const String::Iterator& end, const String& replaceWith)
  169. {
  170. unsigned pos = start - Begin();
  171. if (pos >= length_)
  172. return End();
  173. unsigned length = end - start;
  174. Replace(pos, length, replaceWith);
  175. return Begin() + pos;
  176. }
  177. String String::Replaced(char replaceThis, char replaceWith) const
  178. {
  179. String ret(*this);
  180. ret.Replace(replaceThis, replaceWith);
  181. return ret;
  182. }
  183. String String::Replaced(const String& replaceThis, const String& replaceWith) const
  184. {
  185. String ret(*this);
  186. ret.Replace(replaceThis, replaceWith);
  187. return ret;
  188. }
  189. void String::Append(const String& str)
  190. {
  191. *this += str;
  192. }
  193. void String::Append(const char* str)
  194. {
  195. *this += str;
  196. }
  197. void String::Append(char c)
  198. {
  199. *this += c;
  200. }
  201. void String::Append(const char* str, unsigned length)
  202. {
  203. if (!str)
  204. return;
  205. unsigned oldLength = length_;
  206. Resize(oldLength + length);
  207. CopyChars(&buffer_[oldLength], str, length);
  208. }
  209. void String::Insert(unsigned pos, const String& str)
  210. {
  211. if (pos > length_)
  212. pos = length_;
  213. if (pos == length_)
  214. (*this) += str;
  215. else
  216. Replace(pos, 0, str);
  217. }
  218. void String::Insert(unsigned pos, char c)
  219. {
  220. if (pos > length_)
  221. pos = length_;
  222. if (pos == length_)
  223. (*this) += c;
  224. else
  225. {
  226. unsigned oldLength = length_;
  227. Resize(length_ + 1);
  228. MoveRange(pos + 1, pos, oldLength - pos);
  229. buffer_[pos] = c;
  230. }
  231. }
  232. String::Iterator String::Insert(const String::Iterator& dest, const String& str)
  233. {
  234. unsigned pos = dest - Begin();
  235. if (pos > length_)
  236. pos = length_;
  237. Insert(pos, str);
  238. return Begin() + pos;
  239. }
  240. String::Iterator String::Insert(const String::Iterator& dest, const String::Iterator& start, const String::Iterator& end)
  241. {
  242. unsigned pos = dest - Begin();
  243. if (pos > length_)
  244. pos = length_;
  245. unsigned length = end - start;
  246. Replace(pos, 0, &(*start), length);
  247. return Begin() + pos;
  248. }
  249. String::Iterator String::Insert(const String::Iterator& dest, char c)
  250. {
  251. unsigned pos = dest - Begin();
  252. if (pos > length_)
  253. pos = length_;
  254. Insert(pos, c);
  255. return Begin() + pos;
  256. }
  257. void String::Erase(unsigned pos, unsigned length)
  258. {
  259. Replace(pos, length, String());
  260. }
  261. String::Iterator String::Erase(const String::Iterator& it)
  262. {
  263. unsigned pos = it - Begin();
  264. if (pos >= length_)
  265. return End();
  266. Erase(pos);
  267. return Begin() + pos;
  268. }
  269. String::Iterator String::Erase(const String::Iterator& start, const String::Iterator& end)
  270. {
  271. unsigned pos = start - Begin();
  272. if (pos >= length_)
  273. return End();
  274. unsigned length = end - start;
  275. Erase(pos, length);
  276. return Begin() + pos;
  277. }
  278. void String::Resize(unsigned newLength)
  279. {
  280. if (!capacity_)
  281. {
  282. // Calculate initial capacity
  283. capacity_ = newLength + 1;
  284. if (capacity_ < MIN_CAPACITY)
  285. capacity_ = MIN_CAPACITY;
  286. buffer_ = new char[capacity_];
  287. }
  288. else
  289. {
  290. if (newLength && capacity_ < newLength + 1)
  291. {
  292. // Increase the capacity with half each time it is exceeded
  293. while (capacity_ < newLength + 1)
  294. capacity_ += (capacity_ + 1) >> 1;
  295. char* newBuffer = new char[capacity_];
  296. // Move the existing data to the new buffer, then delete the old buffer
  297. if (length_)
  298. CopyChars(newBuffer, buffer_, length_);
  299. delete[] buffer_;
  300. buffer_ = newBuffer;
  301. }
  302. }
  303. buffer_[newLength] = 0;
  304. length_ = newLength;
  305. }
  306. void String::Reserve(unsigned newCapacity)
  307. {
  308. if (newCapacity < length_ + 1)
  309. newCapacity = length_ + 1;
  310. if (newCapacity == capacity_)
  311. return;
  312. char* newBuffer = new char[newCapacity];
  313. // Move the existing data to the new buffer, then delete the old buffer
  314. CopyChars(newBuffer, buffer_, length_ + 1);
  315. if (capacity_)
  316. delete[] buffer_;
  317. capacity_ = newCapacity;
  318. buffer_ = newBuffer;
  319. }
  320. void String::Compact()
  321. {
  322. if (capacity_)
  323. Reserve(length_ + 1);
  324. }
  325. void String::Clear()
  326. {
  327. Resize(0);
  328. }
  329. void String::Swap(String& str)
  330. {
  331. ::Swap(length_, str.length_);
  332. ::Swap(capacity_, str.capacity_);
  333. ::Swap(buffer_, str.buffer_);
  334. }
  335. String String::Substring(unsigned pos) const
  336. {
  337. if (pos < length_)
  338. {
  339. String ret;
  340. ret.Resize(length_ - pos);
  341. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  342. return ret;
  343. }
  344. else
  345. return String();
  346. }
  347. String String::Substring(unsigned pos, unsigned length) const
  348. {
  349. if (pos < length_)
  350. {
  351. String ret;
  352. if (pos + length > length_)
  353. length = length_ - pos;
  354. ret.Resize(length);
  355. CopyChars(ret.buffer_, buffer_ + pos, ret.length_);
  356. return ret;
  357. }
  358. else
  359. return String();
  360. }
  361. String String::Trimmed() const
  362. {
  363. unsigned trimStart = 0;
  364. unsigned trimEnd = length_;
  365. while (trimStart < trimEnd)
  366. {
  367. char c = buffer_[trimStart];
  368. if (c != ' ' && c != 9)
  369. break;
  370. ++trimStart;
  371. }
  372. while (trimEnd > trimStart)
  373. {
  374. char c = buffer_[trimEnd - 1];
  375. if (c != ' ' && c != 9)
  376. break;
  377. --trimEnd;
  378. }
  379. return Substring(trimStart, trimEnd - trimStart);
  380. }
  381. String String::ToLower() const
  382. {
  383. String ret(*this);
  384. for (unsigned i = 0; i < ret.length_; ++i)
  385. ret[i] = tolower(buffer_[i]);
  386. return ret;
  387. }
  388. String String::ToUpper() const
  389. {
  390. String ret(*this);
  391. for (unsigned i = 0; i < ret.length_; ++i)
  392. ret[i] = toupper(buffer_[i]);
  393. return ret;
  394. }
  395. Vector<String> String::Split(char separator) const
  396. {
  397. return Split(CString(), separator);
  398. }
  399. unsigned String::Find(char c, unsigned startPos) const
  400. {
  401. for (unsigned i = startPos; i < length_; ++i)
  402. {
  403. if (buffer_[i] == c)
  404. return i;
  405. }
  406. return NPOS;
  407. }
  408. unsigned String::Find(const String& str, unsigned startPos) const
  409. {
  410. if (!str.length_ || str.length_ > length_)
  411. return NPOS;
  412. char first = str.buffer_[0];
  413. for (unsigned i = startPos; i <= length_ - str.length_; ++i)
  414. {
  415. if (buffer_[i] == first)
  416. {
  417. unsigned skip = NPOS;
  418. bool found = true;
  419. for (unsigned j = 1; j < str.length_; ++j)
  420. {
  421. char c = buffer_[i + j];
  422. if (skip == NPOS && c == first)
  423. skip = i + j - 1;
  424. if (c != str.buffer_[j])
  425. {
  426. found = false;
  427. if (skip != NPOS)
  428. i = skip;
  429. break;
  430. }
  431. }
  432. if (found)
  433. return i;
  434. }
  435. }
  436. return NPOS;
  437. }
  438. unsigned String::FindLast(char c, unsigned startPos) const
  439. {
  440. if (startPos >= length_)
  441. startPos = length_ - 1;
  442. for (unsigned i = startPos; i < length_; --i)
  443. {
  444. if (buffer_[i] == c)
  445. return i;
  446. }
  447. return NPOS;
  448. }
  449. unsigned String::FindLast(const String& str, unsigned startPos) const
  450. {
  451. if (!str.length_ || str.length_ > length_)
  452. return NPOS;
  453. if (startPos > length_ - str.length_)
  454. startPos = length_ - str.length_;
  455. char first = str.buffer_[0];
  456. for (unsigned i = startPos; i < length_; --i)
  457. {
  458. if (buffer_[i] == first)
  459. {
  460. bool found = true;
  461. for (unsigned j = 1; j < str.length_; ++j)
  462. {
  463. char c = buffer_[i + j];
  464. if (c != str.buffer_[j])
  465. {
  466. found = false;
  467. break;
  468. }
  469. }
  470. if (found)
  471. return i;
  472. }
  473. }
  474. return NPOS;
  475. }
  476. bool String::StartsWith(const String& str) const
  477. {
  478. return Find(str) == 0;
  479. }
  480. bool String::EndsWith(const String& str) const
  481. {
  482. return FindLast(str) == Length() - str.Length();
  483. }
  484. int String::Compare(const String& str, bool caseSensitive) const
  485. {
  486. return Compare(CString(), str.CString(), caseSensitive);
  487. }
  488. int String::Compare(const char* str, bool caseSensitive) const
  489. {
  490. return Compare(CString(), str, caseSensitive);
  491. }
  492. void String::SetUTF8FromLatin1(const char* str)
  493. {
  494. char temp[7];
  495. Clear();
  496. if (!str)
  497. return;
  498. while (*str)
  499. {
  500. char* dest = temp;
  501. EncodeUTF8(dest, *str++);
  502. *dest = 0;
  503. Append(temp);
  504. }
  505. }
  506. void String::SetUTF8FromWChar(const wchar_t* str)
  507. {
  508. char temp[7];
  509. Clear();
  510. if (!str)
  511. return;
  512. #ifdef WIN32
  513. while (*str)
  514. {
  515. unsigned unicodeChar = DecodeUTF16(str);
  516. char* dest = temp;
  517. EncodeUTF8(dest, unicodeChar);
  518. *dest = 0;
  519. Append(temp);
  520. }
  521. #else
  522. while (*str)
  523. {
  524. char* dest = temp;
  525. EncodeUTF8(dest, *str++);
  526. *dest = 0;
  527. Append(temp);
  528. }
  529. #endif
  530. }
  531. unsigned String::LengthUTF8() const
  532. {
  533. unsigned ret = 0;
  534. const char* src = buffer_;
  535. if (!src)
  536. return ret;
  537. const char* end = buffer_ + length_;
  538. while (src < end)
  539. {
  540. DecodeUTF8(src);
  541. ++ret;
  542. }
  543. return ret;
  544. }
  545. unsigned String::ByteOffsetUTF8(unsigned index) const
  546. {
  547. unsigned byteOffset = 0;
  548. unsigned utfPos = 0;
  549. while (utfPos < index && byteOffset < length_)
  550. {
  551. NextUTF8Char(byteOffset);
  552. ++utfPos;
  553. }
  554. return byteOffset;
  555. }
  556. unsigned String::NextUTF8Char(unsigned& byteOffset) const
  557. {
  558. if (!buffer_)
  559. return 0;
  560. const char* src = buffer_ + byteOffset;
  561. unsigned ret = DecodeUTF8(src);
  562. byteOffset = src - buffer_;
  563. return ret;
  564. }
  565. unsigned String::AtUTF8(unsigned index) const
  566. {
  567. unsigned byteOffset = ByteOffsetUTF8(index);
  568. return NextUTF8Char(byteOffset);
  569. }
  570. void String::ReplaceUTF8(unsigned index, unsigned unicodeChar)
  571. {
  572. unsigned utfPos = 0;
  573. unsigned byteOffset = 0;
  574. while (utfPos < index && byteOffset < length_)
  575. {
  576. NextUTF8Char(byteOffset);
  577. ++utfPos;
  578. }
  579. if (utfPos < index)
  580. return;
  581. unsigned beginCharPos = byteOffset;
  582. NextUTF8Char(byteOffset);
  583. char temp[7];
  584. char* dest = temp;
  585. EncodeUTF8(dest, unicodeChar);
  586. *dest = 0;
  587. Replace(beginCharPos, byteOffset - beginCharPos, temp, dest - temp);
  588. }
  589. void String::AppendUTF8(unsigned unicodeChar)
  590. {
  591. char temp[7];
  592. char* dest = temp;
  593. EncodeUTF8(dest, unicodeChar);
  594. *dest = 0;
  595. Append(temp);
  596. }
  597. String String::SubstringUTF8(unsigned pos) const
  598. {
  599. unsigned utf8Length = LengthUTF8();
  600. unsigned byteOffset = ByteOffsetUTF8(pos);
  601. String ret;
  602. while (pos < utf8Length)
  603. {
  604. ret.AppendUTF8(NextUTF8Char(byteOffset));
  605. ++pos;
  606. }
  607. return ret;
  608. }
  609. String String::SubstringUTF8(unsigned pos, unsigned length) const
  610. {
  611. unsigned utf8Length = LengthUTF8();
  612. unsigned byteOffset = ByteOffsetUTF8(pos);
  613. unsigned endPos = pos + length;
  614. String ret;
  615. while (pos < endPos && pos < utf8Length)
  616. {
  617. ret.AppendUTF8(NextUTF8Char(byteOffset));
  618. ++pos;
  619. }
  620. return ret;
  621. }
  622. void String::EncodeUTF8(char*& dest, unsigned unicodeChar)
  623. {
  624. if (unicodeChar < 0x80)
  625. *dest++ = unicodeChar;
  626. else if (unicodeChar < 0x800)
  627. {
  628. *dest++ = 0xc0 | ((unicodeChar >> 6) & 0x1f);
  629. *dest++ = 0x80 | (unicodeChar & 0x3f);
  630. }
  631. else if (unicodeChar < 0x10000)
  632. {
  633. *dest++ = 0xe0 | ((unicodeChar >> 12) & 0xf);
  634. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  635. *dest++ = 0x80 | (unicodeChar & 0x3f);
  636. }
  637. else if (unicodeChar < 0x200000)
  638. {
  639. *dest++ = 0xf0 | ((unicodeChar >> 18) & 0x7);
  640. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  641. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  642. *dest++ = 0x80 | (unicodeChar & 0x3f);
  643. }
  644. else if (unicodeChar < 0x4000000)
  645. {
  646. *dest++ = 0xf8 | ((unicodeChar >> 24) & 0x3);
  647. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  648. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  649. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  650. *dest++ = 0x80 | (unicodeChar & 0x3f);
  651. }
  652. else
  653. {
  654. *dest++ = 0xfc | ((unicodeChar >> 30) & 0x1);
  655. *dest++ = 0x80 | ((unicodeChar >> 24) & 0x3f);
  656. *dest++ = 0x80 | ((unicodeChar >> 18) & 0x3f);
  657. *dest++ = 0x80 | ((unicodeChar >> 12) & 0x3f);
  658. *dest++ = 0x80 | ((unicodeChar >> 6) & 0x3f);
  659. *dest++ = 0x80 | (unicodeChar & 0x3f);
  660. }
  661. }
// Expands to several statements: yields the byte at ptr as an initializer value, then returns '?' from
// the enclosing function if that byte is not a UTF-8 continuation byte (i.e. is outside 0x80-0xbf),
// otherwise advances ptr. Because of the hidden return and the trailing if/else, it is only usable as
// the initializer of a declaration statement inside a function returning an integer type.
#define GET_NEXT_CONTINUATION_BYTE(ptr) *ptr; if ((unsigned char)*ptr < 0x80 || (unsigned char)*ptr >= 0xc0) return '?'; else ++ptr;
  663. unsigned String::DecodeUTF8(const char*& src)
  664. {
  665. if (src == 0)
  666. return 0;
  667. unsigned char char1 = *src++;
  668. // Check if we are in the middle of a UTF8 character
  669. if (char1 >= 0x80 && char1 < 0xc0)
  670. {
  671. while ((unsigned char)*src >= 0x80 && (unsigned char)*src < 0xc0)
  672. ++src;
  673. return '?';
  674. }
  675. if (char1 < 0x80)
  676. return char1;
  677. else if (char1 < 0xe0)
  678. {
  679. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  680. return (char2 & 0x3f) | ((char1 & 0x1f) << 6);
  681. }
  682. else if (char1 < 0xf0)
  683. {
  684. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  685. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  686. return (char3 & 0x3f) | ((char2 & 0x3f) << 6) | ((char1 & 0xf) << 12);
  687. }
  688. else if (char1 < 0xf8)
  689. {
  690. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  691. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  692. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  693. return (char4 & 0x3f) | ((char3 & 0x3f) << 6) | ((char2 & 0x3f) << 12) | ((char1 & 0x7) << 18);
  694. }
  695. else if (char1 < 0xfc)
  696. {
  697. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  698. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  699. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  700. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  701. return (char5 & 0x3f) | ((char4 & 0x3f) << 6) | ((char3 & 0x3f) << 12) | ((char2 & 0x3f) << 18) | ((char1 & 0x3) << 24);
  702. }
  703. else
  704. {
  705. unsigned char char2 = GET_NEXT_CONTINUATION_BYTE(src);
  706. unsigned char char3 = GET_NEXT_CONTINUATION_BYTE(src);
  707. unsigned char char4 = GET_NEXT_CONTINUATION_BYTE(src);
  708. unsigned char char5 = GET_NEXT_CONTINUATION_BYTE(src);
  709. unsigned char char6 = GET_NEXT_CONTINUATION_BYTE(src);
  710. return (char6 & 0x3f) | ((char5 & 0x3f) << 6) | ((char4 & 0x3f) << 12) | ((char3 & 0x3f) << 18) | ((char2 & 0x3f) << 24) |
  711. ((char1 & 0x1) << 30);
  712. }
  713. }
  714. #ifdef WIN32
  715. void String::EncodeUTF16(wchar_t*& dest, unsigned unicodeChar)
  716. {
  717. if (unicodeChar < 0x10000)
  718. *dest++ = unicodeChar;
  719. else
  720. {
  721. unicodeChar -= 0x10000;
  722. *dest++ = 0xd800 | ((unicodeChar >> 10) & 0x3ff);
  723. *dest++ = 0xdc00 | (unicodeChar & 0x3ff);
  724. }
  725. }
  726. unsigned String::DecodeUTF16(const wchar_t*& src)
  727. {
  728. if (src == 0)
  729. return 0;
  730. unsigned short word1 = *src;
  731. // Check if we are at a low surrogate
  732. word1 = *src++;
  733. if (word1 >= 0xdc00 && word1 < 0xe000)
  734. {
  735. while (*src >= 0xdc00 && *src < 0xe000)
  736. ++src;
  737. return '?';
  738. }
  739. if (word1 < 0xd800 || word1 >= 0xe00)
  740. return word1;
  741. else
  742. {
  743. unsigned short word2 = *src++;
  744. if (word2 < 0xdc00 || word2 >= 0xe000)
  745. {
  746. --src;
  747. return '?';
  748. }
  749. else
  750. return ((word1 & 0x3ff) << 10) | (word2 & 0x3ff) | 0x10000;
  751. }
  752. }
  753. #endif
  754. Vector<String> String::Split(const char* str, char separator)
  755. {
  756. Vector<String> ret;
  757. unsigned pos = 0;
  758. unsigned length = CStringLength(str);
  759. while (pos < length)
  760. {
  761. if (str[pos] != separator)
  762. break;
  763. ++pos;
  764. }
  765. while (pos < length)
  766. {
  767. unsigned start = pos;
  768. while (start < length)
  769. {
  770. if (str[start] == separator)
  771. break;
  772. ++start;
  773. }
  774. if (start == length)
  775. {
  776. ret.Push(String(&str[pos]));
  777. break;
  778. }
  779. unsigned end = start;
  780. while (end < length)
  781. {
  782. if (str[end] != separator)
  783. break;
  784. ++end;
  785. }
  786. ret.Push(String(&str[pos], start - pos));
  787. pos = end;
  788. }
  789. return ret;
  790. }
  791. int String::Compare(const char* lhs, const char* rhs, bool caseSensitive)
  792. {
  793. if (!lhs || !rhs)
  794. return lhs ? 1 : (rhs ? -1 : 0);
  795. if (caseSensitive)
  796. return strcmp(lhs, rhs);
  797. else
  798. {
  799. for (;;)
  800. {
  801. char l = tolower(*lhs);
  802. char r = tolower(*rhs);
  803. if (!l || !r)
  804. return l ? 1 : (r ? -1 : 0);
  805. if (l < r)
  806. return -1;
  807. if (l > r)
  808. return 1;
  809. ++lhs;
  810. ++rhs;
  811. }
  812. }
  813. }
  814. void String::Replace(unsigned pos, unsigned length, const char* srcStart, unsigned srcLength)
  815. {
  816. int delta = (int)srcLength - (int)length;
  817. if (pos + length < length_)
  818. {
  819. if (delta < 0)
  820. {
  821. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  822. Resize(length_ + delta);
  823. }
  824. if (delta > 0)
  825. {
  826. Resize(length_ + delta);
  827. MoveRange(pos + srcLength, pos + length, length_ - pos - length);
  828. }
  829. }
  830. else
  831. Resize(length_ + delta);
  832. CopyChars(buffer_ + pos, srcStart, srcLength);
  833. }
  834. WString::WString() :
  835. length_(0),
  836. buffer_(0)
  837. {
  838. }
  839. WString::WString(const String& str) :
  840. length_(0),
  841. buffer_(0)
  842. {
  843. #ifdef WIN32
  844. unsigned neededSize = 0;
  845. wchar_t temp[3];
  846. unsigned byteOffset = 0;
  847. while (byteOffset < str.Length())
  848. {
  849. wchar_t* dest = temp;
  850. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  851. neededSize += dest - temp;
  852. }
  853. Resize(neededSize);
  854. byteOffset = 0;
  855. wchar_t* dest = buffer_;
  856. while (byteOffset < str.Length())
  857. String::EncodeUTF16(dest, str.NextUTF8Char(byteOffset));
  858. #else
  859. Resize(str.LengthUTF8());
  860. unsigned byteOffset = 0;
  861. wchar_t* dest = buffer_;
  862. while (byteOffset < str.Length())
  863. *dest++ = str.NextUTF8Char(byteOffset);
  864. #endif
  865. }
  866. WString::~WString()
  867. {
  868. delete[] buffer_;
  869. }
  870. void WString::Resize(unsigned newSize)
  871. {
  872. if (!newSize)
  873. {
  874. delete[] buffer_;
  875. buffer_ = 0;
  876. length_ = 0;
  877. }
  878. else
  879. {
  880. wchar_t* newBuffer = new wchar_t[newSize + 1];
  881. if (buffer_)
  882. memcpy(newBuffer, buffer_, length_ * sizeof(wchar_t));
  883. newBuffer[newSize] = 0;
  884. buffer_ = newBuffer;
  885. length_ = newSize;
  886. }
  887. }