// PolyString.cpp
  #include "PolyString.h"

  #include <cstdio>
  #include <cwctype>

  using namespace Polycode;
  3. String::String() {
  4. }
  5. String::String(const wchar_t *str) {
  6. contents = wstring(str);
  7. }
  8. String::String(const char *str) {
  9. string sstr = string(str);
  10. contents.assign(sstr.begin(), sstr.end());
  11. }
  12. String::String(const wchar_t *str, size_t n) {
  13. contents = wstring(str, n);
  14. }
  15. String::String(string str) {
  16. contents.assign(str.begin(), str.end());
  17. }
  18. String::String(wstring str) {
  19. contents = str;
  20. }
  21. String::~String() {
  22. }
  23. size_t String::getDataSizeWithEncoding(int encoding) {
  24. switch(encoding) {
  25. case ENCODING_UTF8: {
  26. string dest;
  27. wstrToUtf8(dest, contents);
  28. return dest.size();
  29. }
  30. default:
  31. return NULL;
  32. }
  33. }
  34. const char *String::getDataWithEncoding(int encoding) {
  35. switch(encoding) {
  36. case ENCODING_UTF8: {
  37. string dest;
  38. wstrToUtf8(dest, contents);
  39. return dest.data();
  40. }
  41. break;
  42. default:
  43. return NULL;
  44. }
  45. }
  46. void String::setDataWithEncoding(char *data, int encoding) {
  47. switch(encoding) {
  48. case ENCODING_UTF8: {
  49. string str = string(data);
  50. utf8toWStr(contents, str);
  51. }
  52. default:
  53. break;
  54. }
  55. }
  56. vector<String> String::split(const String &delims) {
  57. vector<String> tokens;
  58. bool trimEmpty = false;
  59. std::wstring::size_type pos, lastPos = 0;
  60. while(true)
  61. {
  62. pos = contents.find_first_of(delims.contents, lastPos);
  63. if(pos == std::wstring::npos)
  64. {
  65. pos = contents.length();
  66. if(pos != lastPos || !trimEmpty)
  67. tokens.push_back(vector<String>::value_type(contents.data()+lastPos, (wstring::size_type)pos-lastPos ));
  68. break;
  69. }
  70. else
  71. {
  72. if(pos != lastPos || !trimEmpty)
  73. tokens.push_back(vector<String>::value_type(contents.data()+lastPos, (wstring::size_type)pos-lastPos ));
  74. }
  75. lastPos = pos + 1;
  76. }
  77. return tokens;
  78. }
  79. String String::replace(const String &what, const String &withWhat) {
  80. vector<String> arr = split(what);
  81. String retString = "";
  82. for(int i= 0; i < arr.size(); i++) {
  83. retString += arr[i];
  84. if(i < arr.size()-1)
  85. retString += withWhat;
  86. }
  87. return retString;
  88. }
  89. String String::toLowerCase() {
  90. wstring str = contents;
  91. std::transform(str.begin(), str.end(), str.begin(),std::tolower);
  92. return String(str);
  93. }
  94. String String::NumberToString(Number value) {
  95. char temp[128];
  96. sprintf(temp, "%f", value);
  97. return String(temp);
  98. }
  99. string String::getSTLString() {
  100. s_contents.assign(contents.begin(),contents.end());
  101. return s_contents;
  102. }
  103. wstring String::getSTLWString() {
  104. return contents;
  105. }
  106. const char *String::c_str() {
  107. s_contents.assign(contents.begin(),contents.end());
  108. return s_contents.c_str();
  109. }
  110. const wchar_t *String::wc_str() {
  111. return contents.c_str();
  112. }
  113. void utf8toWStr(WStr& dest, const Str& src){
  114. dest.clear();
  115. wchar_t w = 0;
  116. int bytes = 0;
  117. wchar_t err = L'�';
  118. for (size_t i = 0; i < src.size(); i++){
  119. unsigned char c = (unsigned char)src[i];
  120. if (c <= 0x7f){//first byte
  121. if (bytes){
  122. dest.push_back(err);
  123. bytes = 0;
  124. }
  125. dest.push_back((wchar_t)c);
  126. }
  127. else if (c <= 0xbf){//second/third/etc byte
  128. if (bytes){
  129. w = ((w << 6)|(c & 0x3f));
  130. bytes--;
  131. if (bytes == 0)
  132. dest.push_back(w);
  133. }
  134. else
  135. dest.push_back(err);
  136. }
  137. else if (c <= 0xdf){//2byte sequence start
  138. bytes = 1;
  139. w = c & 0x1f;
  140. }
  141. else if (c <= 0xef){//3byte sequence start
  142. bytes = 2;
  143. w = c & 0x0f;
  144. }
  145. else if (c <= 0xf7){//3byte sequence start
  146. bytes = 3;
  147. w = c & 0x07;
  148. }
  149. else{
  150. dest.push_back(err);
  151. bytes = 0;
  152. }
  153. }
  154. if (bytes)
  155. dest.push_back(err);
  156. }
  157. void wstrToUtf8(Str& dest, const WStr& src){
  158. dest.clear();
  159. for (size_t i = 0; i < src.size(); i++){
  160. wchar_t w = src[i];
  161. if (w <= 0x7f)
  162. dest.push_back((char)w);
  163. else if (w <= 0x7ff){
  164. dest.push_back(0xc0 | ((w >> 6)& 0x1f));
  165. dest.push_back(0x80| (w & 0x3f));
  166. }
  167. else if (w <= 0xffff){
  168. dest.push_back(0xe0 | ((w >> 12)& 0x0f));
  169. dest.push_back(0x80| ((w >> 6) & 0x3f));
  170. dest.push_back(0x80| (w & 0x3f));
  171. }
  172. else if (w <= 0x10ffff){
  173. dest.push_back(0xf0 | ((w >> 18)& 0x07));
  174. dest.push_back(0x80| ((w >> 12) & 0x3f));
  175. dest.push_back(0x80| ((w >> 6) & 0x3f));
  176. dest.push_back(0x80| (w & 0x3f));
  177. }
  178. else
  179. dest.push_back('?');
  180. }
  181. }