bbstring.cpp 14 KB


  1. #include "bbstring.h"
  2. #include "bbarray.h"
  3. #include "bbplatform.h"
  4. #include "bbmonkey.h"
  5. #include <cwctype>
  6. #include <clocale>
// Shared Rep instance used for every empty string: Rep::alloc() returns its address for a
// zero length, and the const void* constructors point at it when given a null pointer.
bbString::Rep bbString::_nullRep;
  8. #if BB_ANDROID
  9. #include <jni.h>
  10. //FIXME: SDL2 dependancy!
  11. extern "C" void *SDL_AndroidGetJNIEnv();
  12. #endif
  13. namespace{
  14. #if BB_ANDROID
// JNI handles assigned by initLocale() on Android.
// jclass_lang is the result of FindClass( "com/monkey2/lib/Monkey2Lang" ).
jclass jclass_lang;
// Static method IDs looked up on jclass_lang; each takes and returns a java.lang.String.
jmethodID jmethod_toUpper;
jmethodID jmethod_toLower;
jmethodID jmethod_capitalize;
  19. bbString JStringToString( JNIEnv *env,jstring jstr ){
  20. if( !jstr ) return "";
  21. const char *cstr=env->GetStringUTFChars( jstr,0 );
  22. bbString str=bbString::fromCString( cstr );
  23. env->ReleaseStringUTFChars( jstr,cstr );
  24. return str;
  25. }
  26. jstring StringToJString( JNIEnv *env,bbString str ){
  27. int n=str.utf8Length()+1;
  28. char *buf=new char[n];
  29. str.toCString( buf,n );
  30. jstring jstr=env->NewStringUTF( buf );
  31. return jstr;
  32. }
  33. bbString invokeStaticStringMethod( jmethodID jmethod,bbString arg ){
  34. JNIEnv *env=(JNIEnv*)SDL_AndroidGetJNIEnv();
  35. jstring jarg=StringToJString( env,arg );
  36. jstring jres=(jstring)env->CallStaticObjectMethod( jclass_lang,jmethod,jarg );
  37. bbString res=JStringToString( env,jres );
  38. env->DeleteLocalRef( jres );
  39. env->DeleteLocalRef( jarg );
  40. return res;
  41. }
  42. #endif
  43. void initLocale(){
  44. static bool inited;
  45. if( inited ) return;
  46. inited=true;
  47. #if BB_ANDROID
  48. JNIEnv *env=(JNIEnv*)SDL_AndroidGetJNIEnv();
  49. jclass_lang=env->FindClass( "com/monkey2/lib/Monkey2Lang" );
  50. jmethod_toUpper=env->GetStaticMethodID( jclass_lang,"toUpper","(Ljava/lang/String;)Ljava/lang/String;" );
  51. jmethod_toLower=env->GetStaticMethodID( jclass_lang,"toLower","(Ljava/lang/String;)Ljava/lang/String;" );
  52. jmethod_capitalize=env->GetStaticMethodID( jclass_lang,"capitalize","(Ljava/lang/String;)Ljava/lang/String;" );
  53. bb_printf( "initLocale: env=%p\n",env );
  54. #elif BB_WINDOWS
  55. std::setlocale( LC_ALL,"English" );
  56. // std::setlocale( LC_CTYPE,"English" );
  57. #else
  58. std::setlocale( LC_CTYPE,"en_US.UTF-8" );
  59. #endif
  60. }
  61. template<class C> int t_memcmp( const C *p1,const C *p2,int count ){
  62. return memcmp( p1,p2,count*sizeof(C) );
  63. }
  64. //returns END of dst!
  65. template<class C> C *t_memcpy( C *dst,const C *src,int count ){
  66. return (C*)memcpy( dst,src,count*sizeof(C) )+count;
  67. }
  68. int countUtf8Chars( const char *p,int sz ){
  69. const char *e=p+sz;
  70. int n=0;
  71. while( p!=e ){
  72. int c=*p++;
  73. if( c & 0x80 ){
  74. if( (c & 0xe0)==0xc0 ){
  75. if( p==e || (p[0] & 0xc0)!=0x80 ) return -1;
  76. p+=1;
  77. }else if( (c & 0xf0)==0xe0 ){
  78. if( p==e || p+1==e || (p[0] & 0xc0)!=0x80 || (p[1] & 0xc0)!=0x80 ) return -1;
  79. p+=2;
  80. }else{
  81. return -1;
  82. }
  83. }
  84. n+=1;
  85. }
  86. return n;
  87. }
  88. int countNullTerminatedUtf8Chars( const char *p,int sz ){
  89. const char *e=p+sz;
  90. int n=0;
  91. while( p!=e && *p ){
  92. int c=*p++;
  93. if( c & 0x80 ){
  94. if( (c & 0xe0)==0xc0 ){
  95. if( p==e || (p[0] & 0xc0)!=0x80 ) return -1;
  96. p+=1;
  97. }else if( (c & 0xf0)==0xe0 ){
  98. if( p==e || p+1==e || (p[0] & 0xc0)!=0x80 || (p[1] & 0xc0)!=0x80 ) return -1;
  99. p+=2;
  100. }else{
  101. return -1;
  102. }
  103. }
  104. n+=1;
  105. }
  106. return n;
  107. }
  108. void charsToUtf8( const bbChar *p,int n,char *dst,int size ){
  109. char *end=dst+size;
  110. const bbChar *e=p+n;
  111. while( p<e && dst<end ){
  112. bbChar c=*p++;
  113. if( c<0x80 ){
  114. *dst++=c;
  115. }else if( c<0x800 ){
  116. if( dst+2>end ) break;
  117. *dst++=0xc0 | (c>>6);
  118. *dst++=0x80 | (c & 0x3f);
  119. }else{
  120. if( dst+3>end ) break;
  121. *dst++=0xe0 | (c>>12);
  122. *dst++=0x80 | ((c>>6) & 0x3f);
  123. *dst++=0x80 | (c & 0x3f);
  124. }
  125. }
  126. if( dst<end ) *dst++=0;
  127. }
  128. void utf8ToChars( const char *p,bbChar *dst,int n ){
  129. while( n-- ){
  130. int c=*p++;
  131. if( c & 0x80 ){
  132. if( (c & 0xe0)==0xc0 ){
  133. c=((c & 0x1f)<<6) | (p[0] & 0x3f);
  134. p+=1;
  135. }else if( (c & 0xf0)==0xe0 ){
  136. c=((c & 0x0f)<<12) | ((p[0] & 0x3f)<<6) | (p[1] & 0x3f);
  137. p+=2;
  138. }
  139. }
  140. *dst++=c;
  141. }
  142. }
  143. }
  144. // ***** bbString::Rep *****
  145. bbString::Rep *bbString::Rep::alloc( int length ){
  146. if( !length ) return &_nullRep;
  147. Rep *rep=(Rep*)bbGC::malloc( sizeof(Rep)+length*sizeof(bbChar) );
  148. rep->refs=1;
  149. rep->length=length;
  150. return rep;
  151. }
  152. // ***** bbString *****
  153. bbString::bbString( const void *p ){
  154. const char *cp=(const char*)p;
  155. if( !cp ){
  156. _rep=&_nullRep;
  157. return;
  158. }
  159. int sz=strlen( cp );
  160. int n=countNullTerminatedUtf8Chars( cp,sz );
  161. if( n==-1 || n==sz ){
  162. _rep=Rep::create( cp,sz );
  163. return;
  164. }
  165. _rep=Rep::alloc( n );
  166. utf8ToChars( cp,_rep->data,n );
  167. }
  168. bbString::bbString( const void *p,int sz ){
  169. const char *cp=(const char*)p;
  170. if( !cp ){
  171. _rep=&_nullRep;
  172. return;
  173. }
  174. int n=countUtf8Chars( cp,sz );
  175. if( n==-1 || n==sz ){
  176. _rep=Rep::create( cp,sz );
  177. return;
  178. }
  179. _rep=Rep::alloc( n );
  180. utf8ToChars( cp,_rep->data,n );
  181. }
  182. bbString::bbString( const bbChar *data ):_rep( Rep::create( data ) ){
  183. }
  184. bbString::bbString( const bbChar *data,int length ):_rep( Rep::create( data,length ) ){
  185. }
  186. bbString::bbString( const wchar_t *data ):_rep( Rep::create( data ) ){
  187. }
  188. bbString::bbString( const wchar_t *data,int length ):_rep( Rep::create( data,length ) ){
  189. }
  190. int bbString::utf8Length()const{
  191. const bbChar *p=data();
  192. const bbChar *e=p+length();
  193. int n=0;
  194. while( p<e ){
  195. bbChar c=*p++;
  196. if( c<0x80 ){
  197. n+=1;
  198. }else if( c<0x800 ){
  199. n+=2;
  200. }else{
  201. n+=3;
  202. }
  203. }
  204. return n;
  205. }
  206. bbString::bbString( bool b ){
  207. _rep=Rep::create( b ? "True" : "False" );
  208. }
  209. bbString::bbString( int n ){
  210. char data[64];
  211. sprintf( data,"%d",n );
  212. _rep=Rep::create( data );
  213. }
  214. bbString::bbString( unsigned int n ){
  215. char data[64];
  216. sprintf( data,"%u",n );
  217. _rep=Rep::create( data );
  218. }
  219. bbString::bbString( long n ){
  220. char data[64];
  221. sprintf( data,"%ld",n );
  222. _rep=Rep::create( data );
  223. }
  224. bbString::bbString( unsigned long n ){
  225. char data[64];
  226. sprintf( data,"%lu",n );
  227. _rep=Rep::create( data );
  228. }
  229. bbString::bbString( long long n ){
  230. char data[64];
  231. sprintf( data,"%lld",n );
  232. _rep=Rep::create( data );
  233. }
  234. bbString::bbString( unsigned long long n ){
  235. char data[64];
  236. sprintf( data,"%llu",n );
  237. _rep=Rep::create( data );
  238. }
  239. bbString::bbString( float n ){
  240. char data[64];
  241. sprintf( data,"%.9g",n );
  242. _rep=Rep::create( data );
  243. }
  244. bbString::bbString( double n ){
  245. char data[64];
  246. sprintf( data,"%.17g",n );
  247. _rep=Rep::create( data );
  248. }
  249. void bbString::toCString( void *buf,int size )const{
  250. charsToUtf8( _rep->data,_rep->length,(char*)buf,size );
  251. }
  252. void bbString::toWString( void *buf,int size )const{
  253. size=size/sizeof(wchar_t);
  254. if( size<=0 ) return;
  255. int sz=length();
  256. if( sz>size ) sz=size;
  257. for( int i=0;i<sz;++i ) ((wchar_t*)buf)[i]=data()[i];
  258. if( sz<size ) ((wchar_t*)buf)[sz]=0;
  259. }
  260. const char *bbString::c_str()const{
  261. static int _sz;
  262. static char *_tmp;
  263. int sz=utf8Length()+1;
  264. if( sz>_sz ){
  265. ::free( _tmp );
  266. _tmp=(char*)::malloc( _sz=sz );
  267. }
  268. toCString( _tmp,sz );
  269. return _tmp;
  270. }
  271. bool bbString::startsWith( const bbString &str )const{
  272. if( str.length()>length() ) return false;
  273. return t_memcmp( data(),str.data(),str.length() )==0;
  274. }
  275. bool bbString::endsWith( const bbString &str )const{
  276. if( str.length()>length() ) return false;
  277. return t_memcmp( data()+(length()-str.length()),str.data(),str.length() )==0;
  278. }
  279. bbString bbString::fromChar( int chr ){
  280. wchar_t chrs[]={ wchar_t(chr) };
  281. return bbString( chrs,1 );
  282. }
  283. bbArray<bbString> bbString::split( bbString sep )const{
  284. if( !sep.length() ){
  285. bbArray<bbString> bits=bbArray<bbString>( length() );
  286. bits.retain();
  287. for( int i=0;i<length();++i ){
  288. bits[i]=bbString( &data()[i],1 );
  289. }
  290. bits.release();
  291. return bits;
  292. }
  293. int i=0,i2,n=1;
  294. while( (i2=find( sep,i ))!=-1 ){
  295. ++n;
  296. i=i2+sep.length();
  297. }
  298. bbArray<bbString> bits=bbArray<bbString>( n );
  299. bits.retain();
  300. if( n==1 ){
  301. bits[0]=*this;
  302. }else{
  303. i=0;n=0;
  304. while( (i2=find( sep,i ))!=-1 ){
  305. bits[n++]=slice( i,i2 );
  306. i=i2+sep.length();
  307. }
  308. bits[n]=slice( i );
  309. }
  310. bits.release();
  311. return bits;
  312. }
  313. bbString bbString::join( bbArray<bbString> bits )const{
  314. if( bits.length()==0 ) return bbString();
  315. if( bits.length()==1 ) return bits[0];
  316. int len=length() * (bits.length()-1);
  317. for( int i=0;i<bits.length();++i ) len+=bits[i].length();
  318. Rep *rep=Rep::alloc( len );
  319. bbChar *p=rep->data;
  320. p=t_memcpy( p,bits[0].data(),bits[0].length() );
  321. for( int i=1;i<bits.length();++i ){
  322. p=t_memcpy( p,data(),length() );
  323. p=t_memcpy( p,bits[i].data(),bits[i].length() );
  324. }
  325. return rep;
  326. }
  327. bbString bbString::fromChars( bbArray<int> chrs ){
  328. return Rep::create( chrs.data(),chrs.length() );
  329. }
  330. bbString bbString::operator-()const{
  331. Rep *rep=Rep::alloc( length() );
  332. const bbChar *p=data()+length();
  333. for( int i=0;i<rep->length;++i ) rep->data[i]=*--p;
  334. return rep;
  335. }
  336. bbString bbString::operator+( const bbString &str )const{
  337. if( !length() ) return str;
  338. if( !str.length() ) return *this;
  339. Rep *rep=Rep::alloc( length()+str.length() );
  340. t_memcpy( rep->data,data(),length() );
  341. t_memcpy( rep->data+length(),str.data(),str.length() );
  342. return rep;
  343. }
  344. bbString bbString::operator*( int n )const{
  345. Rep *rep=Rep::alloc( length()*n );
  346. bbChar *p=rep->data;
  347. for( int j=0;j<n;++j ){
  348. for( int i=0;i<_rep->length;++i ) *p++=data()[i];
  349. }
  350. return rep;
  351. }
  352. int bbString::find( bbString str,int from )const{
  353. if( from<0 ) from=0;
  354. for( int i=from;i<=length()-str.length();++i ){
  355. if( !t_memcmp( data()+i,str.data(),str.length() ) ) return i;
  356. }
  357. return -1;
  358. }
  359. int bbString::findLast( const bbString &str,int from )const{
  360. if( from<0 ) from=0;
  361. for( int i=length()-str.length();i>=from;--i ){
  362. if( !t_memcmp( data()+i,str.data(),str.length() ) ) return i;
  363. }
  364. return -1;
  365. }
  366. bbString bbString::slice( int from )const{
  367. int length=this->length();
  368. if( from<0 ){
  369. from+=length;
  370. if( from<0 ) from=0;
  371. }else if( from>length ){
  372. from=length;
  373. }
  374. if( !from ) return *this;
  375. return bbString( data()+from,length-from );
  376. }
  377. bbString bbString::slice( int from,int term )const{
  378. int length=this->length();
  379. if( from<0 ){
  380. from+=length;
  381. if( from<0 ) from=0;
  382. }else if( from>length ){
  383. from=length;
  384. }
  385. if( term<0 ){
  386. term+=length;
  387. if( term<from ) term=from;
  388. }else if( term<from ){
  389. term=from;
  390. }else if( term>length ){
  391. term=length;
  392. }
  393. if( !from && term==length ) return *this;
  394. return bbString( data()+from,term-from );
  395. }
  396. bbString bbString::toUpper()const{
  397. initLocale();
  398. #if BB_ANDROID
  399. return invokeStaticStringMethod( jmethod_toUpper,*this );
  400. #else
  401. Rep *rep=Rep::alloc( length() );
  402. for( int i=0;i<length();++i ) rep->data[i]=::towupper( data()[i] );
  403. return rep;
  404. #endif
  405. }
  406. bbString bbString::toLower()const{
  407. initLocale();
  408. #if BB_ANDROID
  409. return invokeStaticStringMethod( jmethod_toLower,*this );
  410. #else
  411. Rep *rep=Rep::alloc( length() );
  412. for( int i=0;i<length();++i ) rep->data[i]=::towlower( data()[i] );
  413. return rep;
  414. #endif
  415. }
  416. bbString bbString::capitalize()const{
  417. initLocale();
  418. #if BB_ANDROID
  419. return invokeStaticStringMethod( jmethod_capitalize,*this );
  420. #else
  421. if( !length() ) return &_nullRep;
  422. Rep *rep=Rep::alloc( length() );
  423. rep->data[0]=::towupper( data()[0] );
  424. for( int i=1;i<length();++i ) rep->data[i]=data()[i];
  425. return rep;
  426. #endif
  427. }
  428. bbString bbString::trim()const{
  429. const bbChar *beg=data();
  430. const bbChar *end=data()+length();
  431. while( beg!=end && *beg<=32 ) ++beg;
  432. while( beg!=end && *(end-1)<=32 ) --end;
  433. if( end-beg==length() ) return *this;
  434. return bbString( beg,end-beg );
  435. }
  436. bbString bbString::trimStart()const{
  437. const bbChar *beg=data();
  438. const bbChar *end=data()+length();
  439. while( beg!=end && *beg<=32 ) ++beg;
  440. if( end-beg==length() ) return *this;
  441. return bbString( beg,end-beg );
  442. }
  443. bbString bbString::trimEnd()const{
  444. const bbChar *beg=data();
  445. const bbChar *end=data()+length();
  446. while( beg!=end && *(end-1)<=32 ) --end;
  447. if( end-beg==length() ) return *this;
  448. return bbString( beg,end-beg );
  449. }
  450. bbString bbString::dup( int n )const{
  451. Rep *rep=Rep::alloc( length()*n );
  452. bbChar *p=rep->data;
  453. for( int j=0;j<n;++j ){
  454. for( int i=0;i<_rep->length;++i ) *p++=data()[i];
  455. }
  456. return rep;
  457. }
  458. bbString bbString::replace( const bbString &str,const bbString &repl )const{
  459. int n=0;
  460. for( int i=0;; ){
  461. i=find( str,i );
  462. if( i==-1 ) break;
  463. i+=str.length();
  464. ++n;
  465. }
  466. if( !n ) return *this;
  467. Rep *rep=Rep::alloc( length()+n*(repl.length()-str.length()) );
  468. bbChar *dst=rep->data;
  469. for( int i=0;; ){
  470. int i2=find( str,i );
  471. if( i2==-1 ){
  472. t_memcpy( dst,data()+i,(length()-i) );
  473. break;
  474. }
  475. t_memcpy( dst,data()+i,(i2-i) );
  476. dst+=(i2-i);
  477. t_memcpy( dst,repl.data(),repl.length() );
  478. dst+=repl.length();
  479. i=i2+str.length();
  480. }
  481. return rep;
  482. }
  483. int bbString::compare( const bbString &t )const{
  484. int len=length()<t.length() ? length() : t.length();
  485. for( int i=0;i<len;++i ){
  486. if( int n=data()[i]-t.data()[i] ) return n;
  487. }
  488. return length()-t.length();
  489. }
  490. bbString::operator bbInt()const{
  491. return std::atoi( c_str() );
  492. }
  493. bbString::operator bbByte()const{
  494. return operator bbInt() & 0xff;
  495. }
  496. bbString::operator bbUByte()const{
  497. return operator bbInt() & 0xffu;
  498. }
  499. bbString::operator bbShort()const{
  500. return operator bbInt() & 0xffff;
  501. }
  502. bbString::operator bbUShort()const{
  503. return operator bbInt() & 0xffffu;
  504. }
  505. bbString::operator bbUInt()const{
  506. bbUInt n=0;
  507. sscanf( c_str(),"%u",&n );
  508. return n;
  509. }
  510. bbString::operator bbLong()const{
  511. bbLong n=0;
  512. sscanf( c_str(),"%lld",&n );
  513. return n;
  514. }
  515. bbString::operator bbULong()const{
  516. bbULong n=0;
  517. sscanf( c_str(),"%llu",&n );
  518. return n;
  519. }
  520. bbString::operator float()const{
  521. return std::atof( c_str() );
  522. }
  523. bbString::operator double()const{
  524. return std::atof( c_str() );
  525. }
  526. // ***** CString *****
  527. bbCString::bbCString( const bbString &str ){
  528. int size=str.utf8Length()+1;
  529. _data=(char*)bbGC::malloc( size );
  530. str.toCString( _data,size );
  531. }
  532. bbCString::~bbCString(){
  533. bbGC::free( _data );
  534. }
  535. bbCString::operator char*()const{
  536. return _data;
  537. }
  538. bbCString::operator signed char*()const{
  539. return (signed char*)_data;
  540. }
  541. bbCString::operator unsigned char*()const{
  542. return (unsigned char*)_data;
  543. }
  544. // ***** WString *****
  545. bbWString::bbWString( const bbString &str ){
  546. int size=(str.length()+1)*sizeof(wchar_t);
  547. _data=(wchar_t*)bbGC::malloc( size );
  548. str.toWString( _data,size );
  549. }
  550. bbWString::~bbWString(){
  551. bbGC::free( _data );
  552. }
  553. bbWString::operator wchar_t*()const{
  554. return _data;
  555. }