blitz_string.c 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312
  1. #include "blitz.h"
  2. #include "bdwgc/libatomic_ops/src/atomic_ops.h"
  3. #include "blitz_unicode.h"
  4. #define XXH_IMPLEMENTATION
  5. #define XXH_STATIC_LINKING_ONLY
  6. #include "hash/xxhash.h"
  7. static void bbStringFree( BBObject *o );
  8. static BBDebugScope debugScope={
  9. BBDEBUGSCOPE_USERTYPE,
  10. "String",
  11. {
  12. {
  13. BBDEBUGDECL_END
  14. }
  15. }
  16. };
  17. struct BBClass_String bbStringClass={
  18. &bbObjectClass, //super
  19. bbStringFree, //free
  20. &debugScope, //DebugScope
  21. 0, //instance_size
  22. 0, //ctor
  23. 0, //dtor
  24. (BBString*(*)(BBObject*))bbStringToString,
  25. (int(*)(BBObject*,BBObject*))bbStringCompare,
  26. bbObjectSendMessage,
  27. 0, //interface
  28. 0, //extra
  29. 0,
  30. 0, //instance_count
  31. offsetof(BBString, hash), //fields_offset
  32. bbStringFind,
  33. bbStringFindLast,
  34. bbStringTrim,
  35. bbStringReplace,
  36. bbStringToLower,
  37. bbStringToUpper,
  38. bbStringToInt,
  39. bbStringToLong,
  40. bbStringToFloat,
  41. bbStringToDouble,
  42. bbStringToCString,
  43. bbStringToWString,
  44. bbStringFromInt,
  45. bbStringFromLong,
  46. bbStringFromFloat,
  47. bbStringFromDouble,
  48. bbStringFromCString,
  49. bbStringFromWString,
  50. bbStringFromBytes,
  51. bbStringFromShorts,
  52. bbStringStartsWith,
  53. bbStringEndsWith,
  54. bbStringContains,
  55. bbStringSplit,
  56. bbStringJoin,
  57. bbStringFromUTF8String,
  58. bbStringToUTF8String,
  59. bbStringFromUTF8Bytes,
  60. bbStringToSizet,
  61. bbStringFromSizet,
  62. bbStringToUInt,
  63. bbStringFromUInt,
  64. bbStringToULong,
  65. bbStringFromULong,
  66. #ifdef _WIN32
  67. bbStringToWParam,
  68. bbStringFromWParam,
  69. bbStringToLParam,
  70. bbStringFromLParam,
  71. #endif
  72. bbStringToUTF8StringBuffer,
  73. bbStringHash,
  74. bbStringToUTF32String,
  75. bbStringFromUTF32String,
  76. bbStringFromUTF32Bytes,
  77. bbStringToWStringBuffer,
  78. bbStringToLongInt,
  79. bbStringFromLongInt,
  80. bbStringToULongInt,
  81. bbStringFromULongInt
  82. };
  83. BBString bbEmptyString={
  84. (BBClass*)&bbStringClass, //clas
  85. 0x776eddfb6bfd9195, // hash
  86. 0 //length
  87. };
  88. static int wstrlen( const BBChar *p ){
  89. const BBChar *t=p;
  90. while( *t ) ++t;
  91. return t-p;
  92. }
  93. static int utf32strlen( const BBUINT *p ){
  94. const BBUINT *t=p;
  95. while( *t ) ++t;
  96. return t-p;
  97. }
  98. static int charsEqual( unsigned short *a,unsigned short *b,int n ){
  99. while( n-- ){
  100. if (*a!=*b) return 0;
  101. a++;b++;
  102. }
  103. return 1;
  104. }
  105. #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
  106. extern int bbStringEquals( BBString *x,BBString *y);
  107. extern int bbObjectIsEmptyString(BBObject * o);
  108. extern BBULONG bbStringHash( BBString * x );
  109. #else
  110. BBULONG bbStringHash( BBString * x ) {
  111. if (x->hash > 0) return x->hash;
  112. x->hash = XXH3_64bits(x->buf, x->length * sizeof(BBChar));
  113. return x->hash;
  114. }
  115. int bbStringEquals( BBString *x,BBString *y ){
  116. if (x->clas != &bbStringClass || y->clas != &bbStringClass) return 0; // only strings with strings
  117. if (x->length-y->length != 0) return 0;
  118. if (x->hash != 0 ) {
  119. if (!y->hash) bbStringHash(y);
  120. return (x->hash == y->hash);
  121. }
  122. return memcmp(x->buf, y->buf, x->length * sizeof(BBChar)) == 0;
  123. }
  124. int bbObjectIsEmptyString(BBObject * o) {
  125. return (BBString*)o == &bbEmptyString;
  126. }
  127. #endif
  128. //***** Note: Not called in THREADED mode.
  129. static void bbStringFree( BBObject *o ){
  130. if (bbCountInstances) {
  131. bbAtomicAdd((int*)&bbStringClass.instance_count, -1);
  132. }
  133. }
  134. BBString *bbStringNew( int len ){
  135. BBString *str;
  136. if( !len ) return &bbEmptyString;
  137. str=(BBString*)bbGCAllocObject( sizeof(BBString)+len*sizeof(BBChar),(BBClass*)&bbStringClass,BBGC_ATOMIC );
  138. str->hash=0;
  139. str->length=len;
  140. return str;
  141. }
  142. BBString *bbStringFromChar( int c ){
  143. BBString *str=bbStringNew(1);
  144. str->buf[0]=c;
  145. return str;
  146. }
  147. BBString *bbStringFromInt( int n ){
  148. char buf[64];
  149. sprintf(buf, "%d", n);
  150. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  151. }
  152. BBString *bbStringFromUInt( unsigned int n ){
  153. char buf[64];
  154. sprintf(buf, "%u", n);
  155. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  156. }
  157. BBString *bbStringFromLong( BBInt64 n ){
  158. char buf[64];
  159. sprintf(buf, "%lld", n);
  160. return bbStringFromBytes( (unsigned char*)buf,strlen(buf) );
  161. }
  162. BBString *bbStringFromULong( BBUInt64 n ){
  163. char buf[64];
  164. sprintf(buf, "%llu", n);
  165. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  166. }
  167. BBString *bbStringFromSizet( BBSIZET n ){
  168. char buf[64];
  169. #if UINTPTR_MAX == 0xffffffff
  170. sprintf(buf, "%u", n);
  171. #else
  172. sprintf(buf, "%llu", n);
  173. #endif
  174. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  175. }
  176. BBString *bbStringFromLongInt( BBLONGINT n ){
  177. char buf[64];
  178. sprintf(buf, "%ld", n);
  179. return bbStringFromBytes( (unsigned char*)buf,strlen(buf) );
  180. }
  181. BBString *bbStringFromULongInt( BBULONGINT n ){
  182. char buf[64];
  183. sprintf(buf, "%lu", n);
  184. return bbStringFromBytes( (unsigned char*)buf,strlen(buf) );
  185. }
  186. BBString *bbStringFromFloat( float n ){
  187. char buf[64];
  188. sprintf( buf,"%#.9g",n );
  189. return bbStringFromCString(buf);
  190. }
  191. BBString *bbStringFromDouble( double n ){
  192. char buf[64];
  193. sprintf( buf,"%#.17lg",n );
  194. return bbStringFromCString(buf);
  195. }
  196. BBString *bbStringFromBytes( const unsigned char *p,int n ){
  197. int k;
  198. BBString *str;
  199. if( !n ) return &bbEmptyString;
  200. str=bbStringNew( n );
  201. for( k=0;k<n;++k ) str->buf[k]=p[k];
  202. return str;
  203. }
  204. BBString *bbStringFromShorts( const unsigned short *p,int n ){
  205. BBString *str;
  206. if( !n ) return &bbEmptyString;
  207. str=bbStringNew( n );
  208. bbMemCopy( str->buf,p,n*sizeof(short) );
  209. return str;
  210. }
  211. BBString *bbStringFromInts( const int *p,int n ){
  212. int k;
  213. BBString *str;
  214. if( !n ) return &bbEmptyString;
  215. str=bbStringNew( n );
  216. for( k=0;k<n;++k ) str->buf[k]=p[k];
  217. return str;
  218. }
  219. BBString *bbStringFromUInts( const unsigned int *p,int n ){
  220. int k;
  221. BBString *str;
  222. if( !n ) return &bbEmptyString;
  223. str=bbStringNew( n );
  224. for( k=0;k<n;++k ) str->buf[k]=p[k];
  225. return str;
  226. }
  227. BBString *bbStringFromArray( BBArray *arr ){
  228. int n;
  229. void *p;
  230. if( arr->dims!=1 ) return &bbEmptyString;
  231. n=arr->scales[0];
  232. p=BBARRAYDATA(arr,arr->dims);
  233. switch( arr->type[0] ){
  234. case 'b':return bbStringFromBytes( (unsigned char*)p,n );
  235. case 's':return bbStringFromShorts( p,n );
  236. case 'i':return bbStringFromInts( p,n );
  237. }
  238. return &bbEmptyString;
  239. }
  240. BBString *bbStringFromCString( const char *p ){
  241. return p ? bbStringFromBytes( (unsigned char*)p,strlen(p) ) : &bbEmptyString;
  242. }
  243. BBString *bbStringFromWString( const BBChar *p ){
  244. return p ? bbStringFromShorts( p,wstrlen(p) ) : &bbEmptyString;
  245. }
  246. BBString *bbStringFromUTF8String( const unsigned char *p ){
  247. return p ? bbStringFromUTF8Bytes( p,strlen((char*)p) ) : &bbEmptyString;
  248. }
  249. #define REPLACEMENT_CHAR 0xFFFD
  250. BBString *bbStringFromUTF8Bytes(const unsigned char *p, int n) {
  251. if (!p || n <= 0) return &bbEmptyString;
  252. // Allocate worst-case: one output code unit per input byte.
  253. unsigned short *buffer = (unsigned short*)malloc(n * sizeof(unsigned short));
  254. if (!buffer) return &bbEmptyString; // Allocation failed
  255. unsigned short *dest = buffer;
  256. const unsigned char *end = p + n;
  257. while (p < end) {
  258. unsigned int codepoint;
  259. unsigned char byte = *p++;
  260. if (byte < 0x80) {
  261. // 1-byte (ASCII)
  262. *dest++ = byte;
  263. } else if (byte < 0xC0) {
  264. // Unexpected continuation byte; insert replacement.
  265. *dest++ = REPLACEMENT_CHAR;
  266. } else if (byte < 0xE0) {
  267. // 2-byte sequence: 110xxxxx 10xxxxxx
  268. if (p >= end) {
  269. *dest++ = REPLACEMENT_CHAR;
  270. break;
  271. }
  272. unsigned char byte2 = *p++;
  273. if ((byte2 & 0xC0) != 0x80) {
  274. *dest++ = REPLACEMENT_CHAR;
  275. continue;
  276. }
  277. codepoint = ((byte & 0x1F) << 6) | (byte2 & 0x3F);
  278. if (codepoint < 0x80) { // Overlong encoding
  279. *dest++ = REPLACEMENT_CHAR;
  280. } else {
  281. *dest++ = (unsigned short)codepoint;
  282. }
  283. } else if (byte < 0xF0) {
  284. // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
  285. if (p + 1 >= end) {
  286. *dest++ = REPLACEMENT_CHAR;
  287. break;
  288. }
  289. unsigned char byte2 = *p++;
  290. unsigned char byte3 = *p++;
  291. if ((byte2 & 0xC0) != 0x80 || (byte3 & 0xC0) != 0x80) {
  292. *dest++ = REPLACEMENT_CHAR;
  293. continue;
  294. }
  295. codepoint = ((byte & 0x0F) << 12) |
  296. ((byte2 & 0x3F) << 6) |
  297. (byte3 & 0x3F);
  298. // Reject overlong sequences and surrogate halves.
  299. if (codepoint < 0x800 || (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
  300. *dest++ = REPLACEMENT_CHAR;
  301. } else {
  302. *dest++ = (unsigned short)codepoint;
  303. }
  304. } else if (byte < 0xF8) {
  305. // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  306. if (p + 2 >= end) {
  307. *dest++ = REPLACEMENT_CHAR;
  308. break;
  309. }
  310. unsigned char byte2 = *p++;
  311. unsigned char byte3 = *p++;
  312. unsigned char byte4 = *p++;
  313. if ((byte2 & 0xC0) != 0x80 ||
  314. (byte3 & 0xC0) != 0x80 ||
  315. (byte4 & 0xC0) != 0x80) {
  316. *dest++ = REPLACEMENT_CHAR;
  317. continue;
  318. }
  319. codepoint = ((byte & 0x07) << 18) |
  320. ((byte2 & 0x3F) << 12) |
  321. ((byte3 & 0x3F) << 6) |
  322. (byte4 & 0x3F);
  323. // Ensure codepoint is within valid range.
  324. if (codepoint < 0x10000 || codepoint > 0x10FFFF) {
  325. *dest++ = REPLACEMENT_CHAR;
  326. } else {
  327. // Convert to surrogate pair.
  328. codepoint -= 0x10000;
  329. unsigned short highSurrogate = 0xD800 | ((codepoint >> 10) & 0x3FF);
  330. unsigned short lowSurrogate = 0xDC00 | (codepoint & 0x3FF);
  331. *dest++ = highSurrogate;
  332. *dest++ = lowSurrogate;
  333. }
  334. } else {
  335. // Bytes above 0xF7 are invalid in modern UTF-8.
  336. *dest++ = REPLACEMENT_CHAR;
  337. }
  338. }
  339. BBString *str = bbStringFromShorts(buffer, dest - buffer);
  340. free(buffer);
  341. return str;
  342. }
  343. BBString *bbStringToString( BBString *t ){
  344. return t;
  345. }
  346. int bbStringCompare( BBString *x,BBString *y ){
  347. int k,n,sz;
  348. if (x->clas != (BBClass*)&bbStringClass || y->clas != (BBClass*)&bbStringClass) return -1; // only compare strings with strings
  349. sz=x->length<y->length ? x->length : y->length;
  350. if (x->length == y->length && x->hash) {
  351. if (!y->hash) bbStringHash(y);
  352. if (x->hash == y->hash) return 0;
  353. }
  354. for( k=0;k<sz;++k ) if( (n=x->buf[k]-y->buf[k]) ) return n;
  355. return x->length-y->length;
  356. }
  357. int bbStringStartsWith( BBString *x,BBString *y ){
  358. BBChar *p,*q;
  359. int k,sz=y->length;
  360. if( x->length<sz ) return 0;
  361. p=x->buf;
  362. q=y->buf;
  363. for( k=0;k<sz;++k ) if( *p++!=*q++ ) return 0;
  364. return 1;
  365. }
  366. int bbStringEndsWith( BBString *x,BBString *y ){
  367. BBChar *p,*q;
  368. int k,sz=y->length;
  369. if( x->length<sz ) return 0;
  370. p=x->buf+x->length-sz;
  371. q=y->buf;
  372. for( k=0;k<sz;++k ) if( *p++!=*q++ ) return 0;
  373. return 1;
  374. }
  375. int bbStringContains( BBString *x,BBString *y ){
  376. return bbStringFind( x,y,0 )!=-1;
  377. }
  378. BBString *bbStringConcat( BBString *x,BBString *y ){
  379. int len=x->length+y->length;
  380. BBString *t=bbStringNew(len);
  381. memcpy( t->buf,x->buf,x->length*sizeof(BBChar) );
  382. memcpy( t->buf+x->length,y->buf,y->length*sizeof(BBChar) );
  383. return t;
  384. }
  385. BBString *bbStringSlice( BBString *in,int beg,int end ){
  386. BBChar *p;
  387. BBString *out;
  388. int k,n,len,inlen;
  389. len=end-beg;
  390. if( len<=0 ) return &bbEmptyString;
  391. out=bbStringNew( len );
  392. p=out->buf;
  393. inlen=in->length;
  394. if( (n=-beg)>0 ){
  395. if( beg+n>end ) n=end-beg;
  396. for( k=0;k<n;++k ) *p++=' ';
  397. if( (beg+=n)==end ) return out;
  398. }
  399. if( (n=inlen-beg)>0 ){
  400. BBChar *q=in->buf+beg;
  401. if( beg+n>end ) n=end-beg;
  402. for( k=0;k<n;++k ) *p++=*q++;
  403. if( (beg+=n)==end ) return out;
  404. }
  405. if( (n=end-beg)>0 ){
  406. for( k=0;k<n;++k ) *p++=' ';
  407. }
  408. return out;
  409. }
  410. BBString *bbStringTrim( BBString *str ){
  411. int b=0,e=str->length;
  412. while( b<e && str->buf[b]<=' ' ) ++b;
  413. if( b==e ) return &bbEmptyString;
  414. while( str->buf[e-1]<=' ' ) --e;
  415. if( e-b==str->length ) return str;
  416. return bbStringFromShorts( str->buf+b,e-b );
  417. }
  418. BBString *bbStringReplace( BBString *str,BBString *sub,BBString *rep ){
  419. int i,d,n,j,p;
  420. if( !sub->length ) return str;
  421. i=0;n=0;
  422. while( (i=bbStringFind(str,sub,i))!=-1 ) {i+=sub->length;n++;}
  423. if (!n) return str;
  424. d=rep->length-sub->length;
  425. BBString *t=bbStringNew( str->length+d*n );
  426. i=0;p=0;
  427. while( (j=bbStringFind(str,sub,i))!=-1 )
  428. {
  429. n=j-i;if (n) {memcpy( t->buf+p,str->buf+i,n*sizeof(BBChar) );p+=n;}
  430. n=rep->length;memcpy( t->buf+p,rep->buf,n*sizeof(BBChar) );p+=n;
  431. i=j+sub->length;
  432. }
  433. n=str->length-i;
  434. if (n) memcpy( t->buf+p,str->buf+i,n*sizeof(BBChar) );
  435. return t;
  436. }
  437. int bbStringAsc( BBString *t ){
  438. return t->length ? t->buf[0] : -1;
  439. }
  440. int bbStringFind( BBString *x,BBString *y,int i ){
  441. if( i<0 ) i=0;
  442. while( i+y->length<=x->length ){
  443. if( charsEqual( x->buf+i,y->buf,y->length ) ) return i;
  444. ++i;
  445. }
  446. return -1;
  447. }
  448. int bbStringFindLast( BBString *x,BBString *y,int i ){
  449. bbassert( i>=0 );
  450. i=x->length-i;
  451. if (i+y->length>x->length) i=x->length-y->length;
  452. while (i>=0)
  453. {
  454. if( charsEqual( x->buf+i,y->buf,y->length ) ) return i;
  455. --i;
  456. }
  457. return -1;
  458. }
  459. int bbStringToInt( BBString *t ){
  460. int i=0,neg=0,n=0;
  461. while( i<t->length && isspace(t->buf[i]) ) ++i;
  462. if( i==t->length ) return 0;
  463. if( t->buf[i]=='+' ) ++i;
  464. else if( (neg=(t->buf[i]=='-')) ) ++i;
  465. if( i==t->length ) return 0;
  466. if( t->buf[i]=='%' ){
  467. for( ++i;i<t->length;++i ){
  468. int c=t->buf[i];
  469. if( c!='0' && c!='1' ) break;
  470. n=n*2+(c-'0');
  471. }
  472. }else if( t->buf[i]=='$' ){
  473. for( ++i;i<t->length;++i ){
  474. int c=toupper(t->buf[i]);
  475. if( !isxdigit(c) ) break;
  476. if( c>='A' ) c-=('A'-'0'-10);
  477. n=n*16+(c-'0');
  478. }
  479. }else{
  480. for( ;i<t->length;++i ){
  481. int c=t->buf[i];
  482. if( !isdigit(c) ) break;
  483. n=n*10+(c-'0');
  484. }
  485. }
  486. return neg ? -n : n;
  487. }
  488. unsigned int bbStringToUInt( BBString *t ){
  489. int i=0,neg=0;
  490. unsigned n=0;
  491. while( i<t->length && isspace(t->buf[i]) ) ++i;
  492. if( i==t->length ) return 0;
  493. if( t->buf[i]=='+' ) ++i;
  494. else if( (neg = t->buf[i]=='-') ) ++i;
  495. if( i==t->length ) return 0;
  496. if( t->buf[i]=='%' ){
  497. for( ++i;i<t->length;++i ){
  498. int c=t->buf[i];
  499. if( c!='0' && c!='1' ) break;
  500. n=n*2+(c-'0');
  501. }
  502. }else if( t->buf[i]=='$' ){
  503. for( ++i;i<t->length;++i ){
  504. int c=toupper(t->buf[i]);
  505. if( !isxdigit(c) ) break;
  506. if( c>='A' ) c-=('A'-'0'-10);
  507. n=n*16+(c-'0');
  508. }
  509. }else{
  510. for( ;i<t->length;++i ){
  511. int c=t->buf[i];
  512. if( !isdigit(c) ) break;
  513. n=n*10+(c-'0');
  514. }
  515. }
  516. return neg ? -n : n;
  517. }
  518. BBInt64 bbStringToLong( BBString *t ){
  519. int i=0,neg=0;
  520. BBInt64 n=0;
  521. while( i<t->length && isspace(t->buf[i]) ) ++i;
  522. if( i==t->length ){ return 0; }
  523. if( t->buf[i]=='+' ) ++i;
  524. else if( (neg=(t->buf[i]=='-')) ) ++i;
  525. if( i==t->length ){ return 0; }
  526. if( t->buf[i]=='%' ){
  527. for( ++i;i<t->length;++i ){
  528. int c=t->buf[i];
  529. if( c!='0' && c!='1' ) break;
  530. n=n*2+(c-'0');
  531. }
  532. }else if( t->buf[i]=='$' ){
  533. for( ++i;i<t->length;++i ){
  534. int c=toupper(t->buf[i]);
  535. if( !isxdigit(c) ) break;
  536. if( c>='A' ) c-=('A'-'0'-10);
  537. n=n*16+(c-'0');
  538. }
  539. }else{
  540. for( ;i<t->length;++i ){
  541. int c=t->buf[i];
  542. if( !isdigit(c) ) break;
  543. n=n*10+(c-'0');
  544. }
  545. }
  546. //*r=neg ? -n : n;
  547. return neg ? -n : n;
  548. }
  549. BBUInt64 bbStringToULong( BBString *t ){
  550. int i=0,neg=0;
  551. BBUInt64 n=0;
  552. while( i<t->length && isspace(t->buf[i]) ) ++i;
  553. if( i==t->length ){ return 0; }
  554. if( t->buf[i]=='+' ) ++i;
  555. else if( (neg = t->buf[i]=='-') ) ++i;
  556. if( i==t->length ){ return 0; }
  557. if( t->buf[i]=='%' ){
  558. for( ++i;i<t->length;++i ){
  559. int c=t->buf[i];
  560. if( c!='0' && c!='1' ) break;
  561. n=n*2+(c-'0');
  562. }
  563. }else if( t->buf[i]=='$' ){
  564. for( ++i;i<t->length;++i ){
  565. int c=toupper(t->buf[i]);
  566. if( !isxdigit(c) ) break;
  567. if( c>='A' ) c-=('A'-'0'-10);
  568. n=n*16+(c-'0');
  569. }
  570. }else{
  571. for( ;i<t->length;++i ){
  572. int c=t->buf[i];
  573. if( !isdigit(c) ) break;
  574. n=n*10+(c-'0');
  575. }
  576. }
  577. return neg ? -n : n;
  578. }
  579. BBSIZET bbStringToSizet( BBString *t ){
  580. int i=0,neg=0;
  581. BBSIZET n=0;
  582. while( i<t->length && isspace(t->buf[i]) ) ++i;
  583. if( i==t->length ){ return 0; }
  584. if( t->buf[i]=='+' ) ++i;
  585. else if( (neg=(t->buf[i]=='-')) ) ++i;
  586. if( i==t->length ){ return 0; }
  587. if( t->buf[i]=='%' ){
  588. for( ++i;i<t->length;++i ){
  589. int c=t->buf[i];
  590. if( c!='0' && c!='1' ) break;
  591. n=n*2+(c-'0');
  592. }
  593. }else if( t->buf[i]=='$' ){
  594. for( ++i;i<t->length;++i ){
  595. int c=toupper(t->buf[i]);
  596. if( !isxdigit(c) ) break;
  597. if( c>='A' ) c-=('A'-'0'-10);
  598. n=n*16+(c-'0');
  599. }
  600. }else{
  601. for( ;i<t->length;++i ){
  602. int c=t->buf[i];
  603. if( !isdigit(c) ) break;
  604. n=n*10+(c-'0');
  605. }
  606. }
  607. //*r=neg ? -n : n;
  608. return neg ? -n : n;
  609. }
  610. BBLONGINT bbStringToLongInt( BBString *t ){
  611. int i=0,neg=0;
  612. BBLONGINT n=0;
  613. while( i<t->length && isspace(t->buf[i]) ) ++i;
  614. if( i==t->length ){ return 0; }
  615. if( t->buf[i]=='+' ) ++i;
  616. else if( (neg=(t->buf[i]=='-')) ) ++i;
  617. if( i==t->length ){ return 0; }
  618. if( t->buf[i]=='%' ){
  619. for( ++i;i<t->length;++i ){
  620. int c=t->buf[i];
  621. if( c!='0' && c!='1' ) break;
  622. n=n*2+(c-'0');
  623. }
  624. }else if( t->buf[i]=='$' ){
  625. for( ++i;i<t->length;++i ){
  626. int c=toupper(t->buf[i]);
  627. if( !isxdigit(c) ) break;
  628. if( c>='A' ) c-=('A'-'0'-10);
  629. n=n*16+(c-'0');
  630. }
  631. }else{
  632. for( ;i<t->length;++i ){
  633. int c=t->buf[i];
  634. if( !isdigit(c) ) break;
  635. n=n*10+(c-'0');
  636. }
  637. }
  638. //*r=neg ? -n : n;
  639. return neg ? -n : n;
  640. }
  641. BBULONGINT bbStringToULongInt( BBString *t ){
  642. int i=0,neg=0;
  643. BBULONGINT n=0;
  644. while( i<t->length && isspace(t->buf[i]) ) ++i;
  645. if( i==t->length ){ return 0; }
  646. if( t->buf[i]=='+' ) ++i;
  647. else if( (neg = t->buf[i]=='-') ) ++i;
  648. if( i==t->length ){ return 0; }
  649. if( t->buf[i]=='%' ){
  650. for( ++i;i<t->length;++i ){
  651. int c=t->buf[i];
  652. if( c!='0' && c!='1' ) break;
  653. n=n*2+(c-'0');
  654. }
  655. }else if( t->buf[i]=='$' ){
  656. for( ++i;i<t->length;++i ){
  657. int c=toupper(t->buf[i]);
  658. if( !isxdigit(c) ) break;
  659. if( c>='A' ) c-=('A'-'0'-10);
  660. n=n*16+(c-'0');
  661. }
  662. }else{
  663. for( ;i<t->length;++i ){
  664. int c=t->buf[i];
  665. if( !isdigit(c) ) break;
  666. n=n*10+(c-'0');
  667. }
  668. }
  669. return neg ? -n : n;
  670. }
  671. float bbStringToFloat( BBString *t ){
  672. char *p=(char*)bbStringToCString( t );
  673. float n=atof( p );
  674. bbMemFree( p );
  675. return n;
  676. }
  677. double bbStringToDouble( BBString *t ){
  678. char *p=(char*)bbStringToCString( t );
  679. double n=atof( p );
  680. bbMemFree( p );
  681. return n;
  682. }
  683. #ifdef _WIN32
  684. WPARAM bbStringToWParam( BBString *t ){
  685. int i=0,neg=0;
  686. WPARAM n=0;
  687. while( i<t->length && isspace(t->buf[i]) ) ++i;
  688. if( i==t->length ) return 0;
  689. if( t->buf[i]=='+' ) ++i;
  690. else if( (neg = t->buf[i]=='-') ) ++i;
  691. if( i==t->length ) return 0;
  692. if( t->buf[i]=='%' ){
  693. for( ++i;i<t->length;++i ){
  694. int c=t->buf[i];
  695. if( c!='0' && c!='1' ) break;
  696. n=n*2+(c-'0');
  697. }
  698. }else if( t->buf[i]=='$' ){
  699. for( ++i;i<t->length;++i ){
  700. int c=toupper(t->buf[i]);
  701. if( !isxdigit(c) ) break;
  702. if( c>='A' ) c-=('A'-'0'-10);
  703. n=n*16+(c-'0');
  704. }
  705. }else{
  706. for( ;i<t->length;++i ){
  707. int c=t->buf[i];
  708. if( !isdigit(c) ) break;
  709. n=n*10+(c-'0');
  710. }
  711. }
  712. return neg ? -n : n;
  713. }
  714. BBString *bbStringFromWParam( WPARAM n ){
  715. char buf[64];
  716. #ifdef __x86_64__
  717. sprintf(buf, "%llu", n);
  718. #else
  719. sprintf(buf, "%u", n);
  720. #endif
  721. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  722. }
  723. LPARAM bbStringToLParam( BBString *t ){
  724. int i=0,neg=0;
  725. LPARAM n=0;
  726. while( i<t->length && isspace(t->buf[i]) ) ++i;
  727. if( i==t->length ) return 0;
  728. if( t->buf[i]=='+' ) ++i;
  729. else if( (neg=(t->buf[i]=='-')) ) ++i;
  730. if( i==t->length ) return 0;
  731. if( t->buf[i]=='%' ){
  732. for( ++i;i<t->length;++i ){
  733. int c=t->buf[i];
  734. if( c!='0' && c!='1' ) break;
  735. n=n*2+(c-'0');
  736. }
  737. }else if( t->buf[i]=='$' ){
  738. for( ++i;i<t->length;++i ){
  739. int c=toupper(t->buf[i]);
  740. if( !isxdigit(c) ) break;
  741. if( c>='A' ) c-=('A'-'0'-10);
  742. n=n*16+(c-'0');
  743. }
  744. }else{
  745. for( ;i<t->length;++i ){
  746. int c=t->buf[i];
  747. if( !isdigit(c) ) break;
  748. n=n*10+(c-'0');
  749. }
  750. }
  751. return neg ? -n : n;
  752. }
  753. BBString *bbStringFromLParam( LPARAM n ){
  754. char buf[64];
  755. #ifdef __x86_64__
  756. sprintf(buf, "%lld", n);
  757. #else
  758. sprintf(buf, "%d", n);
  759. #endif
  760. return bbStringFromBytes( (unsigned char*)buf, strlen(buf) );
  761. }
  762. #endif
  763. BBString *bbStringToLower( BBString *str ){
  764. int k;
  765. BBString *t;
  766. int n = 0;
  767. while (n < str->length) {
  768. int c = str->buf[n];
  769. if (c < 192) {
  770. // ASCII character
  771. if (c >= 'A' && c <= 'Z') {
  772. // Found an uppercase ASCII character
  773. break;
  774. }
  775. } else {
  776. // Unicode character
  777. // Check if the character is an uppercase Unicode character
  778. int lo = 0, hi = (3828 / 4) - 1; // sizeof(bbToLowerData) = 3828
  779. int is_upper = 0;
  780. while (lo <= hi) {
  781. int mid = (lo + hi) / 2;
  782. int upper = bbToLowerData[mid * 2];
  783. if (c < upper) {
  784. hi = mid - 1;
  785. } else if (c > upper) {
  786. lo = mid + 1;
  787. } else {
  788. // Found an uppercase Unicode character
  789. is_upper = 1;
  790. break;
  791. }
  792. }
  793. if (is_upper) {
  794. break;
  795. }
  796. }
  797. ++n;
  798. }
  799. if (n == str->length) {
  800. return str;
  801. }
  802. t=bbStringNew( str->length );
  803. if (n > 0) {
  804. memcpy(t->buf, str->buf, n * sizeof(BBChar));
  805. }
  806. for( k=n;k<str->length;++k ){
  807. int c=str->buf[k];
  808. if( c<192 ){
  809. c=(c>='A' && c<='Z') ? (c|32) : c;
  810. }else{
  811. int lo=0,hi=3828/4-1; // sizeof(bbToLowerData)=3828
  812. while( lo<=hi ){
  813. int mid=(lo+hi)/2;
  814. if( c<bbToLowerData[mid*2] ){
  815. hi=mid-1;
  816. }else if( c>bbToLowerData[mid*2] ){
  817. lo=mid+1;
  818. }else{
  819. c=bbToLowerData[mid*2+1];
  820. break;
  821. }
  822. }
  823. }
  824. t->buf[k]=c;
  825. }
  826. return t;
  827. }
  828. BBString *bbStringToUpper( BBString *str ){
  829. int k;
  830. BBString *t;
  831. int n = 0;
  832. while (n < str->length) {
  833. int c = str->buf[n];
  834. if (c < 181) {
  835. // ASCII character
  836. if (c >= 'a' && c <= 'z') {
  837. // Found a lowercase ASCII character
  838. break;
  839. }
  840. } else {
  841. // Unicode character
  842. // Check if the character is a lowercase Unicode character
  843. int lo = 0, hi = (3860 / 4) - 1; // sizeof(bbToUpperData) = 3860
  844. int is_lower = 0;
  845. while (lo <= hi) {
  846. int mid = (lo + hi) / 2;
  847. int lower = bbToUpperData[mid * 2];
  848. if (c < lower) {
  849. hi = mid - 1;
  850. } else if (c > lower) {
  851. lo = mid + 1;
  852. } else {
  853. // Found a lowercase Unicode character
  854. is_lower = 1;
  855. break;
  856. }
  857. }
  858. if (is_lower) {
  859. break;
  860. }
  861. }
  862. ++n;
  863. }
  864. if (n == str->length) {
  865. return str;
  866. }
  867. t=bbStringNew( str->length );
  868. if (n > 0) {
  869. memcpy(t->buf, str->buf, n * sizeof(BBChar));
  870. }
  871. for( k=n;k<str->length;++k ){
  872. int c=str->buf[k];
  873. if( c<181 ){
  874. c=(c>='a' && c<='z') ? (c&~32) : c;
  875. }else{
  876. int lo=0,hi= 3860/4-1; // sizeof(bbToUpperData)= 3860
  877. while( lo<=hi ){
  878. int mid=(lo+hi)/2;
  879. if( c<bbToUpperData[mid*2] ){
  880. hi=mid-1;
  881. }else if( c>bbToUpperData[mid*2] ){
  882. lo=mid+1;
  883. }else{
  884. c=bbToUpperData[mid*2+1];
  885. break;
  886. }
  887. }
  888. }
  889. t->buf[k]=c;
  890. }
  891. return t;
  892. }
  893. unsigned char *bbStringToCString( BBString *str ){
  894. unsigned char *p;
  895. int k,sz=str->length;
  896. p=(unsigned char*)bbMemAlloc( sz+1 );
  897. for( k=0;k<sz;++k ) p[k]=str->buf[k];
  898. p[sz]=0;
  899. return p;
  900. }
  901. BBChar *bbStringToWString( BBString *str ){
  902. BBChar *p;
  903. size_t sz=str->length + 1;
  904. p=(BBChar*)bbMemAlloc( sz * sizeof(BBChar) );
  905. return bbStringToWStringBuffer(str, p, &sz);
  906. }
  907. BBChar *bbStringToWStringBuffer( BBString *str, BBChar * buf, size_t * length ){
  908. size_t sz = str->length + 1 < *length ? str->length + 1 : *length;
  909. BBChar * p = buf;
  910. memcpy(p,str->buf,sz*sizeof(BBChar));
  911. p[sz-1]=0;
  912. return p;
  913. }
  914. unsigned char *bbStringToUTF8String( BBString *str ){
  915. int len=str->length;
  916. size_t buflen = len * 4 + 1;
  917. unsigned char *buf=(unsigned char*)bbMemAlloc( buflen );
  918. return bbStringToUTF8StringBuffer(str, buf, &buflen);
  919. }
  920. unsigned char *bbStringToUTF8StringBuffer( BBString *str, unsigned char * buf, size_t * length ){
  921. int i=0,len=str->length;
  922. size_t buflen = *length;
  923. unsigned char *q=buf;
  924. unsigned short *p=str->buf;
  925. while (i < len) {
  926. unsigned int c=*p++;
  927. if(0xd800 <= c && c <= 0xdbff && i < len - 1) {
  928. /* surrogate pair */
  929. unsigned int c2 = *p;
  930. if(0xdc00 <= c2 && c2 <= 0xdfff) {
  931. /* valid second surrogate */
  932. c = ((c - 0xd800) << 10) + (c2 - 0xdc00) + 0x10000;
  933. ++p;
  934. ++i;
  935. }
  936. }
  937. size_t n = q - buf;
  938. if( c<0x80 ){
  939. if (buflen <= n+1) break;
  940. *q++=c;
  941. }else if( c<0x800 ){
  942. if (buflen <= n+2) break;
  943. *q++=0xc0|(c>>6);
  944. *q++=0x80|(c&0x3f);
  945. }else if(c < 0x10000) {
  946. if (buflen <= n+3) break;
  947. *q++=0xe0|(c>>12);
  948. *q++=0x80|((c>>6)&0x3f);
  949. *q++=0x80|(c&0x3f);
  950. }else if(c <= 0x10ffff) {
  951. if (buflen <= n+4) break;
  952. *q++ = 0xf0|(c>>18);
  953. *q++ = 0x80|((c>>12)&0x3f);
  954. *q++ = 0x80|((c>>6)&0x3f);
  955. *q++ = 0x80|((c&0x3f));
  956. }else{
  957. bbExThrowCString( "Unicode character out of UTF-8 range" );
  958. }
  959. ++i;
  960. }
  961. *q=0;
  962. *length = q - buf;
  963. return buf;
  964. }
  965. BBArray *bbStringSplit( BBString *str,BBString *sep ){
  966. int i,i2,n;
  967. BBString **p,*bit;
  968. BBArray *bits;
  969. if( sep->length ){
  970. i=0;n=1;
  971. while( (i2=bbStringFind( str,sep,i ))!=-1 ){
  972. ++n;
  973. i=i2+sep->length;
  974. }
  975. bits=bbArrayNew1D( "$",n );
  976. p=(BBString**)BBARRAYDATA( bits,1 );
  977. i=0;
  978. while( n-- ){
  979. i2=bbStringFind( str,sep,i );
  980. if( i2==-1 ) i2=str->length;
  981. bit=bbStringSlice( str,i,i2 );
  982. //BBINCREFS( bit );
  983. *p++=bit;
  984. i=i2+sep->length;
  985. }
  986. return bits;
  987. }
  988. i=0;n=0;
  989. for(;;){
  990. while( i!=str->length && str->buf[i]<33 ) ++i;
  991. if( i++==str->length ) break;
  992. while( i!=str->length && str->buf[i]>32 ) ++i;
  993. ++n;
  994. }
  995. if( !n ) return &bbEmptyArray;
  996. bits=bbArrayNew1D( "$",n );
  997. p=(BBString**)BBARRAYDATA( bits,1 );
  998. i=0;
  999. while( n-- ){
  1000. while( str->buf[i]<33 ) ++i;
  1001. i2=i++;
  1002. while( i!=str->length && str->buf[i]>32 ) ++i;
  1003. bit=bbStringSlice( str,i2,i );
  1004. //BBINCREFS( bit );
  1005. *p++=bit;
  1006. }
  1007. return bits;
  1008. }
  1009. BBString *bbStringJoin( BBString *sep,BBArray *bits ){
  1010. int i,sz=0;
  1011. int n_bits=bits->scales[0];
  1012. BBString **p,*str;
  1013. BBChar *t;
  1014. if( bits==&bbEmptyArray ){
  1015. return &bbEmptyString;
  1016. }
  1017. p=(BBString**)BBARRAYDATA( bits,1 );
  1018. for( i=0;i<n_bits;++i ){
  1019. BBString *bit=*p++;
  1020. sz+=bit->length;
  1021. }
  1022. sz+=(n_bits-1)*sep->length;
  1023. str=bbStringNew( sz );
  1024. t=str->buf;
  1025. p=(BBString**)BBARRAYDATA( bits,1 );
  1026. for( i=0;i<n_bits;++i ){
  1027. if( i ){
  1028. memcpy( t,sep->buf,sep->length*sizeof(BBChar) );
  1029. t+=sep->length;
  1030. }
  1031. BBString *bit=*p++;
  1032. memcpy( t,bit->buf,bit->length*sizeof(BBChar) );
  1033. t+=bit->length;
  1034. }
  1035. return str;
  1036. }
  1037. #ifndef __ANDROID__
  1038. #ifndef __EMSCRIPTEN__
  1039. static void mktmp( void *p ){
  1040. static AO_t i;
  1041. static void *bufs[32];
  1042. int n=AO_fetch_and_add1( &i ) & 31;
  1043. bbMemFree( bufs[n] );
  1044. bufs[n]=p;
  1045. }
  1046. #else
  1047. static void mktmp( void *p ){
  1048. static int i;
  1049. static void *bufs[32];
  1050. int n=++i & 31;
  1051. bbMemFree( bufs[n] );
  1052. bufs[n]=p;
  1053. }
  1054. #endif
  1055. #else
  1056. static void mktmp( void *p ){
  1057. static int i;
  1058. static void *bufs[32];
  1059. int n= __sync_fetch_and_add( &i, 1 ) & 31;
  1060. bbMemFree( bufs[n] );
  1061. bufs[n]=p;
  1062. }
  1063. #endif
  1064. char *bbTmpCString( BBString *str ){
  1065. printf("Use of bbTmpCString is deprecated\n");fflush(stdout);
  1066. char *p=(char*)bbStringToCString( str );
  1067. mktmp( p );
  1068. return p;
  1069. }
  1070. BBChar *bbTmpWString( BBString *str ){
  1071. printf("Use of bbTmpWString is deprecated\n");fflush(stdout);
  1072. BBChar *p=bbStringToWString( str );
  1073. mktmp( p );
  1074. return p;
  1075. }
  1076. char *bbTmpUTF8String( BBString *str ){
  1077. printf("Use of bbTmpUTF8String is deprecated\n");fflush(stdout);
  1078. char *p=(char*)bbStringToUTF8String( str );
  1079. mktmp( p );
  1080. return p;
  1081. }
  1082. BBUINT* bbStringToUTF32String( BBString *str ) {
  1083. int len=str->length;
  1084. int n = 0;
  1085. size_t buflen = len * 4 + 4;
  1086. BBUINT *buf=(BBUINT*)bbMemAlloc( buflen );
  1087. BBChar *p=str->buf;
  1088. BBUINT *bp = buf;
  1089. while( *p ) {
  1090. n++;
  1091. BBChar c = *p++;
  1092. if (!((c - 0xd800u) < 2048u)) {
  1093. *bp++ = c;
  1094. } else {
  1095. if (((c & 0xfffffc00) == 0xd800) && n < len && ((*p & 0xfffffc00) == 0xdc00)) {
  1096. *bp++ = (c << 10) + (*p++) - 0x35fdc00;
  1097. } else {
  1098. bbMemFree( buf );
  1099. bbExThrowCString( "Failed to create UTF32. Invalid surrogate pair." );
  1100. }
  1101. }
  1102. }
  1103. *bp = 0;
  1104. return buf;
  1105. }
  1106. BBString* bbStringFromUTF32String( const BBUINT *p ) {
  1107. return p ? bbStringFromUTF32Bytes(p, utf32strlen(p)) : &bbEmptyString;
  1108. }
  1109. BBString* bbStringFromUTF32Bytes( const BBUINT *p, int n ) {
  1110. if( !p || n <= 0 ) return &bbEmptyString;
  1111. int len = n * 2;
  1112. unsigned short * d=(unsigned short*)malloc( n * sizeof(BBChar) * 2 );
  1113. unsigned short * q=d;
  1114. BBUINT* bp = p;
  1115. int i = 0;
  1116. while (i++ < n) {
  1117. BBUINT c = *bp++;
  1118. if (c <= 0xffffu) {
  1119. if (c >= 0xd800u && c <= 0xdfffu) {
  1120. *q++ = 0xfffd;
  1121. } else {
  1122. *q++ = c;
  1123. }
  1124. } else if (c > 0x0010ffffu) {
  1125. *q++ = 0xfffd;
  1126. } else {
  1127. c -= 0x0010000u;
  1128. *q++ = (BBChar)((c >> 10) + 0xd800);
  1129. *q++ = (BBChar)((c & 0x3ffu) + 0xdc00);
  1130. }
  1131. }
  1132. BBString * str=bbStringFromShorts( d,q-d );
  1133. free( d );
  1134. return str;
  1135. }