Memory.cpp

/******************************************************************************/
#include "stdafx.h"
#define MEM_SINGLE_MAX  0 // measure highest size for a single allocation
#define MEM_LEAK_FULL   0 // record all allocations
#define MEM_GUARD       (DEBUG ? 16 : 0) // number of bytes to be used as a guard, use 0 to disable
#define MEM_PEAK        0 // measure peak allocation of all memory
#define MEM_COUNT_USAGE (0 || MEM_PEAK || MEM_GUARD) // when using MEM_PEAK or MEM_GUARD then MEM_COUNT_USAGE must always be enabled
#define MEM_CUSTOM      (MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD)
#if (MEM_SINGLE_MAX || MEM_LEAK_FULL || MEM_GUARD || MEM_PEAK || MEM_COUNT_USAGE) && !DEBUG
   #pragma message("!! Warning: Use this only for debugging !!")
#endif
/******************************************************************************/
namespace EE{
/******************************************************************************/
Bool MemStats::get()
{
#if WINDOWS_OLD
   MEMORYSTATUSEX ms;
   ms.dwLength=SIZE(ms);
   if(GlobalMemoryStatusEx(&ms))
   {
      usage     =ms.dwMemoryLoad;
      avail_phys=ms.ullAvailPhys;
      total_phys=ms.ullTotalPhys;
      avail_page=ms.ullAvailPageFile;
      total_page=ms.ullTotalPageFile;
      avail_virt=ms.ullAvailVirtual;
      total_virt=ms.ullTotalVirtual;
      return true;
   }
   Zero(T); return false;
#elif WINDOWS_NEW
   total_phys=total_page=total_virt=           Windows::System::MemoryManager::AppMemoryUsageLimit;
   avail_phys=avail_page=avail_virt=total_phys-Windows::System::MemoryManager::AppMemoryUsage;
   if(total_phys)usage=(total_phys-avail_phys)*100/total_phys;
   return true;
#elif APPLE
   Zero(T);
   int    mib[]={CTL_HW, HW_MEMSIZE};
   Long   physical_memory;
   size_t length=SIZE(physical_memory);
   sysctl(mib, 2, &physical_memory, &length, null, 0);
   total_phys=physical_memory;
   mach_port_t host_priv_port=mach_host_self();
   vm_size_t   page_size; host_page_size(host_priv_port, &page_size);
   vm_statistics_data_t   vm_stat;
   mach_msg_type_number_t host_count=HOST_VM_INFO_COUNT;
   if(host_statistics(host_priv_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_count)==KERN_SUCCESS)
   {
      avail_phys=avail_virt=avail_page=page_size* vm_stat.free_count;
      total_virt=total_page=           page_size*(vm_stat.free_count+vm_stat.active_count+vm_stat.inactive_count+vm_stat.wire_count);
      if(total_virt)usage=(total_virt-avail_virt)*100/total_virt;
      return true;
   }
   return false;
#else
   Char8 data[2048]; if(UnixReadFile("/proc/meminfo", data, SIZE(data))) // sample output: "MemTotal: 1024 kB\nMemFree: 512 kB\nCached: 256 kB"
   {
      if(CChar8 *total =TextPos(data, "MemTotal", false, true))
      if(CChar8 *free  =TextPos(data, "MemFree" , false, true))
      if(CChar8 *cached=TextPos(data, "Cached"  , false, true))
      {
         total +=8; // Length("MemTotal") -> 8
         free  +=7; // Length("MemFree" ) -> 7
         cached+=6; // Length("Cached"  ) -> 6
         for(; *total ==' ' || *total ==':' || *total =='\t'; )total ++;
         for(; *free  ==' ' || *free  ==':' || *free  =='\t'; )free  ++;
         for(; *cached==' ' || *cached==':' || *cached=='\t'; )cached++;
         CalcValue value;
         total =_SkipWhiteChars(TextValue(total , value)); Long t=value.asLong(); if(Starts(total , "tb"))t<<=40;else if(Starts(total , "gb"))t<<=30;else if(Starts(total , "mb"))t<<=20;else if(Starts(total , "kb"))t<<=10;
         free  =_SkipWhiteChars(TextValue(free  , value)); Long f=value.asLong(); if(Starts(free  , "tb"))f<<=40;else if(Starts(free  , "gb"))f<<=30;else if(Starts(free  , "mb"))f<<=20;else if(Starts(free  , "kb"))f<<=10;
         cached=_SkipWhiteChars(TextValue(cached, value)); Long c=value.asLong(); if(Starts(cached, "tb"))c<<=40;else if(Starts(cached, "gb"))c<<=30;else if(Starts(cached, "mb"))c<<=20;else if(Starts(cached, "kb"))c<<=10;
         total_phys=t;
         avail_phys=f+c;
         usage=(total_phys ? (total_phys-avail_phys)*100/total_phys : 0);
         avail_virt=avail_page=avail_phys;
         total_virt=total_page=total_phys;
         return true;
      }
   }
   Zero(T); return false;
#endif
}
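/* Example usage (illustrative sketch, using only names defined in this file or used elsewhere in it):

      MemStats mem;
      if(mem.get()) // query current system memory statistics
      {
         LogN(S+"Memory usage: "+mem.usage+"%");
         LogN(S+"Physical available: "+mem.avail_phys+" / "+mem.total_phys+" bytes");
      }
*/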
/******************************************************************************/
static Char8 Base64Chars[]={'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
                            'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
                            'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                            'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
                            'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
                            'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
                            'w', 'x', 'y', 'z', '0', '1', '2', '3',
                            '4', '5', '6', '7', '8', '9', '+', '/'};
Str8 Base64(CPtr data, Int size)
{
   Str8 temp; if(size>0)
   {
      temp.reserve((size+2)/3*4);
      Byte *d=(Byte*)data;
      Int   mod=size%3;
      REP((size-mod)/3)
      {
         UInt a=*d++;
         UInt b=*d++;
         UInt c=*d++;
         UInt triple=(a<<0x10)+(b<<0x08)+c;
         temp+=Base64Chars[(triple>>3*6)&0x3F];
         temp+=Base64Chars[(triple>>2*6)&0x3F];
         temp+=Base64Chars[(triple>>1*6)&0x3F];
         temp+=Base64Chars[(triple>>0*6)&0x3F];
      }
      if(mod)
      {
         UInt a=            *d++     ; // always
         UInt b=((mod==2) ? *d++ : 0); // conditional
         UInt triple=(a<<0x10)+(b<<0x08);
         temp+=                  Base64Chars[(triple>>3*6)&0x3F];
         temp+=                  Base64Chars[(triple>>2*6)&0x3F];
         temp+=((mod==1) ? '=' : Base64Chars[(triple>>1*6)&0x3F]);
         temp+=                  '=';
      }
   }
   return temp;
}
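/* Example (illustrative): the classic RFC 4648 test vectors follow from the code above,
   every 3 input bytes become 4 output characters, with '=' padding for a partial group:

      Base64("Man", 3) // -> "TWFu"
      Base64("Ma" , 2) // -> "TWE="  (mod==2, one '=' pad)
      Base64("M"  , 1) // -> "TQ=="  (mod==1, two '=' pads)
*/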
/******************************************************************************/
// MEM
/******************************************************************************/
#if MEM_LEAK_FULL
struct AllocData
{
   Ptr data;
   Str call_stack;
   AllocData() {GetCallStackFast(call_stack);}
};
static SyncLock         MemInsideLock;
static Bool             MemInside;
static Memx<AllocData> *MemLog; // set this as a pointer so its dtor won't get automatically called
#endif
#if MEM_COUNT_USAGE
Long _MemUsage; // current memory usage
#endif
#if MEM_PEAK
Long _MemPeakUsage; // peak memory usage
#endif
#if MEM_SINGLE_MAX
IntPtr MaxSize;
#endif
Long MemUsage()
{
#if MEM_COUNT_USAGE
   return _MemUsage;
#else
   return 0;
#endif
}
Long MemPeakUsage()
{
#if MEM_PEAK
   return _MemPeakUsage;
#else
   return 0;
#endif
}
void ClearMemPeakUsage()
{
#if MEM_PEAK
   AtomicSet(_MemPeakUsage, AtomicGet(_MemUsage));
#endif
}
void ListMemLeaks()
{
#if MEM_LEAK_FULL
   SafeSyncLocker locker(MemInsideLock);
   MemInside=true;
   if(MemLog)FREPA(*MemLog)
   {
      LogN(S+"Leak #"+i);
      LogN((*MemLog)[i].call_stack);
      LogN();
   }
   MemInside=false;
#endif
}
static void AllocError(IntPtr size)
{
   Char8 temp [256];
   Char  error[512];
   Set   (error, "Can't allocate "); Append(error, TextInt(size, temp, -1, 3));
   Append(error, " bytes of memory.\nApplication Type: "
#if X64
      "64-bit"
#else
      "32-bit"
#endif
   );
   MemStats mem; if(mem.get())
   {
    //Append(error, "\nMemory Usage: "      ); Append(error, TextInt(mem.usage     , temp)); Append(error, "%");
      Append(error, "\nPhysical Available: "); Append(error, TextInt(mem.avail_phys, temp, -1, 3)); Append(error, " / "); Append(error, TextInt(mem.total_phys, temp, -1, 3));
      Append(error, "\nVirtual Available: " ); Append(error, TextInt(mem.avail_virt, temp, -1, 3)); Append(error, " / "); Append(error, TextInt(mem.total_virt, temp, -1, 3));
      if(mem.avail_page!=mem.avail_virt || mem.total_page!=mem.total_virt)
      {
         Append(error, "\nPaged Available: "); Append(error, TextInt(mem.avail_page, temp, -1, 3)); Append(error, " / "); Append(error, TextInt(mem.total_page, temp, -1, 3));
      }
   }
   ExitEx(error);
}
static INLINE void AllocInc() {AtomicInc(App._mem_leaks);}
static INLINE void AllocDec() {AtomicDec(App._mem_leaks);}
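/* Debug allocation layout (what 'Alloc' below builds and 'Free' unwinds when MEM_CUSTOM is
   enabled), as derived from the code in this file:

      [padding][leak index:Int][alloc size:IntPtr][guard:MEM_GUARD bytes][user data][guard:MEM_GUARD bytes]
       ^-- 'malloc' result                                                ^-- pointer returned to the user

   The space before the user data is rounded up with 'Ceil16' so the user pointer keeps the
   alignment that 'malloc' provided. Each field is present only when its #define is enabled. */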
/******************************************************************************/
// FREE
/******************************************************************************/
static INLINE Byte MemGuard(Int offset) {return Byte(0xEE + offset);}
void Free(Ptr &data)
{
   if(data)
   {
   #if MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD
      UInt before=0; // extra size needed before alloc
   #endif
   #if MEM_GUARD
      Byte *guard=(Byte*)data-MEM_GUARD; data=guard;
   #endif
   #if MEM_COUNT_USAGE
      IntPtr data_size; {IntPtr *size=(IntPtr*)data-1; data=size; data_size=*size; AtomicSub(_MemUsage, data_size);}
      before+=SIZE(data_size); // make room for size
   #endif
   #if MEM_GUARD
      before+=MEM_GUARD; // make room for guard
      FREP(MEM_GUARD)if(*guard++!=MemGuard(i))Exit("Mem Guard was modified"); guard+=data_size; // check leading guard, then skip over the user data
      FREP(MEM_GUARD)if(*guard++!=MemGuard(i))Exit("Mem Guard was modified"); // check trailing guard
   #endif
   #if MEM_LEAK_FULL
      before+=SIZE(Int); // make room for leak data
      {
         Int *index=(Int*)data-1; data=index;
         SafeSyncLocker locker(MemInsideLock);
         if(!MemInside)
         {
            MemInside=true;
            App._mem_leaks--;
            { // braces to make sure locals are removed inside the block
               AllocData &ad=MemLog->absElm(*index);
               if(ad.data!=index)Exit("Invalid mem link");
               MemLog->removeAbs(*index);
            }
            MemInside=false;
         }
      }
   #else
      AllocDec();
   #endif
   #if MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD
      data=(Byte*)data+before-Ceil16(before); // we walked back only by 'before', but the allocation actually starts 'Ceil16(before)' bytes before the user pointer, so adjust by the difference
   #endif
      free(data);
      data=null;
   }
}
/******************************************************************************/
// ALLOCATE
/******************************************************************************/
Ptr Alloc(IntPtr size)
{
   if(size>0)
   {
   #if MEM_SINGLE_MAX
   again:
      Long max=AtomicGet(MaxSize);
      if(size>max && !AtomicCAS(MaxSize, max, size))goto again;
   #endif
   #if MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD
      UInt before=0, after=0; // extra size needed before and after alloc
   #endif
   #if MEM_LEAK_FULL
      before+=SIZE(Int); // make room for leak data
   #endif
   #if MEM_COUNT_USAGE
      IntPtr data_size=size; // remember this before increasing the size
      before+=SIZE(data_size); // make room for size
   #endif
   #if MEM_GUARD
      before+=MEM_GUARD; after+=MEM_GUARD; // make room for guard
   #endif
   #if MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD
      before=Ceil16(before); // !! 'before' must be aligned because some code (custom or in the system) assumes that malloc results are aligned, for example without this, Apple platforms can crash !!
      size+=before+after;
   #endif
      if(Ptr data=malloc(size))
      {
      #if MEM_LEAK_FULL || MEM_COUNT_USAGE || MEM_GUARD
         data=(Byte*)data+before; // data that will be returned to the user
         Ptr debug=data; // start from 'data' and go back and write debug info
      #endif
      #if MEM_GUARD // needs to be processed first - written next to 'data'
         {Byte *guard=(Byte*)debug-MEM_GUARD; debug=guard; FREP(MEM_GUARD)*guard++=MemGuard(i); guard=(Byte*)data+data_size; FREP(MEM_GUARD)*guard++=MemGuard(i);}
      #endif
      #if MEM_COUNT_USAGE
         {
            IntPtr *size=(IntPtr*)debug-1; debug=size; *size=data_size; Long old_usage=AtomicAdd(_MemUsage, data_size);
         #if MEM_PEAK
            for(Long new_usage=old_usage+data_size; ; )
            {
               Long peak=AtomicGet(_MemPeakUsage);
               if(new_usage<=peak
               || AtomicCAS(_MemPeakUsage, peak, new_usage))break;
            }
         #endif
         }
      #endif
      #if MEM_LEAK_FULL
         {
            Int *index=(Int*)debug-1; debug=index; *index=-1;
            SafeSyncLocker locker(MemInsideLock);
            if(!MemInside)
            {
               App._mem_leaks++;
               MemInside=true;
               { // braces to make sure locals are removed inside the block
                  if(!MemLog)New(MemLog);
                  AllocData &ad=MemLog->New();
                  *index=MemLog->absIndex(&ad);
                  ad.data=index;
               }
               MemInside=false;
            }
         }
      #else
         AllocInc();
      #endif
         // test alignment to verify that EE code hasn't broken the alignment required by the system (in case some code or the system makes assumptions about alignment)
      #if X64 || APPLE // 64-bit platforms and all Apple have 16-byte alignment
         DEBUG_ASSERT((UIntPtr(data)&15)==0, "Memory is not 16-byte aligned");
      #else // other platforms should have at least 8-byte alignment
         // https://msdn.microsoft.com/en-us/library/ycsb6wwf.aspx
         // http://www.gnu.org/software/libc/manual/html_node/Aligned-Memory-Blocks.html
         DEBUG_ASSERT((UIntPtr(data)& 7)==0, "Memory is not 8-byte aligned");
      #endif
         return data;
      }
      AllocError(size);
   }
   return null;
}
Ptr AllocZero(IntPtr size)
{
   Ptr data=Alloc(size); Zero(data, size);
   return data;
}
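/* Example usage (illustrative sketch): 'Free' takes the pointer by reference and nulls it,
   so the usual pattern is:

      Ptr buf=AllocZero(256); // 256 zeroed bytes, exits via 'AllocError' on failure
      // ... use 'buf' ...
      Free(buf);              // releases the memory and sets 'buf' to null
*/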
/******************************************************************************/
// REALLOCATE
/******************************************************************************/
void _Realloc(Ptr &data, IntPtr size_new, IntPtr size_old)
{
   MAX(size_new, 0);
   MAX(size_old, 0);
   if(size_new!=size_old)
   {
      Ptr p=Alloc(size_new);
      Copy(p, data, Min(size_new, size_old)); // copy old data
      Free(data); data=p;
   }
}
void _ReallocZero(Ptr &data, IntPtr size_new, IntPtr size_old)
{
   MAX(size_new, 0);
   MAX(size_old, 0);
   if(size_new!=size_old)
   {
      Ptr p=Alloc(size_new);
      Copy( p              , data, Min(size_new, size_old)); // copy old data
      Zero((Byte*)p+size_old,          size_new- size_old ); // zero new data (no-op when shrinking, 'Zero' ignores non-positive sizes)
      Free(data); data=p;
   }
}
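/* Note (illustrative): both functions always allocate a fresh block and copy instead of calling
   'realloc', which keeps the debug bookkeeping in 'Alloc'/'Free' above straightforward. A
   typical growth step:

      Ptr    data=null;
      IntPtr size=0;
      _ReallocZero(data, 64, size); size=64; // grow from 0 to 64 bytes, new bytes zeroed
      Free(data);
*/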
/******************************************************************************/
// ALIGNED
/******************************************************************************/
#if WINDOWS && !X64 && !MEM_CUSTOM // Win 32-bit has only 8-byte alignment
Ptr AlignedAlloc(IntPtr size)
{
   if(size>0)
   {
      if(Ptr data=_aligned_malloc(size, 16))
      {
         AllocInc();
         return data;
      }
      AllocError(size);
   }
   return null;
}
void AlignedFree(Ptr &data)
{
   if(data)
   {
      AllocDec     ();
      _aligned_free(data);
      data=null;
   }
}
#elif (LINUX || ANDROID) && !MEM_CUSTOM
Ptr AlignedAlloc(IntPtr size)
{
   if(size>0)
   {
      if(Ptr data=memalign(16, size))
      {
         AllocInc();
         return data;
      }
      AllocError(size);
   }
   return null;
}
void AlignedFree(Ptr &data)
{
   if(data)
   {
      AllocDec();
      free    (data);
      data=null;
   }
}
#elif (defined WII) || MEM_CUSTOM
typedef Byte AAOffs; // Byte is enough to store the 1..16 offset
Ptr AlignedAlloc(IntPtr size)
{
   if(size>0)
   {
      Int padd=15+SIZE(AAOffs); // 15 for alignment and AAOffs for offset to the original pointer
      if(Byte *original=Alloc<Byte>(size+padd))
      {
         Byte *aligned=(Byte*)(UIntPtr(original+padd)&~15);
         ((AAOffs*)aligned)[-1]=aligned-original; // store offset to the original pointer
         return aligned;
      }
   }
   return null;
}
void AlignedFree(Ptr &data)
{
   if(data)
   {
      Ptr original=((Byte*)data)-((AAOffs*)data)[-1];
      Free(original);
      data=null;
   }
}
#else // PS3, XBox, Win64, Apple have 16-byte alignment
Ptr  AlignedAlloc(IntPtr size) {Ptr data=Alloc(size); DEBUG_ASSERT((UIntPtr(data)&15)==0, "Memory is not 16-byte aligned"); return data;}
void AlignedFree (Ptr &data  ) {Free(data);}
#endif
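/* Worked example (illustrative) for the MEM_CUSTOM fallback above: suppose 'Alloc' returns
   original=0x1003 and SIZE(AAOffs)==1, so padd=16:

      aligned = (0x1003+16) & ~15 = 0x1010 // 16-byte aligned, 13 bytes past 'original'
      aligned[-1] stores 13                // offset saved just below the aligned pointer
      AlignedFree: 0x1010-13 = 0x1003      // recovers 'original' to pass to 'Free'

   There is always at least 1 byte below 'aligned' for the offset, because padd>15 guarantees
   the offset is in the 1..16 range.
*/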
/******************************************************************************/
// ZERO
/******************************************************************************/
void Zero(Ptr data, IntPtr size)
{
   if(data && size>0)memset(data, 0, size);
}
/******************************************************************************/
// SET
/******************************************************************************/
void SetMem(Ptr data, Byte value, IntPtr size)
{
   if(data && size>0)memset(data, value, size);
}
/******************************************************************************/
// COPY
/******************************************************************************/
void Copy(Ptr dest, CPtr src, IntPtr size)
{
   if(dest && size>0)
   {
      if(src)memmove(dest, src, size);
      else   memset (dest, 0  , size);
   }
}
void Copy(Ptr dest, CPtr src, IntPtr dest_size, IntPtr src_size)
{
   if(dest && dest_size>0)
   {
      if(!src || src_size<0)src_size=0;
      if(dest_size>src_size)
      {
         memmove(       dest         , src,  src_size          ); // copy what we have
         memset ((Byte*)dest+src_size, 0  , dest_size- src_size); // zero what's left
      }else
      {
         memmove(       dest         , src, dest_size          ); // copy what we have
      }
   }
}
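/* Example (illustrative): the two-size 'Copy' copies what fits and zero-fills the remainder,
   which is convenient when copying into a larger buffer:

      Byte src[4]={1,2,3,4}, dest[8];
      Copy(dest, src, SIZE(dest), SIZE(src)); // dest = {1,2,3,4,0,0,0,0}
*/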
/******************************************************************************/
void _CopyIs(Ptr dest, CPtr src, C MemPtr<Bool> &is, UInt elm_size)
{
   if(dest && src)switch(elm_size)
   {
      case 1:
      {
         Byte *d=(Byte*)dest,
              *s=(Byte*)src ;
         FREPA(is){if(is[i])*d++=*s; s++;}
      }break;
      case 4:
      {
         UInt *d=(UInt*)dest,
              *s=(UInt*)src ;
         FREPA(is){if(is[i])*d++=*s; s++;}
      }break;
      default:
      {
         Byte *d=(Byte*)dest,
              *s=(Byte*)src ;
         FREPA(is){if(is[i]){CopyFast(d, s, elm_size); d+=elm_size;} s+=elm_size;}
      }break;
   }
}
/******************************************************************************/
void _CopyList(Ptr dest, CPtr src, C MemPtr<Int> &list, UInt elm_size)
{
   if(dest && src)switch(elm_size)
   {
      case 1:
      {
         Byte *d=(Byte*)dest,
              *s=(Byte*)src ;
         FREPA(list)*d++=s[list[i]];
      }break;
      case 4:
      {
         UInt *d=(UInt*)dest,
              *s=(UInt*)src ;
         FREPA(list)*d++=s[list[i]];
      }break;
      default:
      {
         Byte *d=(Byte*)dest,
              *s=(Byte*)src ;
         FREPA(list){CopyFast(d, s+list[i]*elm_size, elm_size); d+=elm_size;}
      }break;
   }
}
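/* Example (illustrative): '_CopyIs' compacts elements selected by a Bool mask, '_CopyList'
   gathers elements by index. With mask/list containers convertible to 'MemPtr' (how the
   containers are wrapped is an assumption here, these functions are normally called through
   templated wrappers elsewhere in the engine):

      Int src[4]={10,20,30,40}, dest[4];
      // mask {true,false,true,false} -> _CopyIs   writes dest = {10,30,...}
      // list {3,0}                   -> _CopyList writes dest = {40,10,...}
*/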
/******************************************************************************/
void Copy8To16(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U16 *d=(U16*)dest)
   if(U8  *s=(U8 *)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
      xor eax, eax
   start:
      lodsb
      stosw
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy8To32(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U32 *d=(U32*)dest)
   if(U8  *s=(U8 *)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
      xor eax, eax
   start:
      lodsb
      stosd
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy16To8(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U8  *d=(U8 *)dest)
   if(U16 *s=(U16*)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
   start:
      lodsw
      stosb
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy16To32(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U32 *d=(U32*)dest)
   if(U16 *s=(U16*)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
      xor eax, eax
   start:
      lodsw
      stosd
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy24To32(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U8 *d=(U8*)dest)
   if(U8 *s=(U8*)src )if(elms)
   {
      REP(elms-1)
      {
         *(U32*)d=*(U32*)s;
         d[3]=0;
         d+=4;
         s+=3;
      }
      *(U16*)d=*(U16*)s;
      d[2]=s[2];
      d[3]=0;
   }
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
      dec ecx
      jz  last
   start:
      movsd
      mov byte ptr[edi-1], 0
      dec esi
      dec ecx
      jnz start
   last:
      movsw
      movsb
      mov byte ptr[edi], 0
   end:
   }
#endif
}
void Copy32To8(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U8  *d=(U8 *)dest)
   if(U32 *s=(U32*)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
   start:
      lodsd
      stosb
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy32To16(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U16 *d=(U16*)dest)
   if(U32 *s=(U32*)src )REP(elms)*d++=*s++;
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
   start:
      lodsd
      stosw
      dec ecx
      jnz start
   end:
   }
#endif
}
void Copy32To24(Ptr dest, CPtr src, Int elms)
{
#if X64 || ARM || !WINDOWS
   if(U8 *d=(U8*)dest)
   if(U8 *s=(U8*)src )if(elms)
   {
      REP(elms-1)
      {
         *(U32*)d=*(U32*)s;
         d+=3;
         s+=4;
      }
      *(U16*)d=*(U16*)s;
      d[2]=s[2];
   }
#else
   _asm
   {
      mov esi, src
      or  esi, esi
      jz  end
      mov edi, dest
      or  edi, edi
      jz  end
      mov ecx, elms
      or  ecx, ecx
      jle end // signed<=
      dec ecx
      jz  last
   start:
      movsd
      dec edi
      dec ecx
      jnz start
   last:
      movsw
      movsb
   end:
   }
#endif
}
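/* Example (illustrative): these converters widen or narrow fixed-size elements, e.g. repacking
   3-byte groups to 4-byte groups (a typical use would be RGB to RGBX pixel data, though that's
   an assumption, the functions themselves are format-agnostic). 'Copy24To32' zeroes the 4th
   byte of each element, 'Copy32To24' drops it:

      U8  rgb [6]={10,20,30, 40,50,60};
      U32 rgbx[2];
      Copy24To32(rgbx, rgb, 2); // each 3-byte element becomes 4 bytes, last byte 0
*/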
/******************************************************************************/
// SWAP
/******************************************************************************/
void SwapFast(Ptr a, Ptr b, IntPtr size) // !! this will crash if "!a || !b || size<0" !!
{
   U32 *i1=(U32*) a, *i2=(U32*) b; REPP  (size>>2)Swap(*i1++, *i2++); // size>>2 is faster than Int size/4
   U8  *j1=(U8 *)i1, *j2=(U8 *)i2; switch(size& 3) // size&3 is faster than Int size%4 but doesn't work with negative numbers
   {
      case 3: Swap(*j1++, *j2++); // !! no break on purpose !!
      case 2: Swap(*j1++, *j2++); // !! no break on purpose !!
      case 1: Swap(*j1++, *j2++); // !! no break on purpose !!
   }
}
void Swap(Ptr a, Ptr b, IntPtr size)
{
   if(a && b && a!=b && size>0)
   {
      U32 *i1=(U32*) a, *i2=(U32*) b; REPP  (size>>2)Swap(*i1++, *i2++);
      U8  *j1=(U8 *)i1, *j2=(U8 *)i2; switch(size& 3)
      {
         case 3: Swap(*j1++, *j2++); // !! no break on purpose !!
         case 2: Swap(*j1++, *j2++); // !! no break on purpose !!
         case 1: Swap(*j1++, *j2++); // !! no break on purpose !!
      }
   }
}
void _ReverseOrder(Ptr data, Int elms, UInt elm_size)
{
   if(elms>0)
   {
      Ptr end=(Byte*)data+(elms-1)*elm_size;
      elms>>=1;
      switch(elm_size)
      {
         case 1:
            REP(elms)
            {
               Swap(*(U8*)data, *(U8*)end);
               data=(U8*)data+1;
               end =(U8*)end -1;
            }
         break;
         case 4:
            REP(elms)
            {
               Swap(*(U32*)data, *(U32*)end);
               data=(U32*)data+1;
               end =(U32*)end -1;
            }
         break;
         case 8:
            REP(elms)
            {
               Swap(*(U64*)data, *(U64*)end);
               data=(U64*)data+1;
               end =(U64*)end -1;
            }
         break;
         default:
            REP(elms)
            {
               Swap(data, end, elm_size);
               data=(Byte*)data+elm_size;
               end =(Byte*)end -elm_size;
            }
         break;
      }
   }
}
void _RandomizeOrder(Ptr data, Int elms, UInt elm_size, Randomizer &random)
{
   FREP(elms-1)Swap((Byte*)data+i*elm_size, (Byte*)data+random(i, elms-1)*elm_size, elm_size);
}
void _RotateOrder(Ptr data, Int elms, UInt elm_size, Int offset)
{
   if(elms>1)
   {
      offset%=elms;
      if(offset)
      {
         if(offset<0)offset+=elms;
         Int  offset_reversed=elms-offset;
         UInt required_size  =Min(offset, offset_reversed)*elm_size;
         Memt<Byte> buf; buf.setNum(required_size); Ptr temp=buf.data();
         Byte *dest=(Byte*)data;
         if(offset<=offset_reversed) // ROR
         {
            // offset=2, offset_reversed=8
            // 0123456789 - sample data
            //         89 - temp
            // 01234567   - copy
            CopyFast(temp                , dest+offset_reversed*elm_size, offset         *elm_size); // copy second part into temp (here "89"      )
            MoveFast(dest+offset*elm_size, dest                         , offset_reversed*elm_size); // move first part right      (here "01234567")
            CopyFast(dest                , temp                         , offset         *elm_size); // copy temp to start         (here "89"      )
         }
         else // ROL
         {
            // offset=8, offset_reversed=2
            // 0123456789 - sample data
            // 01         - temp
            //   23456789 - copy
            CopyFast(temp                , dest                         , offset_reversed*elm_size); // copy first part into temp (here "01"      )
            MoveFast(dest                , dest+offset_reversed*elm_size, offset         *elm_size); // move second part left     (here "23456789")
            CopyFast(dest+offset*elm_size, temp                         , offset_reversed*elm_size); // copy temp to end          (here "01"      )
         }
      }
   }
}
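/* Example (illustrative), matching the comments above:

      Int data[10]={0,1,2,3,4,5,6,7,8,9};
      _RotateOrder(data, 10, SIZE(Int), 2); // ROR by 2 -> {8,9,0,1,2,3,4,5,6,7}
      // negative or out-of-range offsets are wrapped by 'offset%=elms' first
*/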
void _MoveElm(Ptr data, Int elms, UInt elm_size, Int elm, Int new_index) // move 'elm' element and preserve order of other elements, 01X2 -> X012
{
   if(InRange(elm, elms))
   {
      Clamp(new_index, 0, elms-1); if(new_index!=elm)
      {
         Memt<Byte> buf; buf.setNum(elm_size); Ptr temp=buf.data();
         Byte *d=(Byte*)data;
         CopyFast(temp, d+elm*elm_size, elm_size); // copy element from data to temp memory
         //                   E     N                                           E        N
         if(elm<new_index) // element is on the left, and we're moving it to the right, move the data to the left "0X123" -> "012X3"
         {
            MoveFast(d+elm*elm_size, d+(elm+1)*elm_size, (new_index-elm)*elm_size);
         } //                N     E                                           N        E
         else // element is on the right, and we're moving it to the left, move the data to the right "012X3" -> "0X123"
         {
            MoveFast(d+(new_index+1)*elm_size, d+new_index*elm_size, (elm-new_index)*elm_size);
         }
         CopyFast(d+new_index*elm_size, temp, elm_size); // copy element from temp memory back to the data
      }
   }
}
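/* Example (illustrative):

      Int data[4]={0,1,10,2};             // "01X2" with X==10 at index 2
      _MoveElm(data, 4, SIZE(Int), 2, 0); // -> {10,0,1,2}, i.e. "X012" as in the comment above
*/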
void _MoveElmLeftUnsafe(Ptr data, UInt elm_size, Int elm, Int new_index, Ptr temp) // !! assumes indexes are in range, "elm>=new_index", 'temp' can fit 'elm_size' !!
{
   if(new_index!=elm)
   {
      Byte *d=(Byte*)data;
      CopyFast(temp, d+elm*elm_size, elm_size); // copy element from data to temp memory
   #if 0 // not needed since we're always moving left in this function
      //                   E     N                                           E        N
      if(elm<new_index) // element is on the left, and we're moving it to the right, move the data to the left "0X123" -> "012X3"
      {
         MoveFast(d+elm*elm_size, d+(elm+1)*elm_size, (new_index-elm)*elm_size);
      } //                N     E                                           N        E
      else // element is on the right, and we're moving it to the left, move the data to the right "012X3" -> "0X123"
   #endif
      {
         MoveFast(d+(new_index+1)*elm_size, d+new_index*elm_size, (elm-new_index)*elm_size);
      }
      CopyFast(d+new_index*elm_size, temp, elm_size); // copy element from temp memory back to the data
   }
}
/******************************************************************************/
// COMPARE
/******************************************************************************/
Bool EqualMem(CPtr a, CPtr b, IntPtr size)
{
   if(size>0 && a!=b)return (a && b) ? !memcmp(a, b, size) : false;
   return true;
}
/******************************************************************************/
}
/******************************************************************************/
Ptr  operator new   (size_t size)         {return Alloc(size);}
void operator delete(Ptr    data)noexcept {       Free (data);}
/******************************************************************************/