// EAMemory.cpp
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // Copyright (c) Electronic Arts Inc. All rights reserved.
  3. ///////////////////////////////////////////////////////////////////////////////
  4. #include <EAStdC/internal/Config.h>
  5. #include <EAStdC/EAMemory.h>
  6. #include <EAAssert/eaassert.h>
// In optimized non-debug builds, we inline various functions.
// We don't inline these functions in debug builds because in debug builds they
// contain diagnostic code that can't be exposed in headers, because that would
// require the user of this header to #include all the debug functionality
// headers, which isn't feasible.
  12. #if !EASTDC_MEMORY_INLINE_ENABLED
  13. #include <EAStdC/internal/EAMemory.inl>
  14. #endif
  15. namespace EA
  16. {
  17. namespace StdC
  18. {
  19. ///////////////////////////////////////////////////////////////////////////
  20. // Deprecated functions
  21. //
  22. #if EASTDC_MEMCPY16_ENABLED
  23. // This function is deprecated. It was mistakenly created during a code migration.
  24. // It is scheduled for removal in a future version of this package.
  25. EASTDC_API char16_t* Memcpy(char16_t* pDestination, const char16_t* pSource, size_t nCharCount)
  26. {
  27. return (char16_t*)memcpy(pDestination, pSource, nCharCount * sizeof(char16_t));
  28. }
  29. #endif
  30. #if EASTDC_MEMCPY16_ENABLED
  31. // This function is deprecated. It was mistakenly created during a code migration.
  32. // It is scheduled for removal in a future version of this package.
  33. EASTDC_API char16_t* Memmove(char16_t* pDestination, const char16_t* pSource, size_t nCharCount)
  34. {
  35. return (char16_t*)memmove(pDestination, pSource, nCharCount * sizeof(char16_t));
  36. }
  37. #endif
  38. ///////////////////////////////////////////////////////////////////////////
  39. // rwstdc compatibility
  40. // These functions implement the same named function and argument types
  41. // as the corresponding functions from the rwstdc package.
  42. EASTDC_API void MemFill16(void* pDestination, uint16_t c, unsigned int byteCount)
  43. {
  44. Memfill16(pDestination, c, (size_t)byteCount);
  45. }
  46. EASTDC_API void MemFill32(void* pDestination, unsigned int c, unsigned int byteCount)
  47. {
  48. Memfill32(pDestination, (uint32_t)c, (size_t)byteCount);
  49. }
  50. EASTDC_API void MemFillSpecific(void* pDestination, const void* pSource, unsigned int destByteCount, unsigned int sourceByteCount)
  51. {
  52. MemfillSpecific(pDestination, pSource, (size_t)destByteCount, (size_t)sourceByteCount);
  53. }
  54. EASTDC_API uint16_t* Memset16(void* pDest, uint16_t c, size_t count)
  55. {
  56. // Instead of casting between types, we just create a union.
  57. union PointerUnion
  58. {
  59. void* mpVoid;
  60. uint16_t* mp16;
  61. uint32_t* mp32;
  62. uintptr_t mU;
  63. };
  64. PointerUnion p;
  65. p.mpVoid = pDest;
  66. EA_ASSERT((p.mU & 1) == 0);
  67. const uint16_t* const pEnd = (p.mp16 + count);
  68. if(count <= 32) // For small sizes, we simply do a little loop.
  69. {
  70. while(p.mp16 < pEnd)
  71. *p.mp16++ = c;
  72. }
  73. else
  74. {
  75. if(p.mU & 3) // If the address is not aligned on a 32 bit boundary.
  76. {
  77. *p.mp16++ = c; // Align it on a 32 bit boundary.
  78. count--;
  79. }
  80. // From here on we copy in 32 bit chunks for speed.
  81. count /= 2;
  82. const uint32_t c32 = (uint32_t)(c | (c << 16));
  83. while(count--)
  84. *p.mp32++ = c32;
  85. if(p.mp16 < pEnd)
  86. *p.mp16 = c;
  87. }
  88. return (uint16_t*)pDest;
  89. }
// Fills [pDest, pDest + count*4) with the 32-bit value c.
// pDest must be 4-byte aligned; count is in 32-bit words, not bytes.
// Returns pDest.
EASTDC_API uint32_t* Memset32(void* pDest, uint32_t c, size_t count)
{
	EA_ASSERT(((uintptr_t)pDest & 3) == 0); // The destination must be 32-bit aligned.

	#if (EA_PLATFORM_WORD_SIZE >= 8) || (EA_PLATFORM_PTR_SIZE >= 8) // If we are using a 64 bit system...
		const uint32_t* const pEnd = (uint32_t*)pDest+count;
		uint32_t* pDest32 = (uint32_t*)pDest;
		uint64_t c64;

		if(count <= 16) // For small sizes, we simply do a little loop.
		{
			while(pDest32 < pEnd)
				*pDest32++ = c;
		}
		else
		{
			if(((uintptr_t)pDest32) & 7) // If the address is not aligned on a 64 bit boundary.
			{
				*pDest32++ = c; // Align it on a 64 bit boundary.
				count--;
			}

			uint64_t* pDest64 = (uint64_t*)pDest32; // From here on we copy in 64 bit chunks for speed.
			count /= 2;                             // Number of 64-bit stores; a possible odd trailing word is handled below.
			c64 = ((uint64_t)c | ((uint64_t)c << 32));

			while(count)
			{
				*pDest64++ = c64;
				count--;
			}

			if((uint32_t*)pDest64 < pEnd) // Write the final (odd) 32-bit word, if any.
				*((uint32_t*)pDest64) = (uint32_t)c64;
		}
	#else
		// 32-bit platforms: a simple word-at-a-time loop.
		uint32_t* cur = (uint32_t*)pDest;
		const uint32_t* const end = (uint32_t*)pDest + count;

		while(cur < end)
			*cur++ = c;
	#endif

	return (uint32_t*)pDest;
}
  128. EASTDC_API uint64_t* Memset64(void* pDest, uint64_t c, size_t count)
  129. {
  130. EA_ASSERT(((uintptr_t)pDest & 7) == 0);
  131. uint64_t* cur = (uint64_t*)pDest;
  132. const uint64_t* const end = (uint64_t*)pDest + count;
  133. while(cur < end)
  134. *cur++ = c;
  135. return (uint64_t*)pDest;
  136. }
// Fills pDestination with repeating copies of the sourceBytes-byte pattern at
// pSource, writing count bytes in total. Returns pDestination.
EASTDC_API void* MemsetN(void* pDestination, const void* pSource, size_t sourceBytes, size_t count)
{
	// This is a generic implementation. Pathways optimized for 24 bits and/or 128 bits might be desired.
	uint8_t* pDestination8 = (uint8_t*)pDestination;
	const uint8_t* pSource8 = (const uint8_t*)pSource;
	const uint8_t* pSource8Temp = pSource8;

	if(((sourceBytes & 3) == 0) && (((uintptr_t)pDestination & 3) == 0) && (((uintptr_t)pSource & 3) == 0))
	{
		// Pathway for 32-bit aligned copy: pattern size is a multiple of 4 and
		// both pointers are 4-byte aligned, so we copy a word at a time.
		size_t i = 0;

		while(count >= 4)
		{
			pSource8Temp = pSource8; // Restart the pattern for each full repetition.

			for(i = 0; (i < sourceBytes) && (count >= 4); i += 4, count -= 4)
			{
				*((uint32_t*)pDestination8) = *(const uint32_t*)(pSource8Temp);
				pDestination8 += 4;
				pSource8Temp += 4;
			}
		}

		// i is the byte offset within the pattern where the word loop stopped;
		// the remaining (< 4) tail bytes resume the pattern from that offset.
		if(i == sourceBytes)
			i = 0;

		pSource8Temp = pSource8 + i;

		while(count-- >= 1)
			*pDestination8++ = *pSource8Temp++;
	}
	else // ((sourceBytes & 3) != 0)
	{
		// Pathway for non 32-bit aligned copy: byte-at-a-time.
		while(count >= 1)
		{
			pSource8Temp = pSource8; // Restart the pattern each repetition.

			for(size_t i = 0; (i < sourceBytes) && (count >= 1); i++, count--)
				*pDestination8++ = *pSource8Temp++;
		}
	}

	return pDestination;
}
  175. EASTDC_API const void* Memcheck8(const void* p, uint8_t c, size_t byteCount)
  176. {
  177. for(const uint8_t* p8 = (const uint8_t*)p; byteCount > 0; ++p8, --byteCount)
  178. {
  179. if(*p8 != c)
  180. return p8;
  181. }
  182. return NULL;
  183. }
  184. EASTDC_API const void* Memcheck16(const void* p, uint16_t c, size_t byteCount)
  185. {
  186. union U16 {
  187. uint16_t c16;
  188. uint8_t c8[2];
  189. };
  190. const U16 u = { c };
  191. size_t i = (size_t)((uintptr_t)p % 2);
  192. for(const uint8_t* p8 = (const uint8_t*)p, *p8End = (const uint8_t*)p + byteCount; p8 != p8End; ++p8, i ^= 1)
  193. {
  194. if(*p8 != u.c8[i])
  195. return p8;
  196. }
  197. return NULL;
  198. }
  199. EASTDC_API const void* Memcheck32(const void* p, uint32_t c, size_t byteCount)
  200. {
  201. union U32 {
  202. uint32_t c32;
  203. uint8_t c8[4];
  204. };
  205. const U32 u = { c };
  206. size_t i = (size_t)((uintptr_t)p % 4);
  207. // This code could be a little faster if it could work with an aligned
  208. // destination and do word compares. There are some pitfalls to be careful
  209. // of which may make the effort not worth it in practice for typical uses
  210. // of this code. In particular we need to make sure that word compares are
  211. // done with word-aligned memory, and that may mean using a version of
  212. // the c argument which has bytes rotated from their current position.
  213. for(const uint8_t* p8 = (const uint8_t*)p, *p8End = (const uint8_t*)p + byteCount; p8 != p8End; ++p8, i = (i + 1) % 4)
  214. {
  215. if(*p8 != u.c8[i])
  216. return p8;
  217. }
  218. return NULL;
  219. }
  220. EASTDC_API const void* Memcheck64(const void* p, uint64_t c, size_t byteCount)
  221. {
  222. union U64 {
  223. uint64_t c64;
  224. uint8_t c8[8];
  225. };
  226. const U64 u = { c };
  227. size_t i = (size_t)((uintptr_t)p % 8);
  228. for(const uint8_t* p8 = (const uint8_t*)p, *p8End = (const uint8_t*)p + byteCount; p8 != p8End; ++p8, i = (i + 1) % 8)
  229. {
  230. if(*p8 != u.c8[i])
  231. return p8;
  232. }
  233. return NULL;
  234. }
  235. EASTDC_API const char* Memchr(const char* p, char c, size_t nCharCount)
  236. {
  237. for(const char* p8 = (const char*)p; nCharCount > 0; ++p8, --nCharCount)
  238. {
  239. if(*p8 == c)
  240. return p8;
  241. }
  242. return NULL;
  243. }
  244. EASTDC_API const char16_t* Memchr16(const char16_t* pString, char16_t c, size_t nCharCount)
  245. {
  246. for(; nCharCount > 0; ++pString, --nCharCount)
  247. {
  248. if(*pString == c)
  249. return pString;
  250. }
  251. return NULL;
  252. }
  253. EASTDC_API const char32_t* Memchr32(const char32_t* pString, char32_t c, size_t nCharCount)
  254. {
  255. for(; nCharCount > 0; ++pString, --nCharCount)
  256. {
  257. if(*pString == c)
  258. return pString;
  259. }
  260. return NULL;
  261. }
  262. #if EASTDC_MEMCHR16_ENABLED
  263. EASTDC_API const char16_t* Memchr(const char16_t* pString, char16_t c, size_t nCharCount)
  264. {
  265. return Memchr16(pString, c, nCharCount);
  266. }
  267. #endif
  268. EASTDC_API int Memcmp(const void* pString1, const void* pString2, size_t nCharCount)
  269. {
  270. const char* p1 = (const char*)pString1;
  271. const char* p2 = (const char*)pString2;
  272. for(; nCharCount > 0; ++p1, ++p2, --nCharCount)
  273. {
  274. if(*p1 != *p2)
  275. return (*p1 < *p2) ? -1 : 1;
  276. }
  277. return 0;
  278. }
  279. #if EASTDC_MEMCPY16_ENABLED
  280. EASTDC_API int Memcmp(const char16_t* pString1, const char16_t* pString2, size_t nCharCount)
  281. {
  282. for(; nCharCount > 0; ++pString1, ++pString2, --nCharCount)
  283. {
  284. if(*pString1 != *pString2)
  285. return (*pString1 < *pString2) ? -1 : 1;
  286. }
  287. return 0;
  288. }
  289. #endif
  290. // Search for pFind/findSize within pMemory/memorySize.
  291. EASTDC_API void* Memmem(const void* pMemory, size_t memorySize, const void* pFind, size_t findSize)
  292. {
  293. EA_ASSERT((pMemory || !memorySize) && (pFind || !findSize)); // Verify that if pMemory or pFind is NULL, their respective size must be 0.
  294. const uint8_t* const pMemory8 = static_cast<const uint8_t*>(pMemory);
  295. const uint8_t* const pFind8 = static_cast<const uint8_t*>(pFind);
  296. const uint8_t* const pEnd8 = (pMemory8 + memorySize) - findSize;
  297. if(memorySize && (findSize <= memorySize))
  298. {
  299. if(findSize) // An empty pFind results in success, return pMemory.
  300. {
  301. for(const uint8_t* pCurrent8 = pMemory8; pCurrent8 <= pEnd8; ++pCurrent8) // This looping algorithm is not the fastest possible way to
  302. { // implement this function. A faster, but much more complex, algorithm
  303. if(EA_UNLIKELY(pCurrent8[0] == pFind8[0])) // Do a quick first char check. // might involve a two-way memory search (http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260).
  304. { // Another algorithm might be to start by searching for words instead of bytes, then use Memcmp.
  305. if(Memcmp(pCurrent8 + 1, pFind8 + 1, findSize - 1) == 0)
  306. return const_cast<uint8_t*>(pCurrent8);
  307. }
  308. }
  309. }
  310. else
  311. return const_cast<void*>(pMemory);
  312. }
  313. return NULL;
  314. }
// This is a local function called by MemfillSpecific.
// Fills byteCount bytes at pD with the repeating 3-byte pattern at pS.
static void Memfill24(void* pD, const void* pS, size_t byteCount)
{
	unsigned char* pDestination = static_cast<unsigned char*>(pD);
	const unsigned char* pSource = static_cast<const unsigned char*>(pS);

	// Optimization wise, this function will assume that pDestination is already aligned 32-bit

	// Construct 3 32-bit values which together hold four repetitions of the
	// 3-byte pattern (12 bytes == lcm(3, 4)), in native byte order.
	unsigned int val8a = *(static_cast<const unsigned char*>(pSource));
	unsigned int val8b = *(static_cast<const unsigned char*>(pSource+1));
	unsigned int val8c = *(static_cast<const unsigned char*>(pSource+2));
	unsigned int val32a,val32b,val32c;

	#if defined(EA_SYSTEM_BIG_ENDIAN)
		val32a=(val8a*256*256*256)+(val8b*256*256)+(val8c*256)+val8a;
		val32b=(val8b*256*256*256)+(val8c*256*256)+(val8a*256)+val8b;
		val32c=(val8c*256*256*256)+(val8a*256*256)+(val8b*256)+val8c;
	#else
		val32a=val8a+(val8b*256)+(val8c*256*256)+(val8a*256*256*256);
		val32b=val8b+(val8c*256)+(val8a*256*256)+(val8b*256*256*256);
		val32c=val8c+(val8a*256)+(val8b*256*256)+(val8c*256*256*256);
	#endif

	// Time to copy. We have to align the destination address to 32 bits first,
	// otherwise the word stores below would crash on hardware (e.g. the PS2)
	// that faults on unaligned 32-bit writes.
	while (((reinterpret_cast<uintptr_t>(pDestination) & 0x03)!=0) && (byteCount>0))
	{
		byteCount--;

		// Write one byte, then rotate the three words by 8 bits so the pattern
		// stays phase-correct relative to the destination address.
		#if defined(EA_SYSTEM_BIG_ENDIAN)
			*(pDestination++)=static_cast<uint8_t>(val32a >> 24);
			unsigned int tmp = val32a;
			val32a=(val32a << 8) + (val32b >> 24);
			val32b=(val32b << 8) + (val32c >> 24);
			val32c=(val32c << 8) + (tmp >> 24);
		#else
			*(pDestination++)=static_cast<uint8_t>(val32a);
			unsigned int tmp = val32a;
			val32a=(val32a >> 8) + (val32b << 24);
			val32b=(val32b >> 8) + (val32c << 24);
			val32c=(val32c >> 8) + (tmp << 24);
		#endif
	}

	// Main loop: store 12 bytes (four pattern repetitions) per iteration.
	while (byteCount >= 12)
	{
		*(reinterpret_cast<unsigned int*>(pDestination)) = val32a;
		*(reinterpret_cast<unsigned int*>(pDestination+4)) = val32b;
		*(reinterpret_cast<unsigned int*>(pDestination+8)) = val32c;
		pDestination+=12;
		byteCount-=12;
	}

	// Remaining 4..11 bytes: one word at a time, cycling the three words.
	while (byteCount >= 4)
	{
		*(reinterpret_cast<unsigned int*>(pDestination)) = val32a;
		pDestination+=4;
		byteCount-=4;
		val32a=val32b;
		val32b=val32c;
	}

	// Final 0..3 bytes, written from val32a in address order.
	while (byteCount >= 1)
	{
		#if defined(EA_SYSTEM_BIG_ENDIAN)
			*pDestination = static_cast<uint8_t>(val32a >> 24);
			val32a = val32a << 8;
		#else
			*pDestination = static_cast<uint8_t>(val32a);
			val32a = val32a >> 8;
		#endif

		pDestination++;
		byteCount--;
	}
}
// This is a local function called by MemfillSpecific.
// Fills destByteCount bytes at pD with the repeating sourceByteCount-byte
// pattern at pS. Handles any pattern size and any alignment.
static void MemfillAny(void* pD, const void* pS, size_t destByteCount, size_t sourceByteCount)
{
	union Memory // Use a union to avoid memory aliasing problems in the compiler.
	{
		void* mpVoid;
		uint8_t* mp8;
		uint32_t* mp32;
		uint32_t m32; // Low 32 bits of the pointer value; used only for the alignment tests below.
	};

	Memory d;
	d.mpVoid = pD;

	Memory s;
	s.mpVoid = const_cast<void*>(pS);

	if (((sourceByteCount & 0x03) == 0) && ((d.m32 & 0x03) == 0) && ((s.m32 & 0x03) == 0))
	{
		// Routine for 32-bit aligned copy: pattern size is a multiple of 4 and
		// both pointers are 4-byte aligned, so copy a word at a time.
		size_t i = 0;

		while (destByteCount >= 4)
		{
			s.mpVoid = const_cast<void*>(pS); // Restart the pattern for each full repetition.

			for (i = 0; (i < sourceByteCount) && (destByteCount >= 4); i += 4, destByteCount -= 4)
				*d.mp32++ = *s.mp32++;
		}

		// i is the pattern offset where the word loop stopped; the remaining
		// (< 4) tail bytes resume the pattern from that offset.
		if (i == sourceByteCount)
			i = 0;

		s.mpVoid = const_cast<void*>(pS);
		s.mp8 += i;

		while (destByteCount >= 1)
		{
			*d.mp8++ = *s.mp8++;
			destByteCount--;
		}
	}
	else
	{
		// Routine for non 32-bit aligned copy: byte-at-a-time.
		while (destByteCount)
		{
			s.mpVoid = const_cast<void*>(pS); // Restart the pattern each repetition.

			for (size_t i = 0; (i < sourceByteCount) && destByteCount; i++)
			{
				*d.mp8++ = *s.mp8++;
				destByteCount--;
			}
		}
	}
}
// This is a local function called by MemfillSpecific.
// Fills byteCount bytes at pD with the repeating 16-byte pattern at pS.
// The pattern is held in four 32-bit words (v1..v4), which are byte-rotated /
// word-cycled as needed to keep the pattern phase-correct while the destination
// is brought up to 32-bit (and opportunistically 128-bit) alignment.
static void Memfill128(void* pD, const void* pS, size_t byteCount)
{
	unsigned char* pDestination = static_cast<unsigned char*>(pD);
	const unsigned char* pSource = static_cast<const unsigned char*>(pS);

	unsigned int v1;
	unsigned int v2;
	unsigned int v3;
	unsigned int v4;

	if ((reinterpret_cast<uintptr_t>(pSource) & 0x3) != 0)
	{
		// If the source is not aligned, we need to retrieve the values on a byte by byte
		// basis, assembling each native-endian 32-bit word from its four bytes.
		#if defined(EA_SYSTEM_BIG_ENDIAN)
			v1 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource)))*256*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+1)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+2)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+3))));
			v2 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+4)))*256*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+5)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+6)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+7))));
			v3 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+8)))*256*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+9)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+10)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+11))));
			v4 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+12)))*256*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+13)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+14)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+15))));
		#else
			v1 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource)))) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+1)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+2)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+3)))*256*256*256);
			v2 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+4)))) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+5)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+6)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+7)))*256*256*256);
			v3 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+8)))) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+9)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+10)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+11)))*256*256*256);
			v4 =(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+12)))) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+13)))*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+14)))*256*256) +
				(static_cast<uint32_t>(*(static_cast<const uint8_t*>(pSource+15)))*256*256*256);
		#endif
	}
	else
	{
		// Aligned source: read the four pattern words directly.
		v1 = *(reinterpret_cast<const uint32_t*>(pSource));
		v2 = *(reinterpret_cast<const uint32_t*>(pSource+4));
		v3 = *(reinterpret_cast<const uint32_t*>(pSource+8));
		v4 = *(reinterpret_cast<const uint32_t*>(pSource+12));
	}

	// Alignment correction
	if ((reinterpret_cast<uintptr_t>(pDestination) & 0xF) != 0)
	{
		// Perform 32-bit alignment (this is required for ps2, since it crashes when it writes a 32-bit
		// value to an unaligned 32-bit memory address)
		while (((reinterpret_cast<uintptr_t>(pDestination) & 0x03) != 0) && (byteCount>0))
		{
			byteCount--;

			// Write one byte, then rotate the four words by 8 bits so the
			// pattern stays phase-correct with the destination address.
			#if defined(EA_SYSTEM_BIG_ENDIAN)
				*(pDestination++)=static_cast<uint8_t>(v1 >> 24);
				unsigned int tmp = v1;
				v1=(v1 << 8) + (v2 >> 24);
				v2=(v2 << 8) + (v3 >> 24);
				v3=(v3 << 8) + (v4 >> 24);
				v4=(v4 << 8) + (tmp >> 24);
			#else
				*(pDestination++)=static_cast<uint8_t>(v1);
				unsigned int tmp = v1;
				v1=(v1 >> 8) + (v2 << 24);
				v2=(v2 >> 8) + (v3 << 24);
				v3=(v3 >> 8) + (v4 << 24);
				v4=(v4 >> 8) + (tmp << 24);
			#endif
		}

		if (byteCount >=256)
		{
			// Not really worth performing all these extra stores if byteCount isn't large.
			// Perform 128-bit alignment on 32-bit boundary: emit 1..3 words and
			// cycle v1..v4 so the main loop below stays phase-correct.
			unsigned int tempval,tempval2;

			switch (reinterpret_cast<uintptr_t>(pDestination)&0xC)
			{
				case 0xC: // One word to the next 16-byte boundary.
					*reinterpret_cast<uint32_t*>(pDestination) = v1;
					pDestination+=4;
					byteCount-=4;
					tempval = v1;
					v1 = v2;
					v2 = v3;
					v3 = v4;
					v4 = tempval;
					break;

				case 0x8: // Two words to the next 16-byte boundary.
					*reinterpret_cast<uint32_t*>(pDestination) = v1;
					*reinterpret_cast<uint32_t*>(pDestination+4) = v2;
					pDestination+=8;
					byteCount-=8;
					tempval = v1;
					tempval2 = v3;
					v1 = tempval2;
					v3 = tempval;
					tempval = v2;
					tempval2 = v4;
					v2 = tempval2;
					v4 = tempval;
					break;

				case 0x4: // Three words to the next 16-byte boundary.
					*reinterpret_cast<uint32_t*>(pDestination) = v1;
					*reinterpret_cast<uint32_t*>(pDestination+4) = v2;
					*reinterpret_cast<uint32_t*>(pDestination+8) = v3;
					pDestination+=12;
					byteCount-=12;
					tempval = v4;
					v4 = v3;
					v3 = v2;
					v2 = v1;
					v1 = tempval;
					break;

				default: // Already 16-byte aligned.
					break;
			}
		}
	}

	// Main loop: store 16 bytes (one full pattern repetition) per iteration.
	while (byteCount >= 16)
	{
		*(reinterpret_cast<uint32_t*>(pDestination)) = v1;
		*(reinterpret_cast<uint32_t*>(pDestination+4)) = v2;
		*(reinterpret_cast<uint32_t*>(pDestination+8)) = v3;
		*(reinterpret_cast<uint32_t*>(pDestination+12)) = v4;
		byteCount-=16;
		pDestination+=16;
	}

	if (byteCount > 0)
	{
		// End of destination not aligned to 128-bit: write remaining whole
		// words, cycling the pattern words.
		unsigned int i = 0;

		while (byteCount >= 4)
		{
			*(reinterpret_cast<unsigned int*>(pDestination)) = v1;
			pDestination+=4;
			byteCount-=4;
			v1=v2;
			v2=v3;
			v3=v4;
		}

		#if defined(EA_SYSTEM_BIG_ENDIAN)
			// Write the final 0..3 bytes from v1 in address order.
			for (i=0;(i<4) && (byteCount);i++)
			{
				*pDestination++ = static_cast<uint8_t>(v1 >> 24);
				v1 = v1 << 8;
				byteCount--;
			}
		#else
			// write the remainder for low-endian as long as the byteCount value allows it
			for (i=0;(i<4) && (byteCount!=0);i++)
			{
				*pDestination++ = static_cast<uint8_t>(v1);
				v1 = v1 >> 8;
				byteCount--;
			}
		#endif
	}
}
  601. EASTDC_API void Memfill16(void* pDestination, uint16_t c, size_t byteCount)
  602. {
  603. Memfill32(pDestination, (uint32_t)((c << 16) + c), byteCount);
  604. }
  605. EASTDC_API void Memfill24(void* pDestination, uint32_t c, size_t byteCount)
  606. {
  607. const uint8_t c24[3] = { (uint8_t)(c >> 16), (uint8_t)(c >> 8), (uint8_t)c };
  608. Memfill24(pDestination, c24, byteCount);
  609. }
// Fills byteCount bytes at pDestination with the repeating 32-bit pattern c.
// Two implementations: a hand-written x86 assembly version for 32-bit MSVC,
// and a portable C++ version for everything else.
#if defined(EA_PROCESSOR_X86) && defined(_MSC_VER)
	// Naked function: no prologue/epilogue; arguments are read directly off the
	// stack (cdecl layout) and the function returns via its own 'ret'.
	EASTDC_API __declspec(naked) void Memfill32(void* /*pDestination*/, uint32_t /*c*/, size_t /*byteCount*/)
	{
		__asm
		{
			mov eax,dword ptr [esp+4] ; pDestination
			mov edx,dword ptr [esp+8] ; c
			mov ecx,dword ptr [esp+12] ; byteCount
			sub ecx,32
			jns b32a
			jmp b32b
			align 16
			; 32 byte filler
		b32a:
			sub ecx,32
			mov [eax],edx
			mov [eax+4],edx
			mov [eax+8],edx
			mov [eax+12],edx
			mov [eax+16],edx
			mov [eax+20],edx
			mov [eax+24],edx
			mov [eax+28],edx
			lea eax,[eax+32]
			jns b32a
		b32b:
			add ecx,32-8
			js b8b
			; 8 byte filler
		b8a:
			mov [eax],edx
			mov [eax+4],edx
			add eax,8
			sub ecx,8
			jns b8a
		b8b:
			add ecx,8
			jne bend
			ret
			; tail cleanup 4,2,1
		bend:
			cmp ecx,4
			jb be4
			mov [eax],edx
			add eax,4
			sub ecx,4
		be4:
			cmp ecx,2
			jb be2
			mov [eax],dx
			ror edx,16
			add eax,2
			sub ecx,2
		be2:
			cmp ecx,1
			jb be1
			mov [eax],dl
			inc eax
			dec ecx
		be1:
			ret
		}
	}
#else
	EASTDC_API void Memfill32(void* pDestination, uint32_t c, size_t byteCount)
	{
		// Bring the destination up to 4-byte alignment, writing one byte at a
		// time and rotating c so the pattern stays phase-correct.
		while (((reinterpret_cast<intptr_t>(pDestination) & 3) != 0) && (byteCount > 0))
		{
			#if defined(EA_SYSTEM_BIG_ENDIAN)
				*static_cast<uint8_t*>(pDestination) = static_cast<uint8_t>(c >> 24);
				pDestination = static_cast<void*>(static_cast<char *>(pDestination) + 1);
				c = (c << 8) + (c >> 24); // rotate the value
			#else
				*static_cast<uint8_t*>(pDestination) = static_cast<uint8_t>(c);
				pDestination = static_cast<void*>(static_cast<char *>(pDestination) + 1);
				c = (c << 24) + (c >> 8); // rotate the value
			#endif

			--byteCount;
		}

		// If the address is 4- but not 8-byte aligned, write one word so the
		// 64-bit loop below operates on 8-byte-aligned addresses.
		if ((byteCount >= 4) && ((reinterpret_cast<intptr_t>(pDestination) & 4) != 0))
		{
			*static_cast<uint32_t*>(pDestination) = static_cast<uint32_t>(c);
			pDestination = static_cast<void*>(static_cast<char*>(pDestination) + 4);
			byteCount -= 4;
		}

		// Bulk fill: 64 bytes per iteration via eight 64-bit stores.
		if (byteCount >= 64)
		{
			uint64_t c64 = (static_cast<uint64_t>(c) << static_cast<uint64_t>(32)) | static_cast<uint64_t>(c);

			do
			{
				(static_cast<uint64_t*>(pDestination))[0] = c64;
				(static_cast<uint64_t*>(pDestination))[1] = c64;
				(static_cast<uint64_t*>(pDestination))[2] = c64;
				(static_cast<uint64_t*>(pDestination))[3] = c64;
				(static_cast<uint64_t*>(pDestination))[4] = c64;
				(static_cast<uint64_t*>(pDestination))[5] = c64;
				(static_cast<uint64_t*>(pDestination))[6] = c64;
				(static_cast<uint64_t*>(pDestination))[7] = c64;
				pDestination = static_cast<void*> (static_cast<char*>(pDestination) + 64);
				byteCount -= 64;
			}
			while (byteCount >= 64);
		}

		// 16 bytes per iteration.
		if (byteCount >= 16)
		{
			do
			{
				(reinterpret_cast<uint32_t*>(pDestination))[0] = c;
				(reinterpret_cast<uint32_t*>(pDestination))[1] = c;
				(reinterpret_cast<uint32_t*>(pDestination))[2] = c;
				(reinterpret_cast<uint32_t*>(pDestination))[3] = c;
				pDestination = static_cast<void*> (static_cast<char*>(pDestination) + 16);
				byteCount -= 16;
			}
			while (byteCount >= 16);
		}

		// Remaining whole words.
		if (byteCount >= 4)
		{
			do
			{
				*static_cast<uint32_t*>(pDestination) = static_cast<uint32_t> (c);
				pDestination = static_cast<void*>(static_cast<char*>(pDestination) + 4);
				byteCount -= 4;
			}
			while (byteCount >= 4);
		}

		// Final 0..3 bytes, written from c in address order.
		while (byteCount >= 1)
		{
			#if defined(EA_SYSTEM_BIG_ENDIAN)
				*static_cast<uint8_t*>(pDestination) = static_cast<uint8_t> (c >> 24);
				pDestination = static_cast<void*>(static_cast<char*>(pDestination) + 1);
				c = c << 8;
			#else
				*static_cast<uint8_t*>(pDestination) = static_cast<uint8_t> (c);
				pDestination = static_cast<void*>(static_cast<char*>(pDestination) + 1);
				c = c >> 8;
			#endif

			--byteCount;
		}
	}
#endif
  751. EASTDC_API void Memfill64(void* pDestination, uint64_t c, size_t byteCount)
  752. {
  753. MemfillAny(pDestination, &c, byteCount, sizeof(c));
  754. }
  755. EASTDC_API void Memfill8(void* pDestination, uint8_t c, size_t byteCount)
  756. {
  757. Memset8(pDestination, c, byteCount);
  758. }
  759. EASTDC_API void MemfillSpecific(void* pDestination, const void* pSource, size_t destByteCount, size_t sourceByteCount)
  760. {
  761. switch (sourceByteCount)
  762. {
  763. case 1:
  764. {
  765. const uint8_t c = *static_cast<const uint8_t*>(pSource);
  766. Memset8(pDestination, c, destByteCount);
  767. break;
  768. }
  769. case 2:
  770. {
  771. const uint16_t c = *static_cast<const uint16_t*>(pSource);
  772. Memfill16(pDestination, c, destByteCount);
  773. break;
  774. }
  775. case 3:
  776. Memfill24(pDestination, pSource, destByteCount);
  777. break;
  778. case 4:
  779. {
  780. uint32_t c = *static_cast<const uint32_t*>(pSource);
  781. Memfill32(pDestination, c, destByteCount);
  782. break;
  783. }
  784. case 8:
  785. default:
  786. MemfillAny(pDestination, pSource, destByteCount, sourceByteCount);
  787. break;
  788. case 16:
  789. Memfill128(pDestination, pSource, destByteCount);
  790. break;
  791. }
  792. }
  793. // Has similar behavior to the Unix bcmp function but executes the same instructions every
  794. // time and thus executes in the same amount of time for a given byteCount. Assumes that the
  795. // CPU executes the instructions below equivalently for all input byte combinations,
  796. // as is usually the case for logical integer operations.
  797. EASTDC_API bool TimingSafeMemEqual(const void* pMem1, const void* pMem2, size_t byteCount)
  798. {
  799. const char* p1 = (const char*)pMem1;
  800. const char* p2 = (const char*)pMem2;
  801. char mask = 0;
  802. for(; byteCount > 0; ++p1, ++p2, --byteCount)
  803. mask |= (*p1 ^ *p2); // Accumulate any differences between the memory.
  804. return (mask == 0); // Concern: If the compiler sees the contents of pMem1 and pMem2 then it may optimize away the code above. In practice the compiler won't be able to see that in the use cases that matter to users.
  805. }
// Has the same behavior as memcmp, but executes the same instructions every
// time and thus executes in the same amount of time for a given byteCount.
// Assumes that the CPU executes the instructions below equivalently for all
// input byte combinations, as is usually the case for logical integer operations.
EASTDC_API int TimingSafeMemcmp(const void* pMem1, const void* pMem2, size_t byteCount)
{
	const uint8_t* p1 = static_cast<const uint8_t*>(pMem1);
	const uint8_t* p2 = static_cast<const uint8_t*>(pMem2);
	int result = 0;

	while(byteCount--) // Walk through the bytes from back to front and recalculate the difference if one is encountered.
	{
		const int c1 = p1[byteCount];
		const int c2 = p2[byteCount];

		// (c1 ^ c2) is in [0, 255]; subtracting 1 and arithmetic-shifting by 8
		// yields -1 when the bytes are equal and 0 when they differ.
		const int mask = (((c1 ^ c2) - 1) >> 8); // The result of the following is that mask is -1 (*p1 == *p2) or 0 (*p1 != *p2).
		result &= mask;      // If (*p1 == *p2) then mask is 0xffffffff and result is unchanged. Else result is reset to 0 (to be updated on the next line).
		result += (c1 - c2); // If (*p1 == *p2) then this adds 0 and result is unchanged. Else result will be (*p1 - *p2).
	}
	// Because the walk is back-to-front, the last unequal byte to update result
	// is the front-most one, so the return value equals the difference of the
	// first unequal bytes — the same sign behavior as memcmp.
	return result;
}
  825. EASTDC_API bool TimingSafeMemIsClear(const void* pMem, size_t byteCount)
  826. {
  827. uint32_t mask = 0;
  828. const uint8_t* p = static_cast<const uint8_t*>(pMem);
  829. while(byteCount--)
  830. mask |= *p++;
  831. return (mask == 0); // Concern: If the compiler sees the contents of pMem then it may optimize away the code above. In practice the compiler won't be able to see that in the use cases that matter to users.
  832. }
  833. } // namespace StdC
  834. } // namespace EA