Roberto Parolin, 6 years ago
commit
6a6c687479
Changed 100 files with 27340 additions and 0 deletions
  1. +1 -0  .p4ignore
  2. +21 -0  3RDPARTYLICENSES.TXT
  3. +20 -0  CONTRIBUTING.md
  4. +55 -0  doc/AdditionalReading.html
  5. +159 -0  doc/Callstack.html
  6. +13 -0  doc/ClassDocumentation.html
  7. +85 -0  doc/DesignConsiderations.html
  8. +39 -0  doc/Readme.html
  9. +92 -0  doc/UTFDoc.css
  10. +364 -0  doc/UserGuide.html
  11. +210 -0  doc/doxygen/eathread.doxygen.config
  12. +226 -0  include/eathread/android/eathread_atomic_android.h
  13. +221 -0  include/eathread/android/eathread_atomic_android_c11.h
  14. +259 -0  include/eathread/apple/eathread_atomic_apple.h
  15. +69 -0  include/eathread/apple/eathread_callstack_apple.h
  16. +61 -0  include/eathread/apple/eathread_sync_apple.h
  17. +54 -0  include/eathread/arm/eathread_sync_arm.h
  18. +26 -0  include/eathread/armgcc/eathread_sync_armgcc.h
  19. +208 -0  include/eathread/cpp11/eathread_atomic_cpp11.h
  20. +826 -0  include/eathread/eathread.h
  21. +480 -0  include/eathread/eathread_atomic.h
  22. +249 -0  include/eathread/eathread_barrier.h
  23. +347 -0  include/eathread/eathread_callstack.h
  24. +524 -0  include/eathread/eathread_callstack_context.h
  25. +254 -0  include/eathread/eathread_condition.h
  26. +797 -0  include/eathread/eathread_futex.h
  27. +323 -0  include/eathread/eathread_list.h
  28. +341 -0  include/eathread/eathread_mutex.h
  29. +302 -0  include/eathread/eathread_pool.h
  30. +221 -0  include/eathread/eathread_rwmutex.h
  31. +430 -0  include/eathread/eathread_rwmutex_ip.h
  32. +253 -0  include/eathread/eathread_rwsemalock.h
  33. +408 -0  include/eathread/eathread_rwspinlock.h
  34. +452 -0  include/eathread/eathread_rwspinlockw.h
  35. +339 -0  include/eathread/eathread_semaphore.h
  36. +319 -0  include/eathread/eathread_spinlock.h
  37. +362 -0  include/eathread/eathread_storage.h
  38. +272 -0  include/eathread/eathread_sync.h
  39. +802 -0  include/eathread/eathread_thread.h
  40. +190 -0  include/eathread/gcc/eathread_atomic_gcc.h
  41. +73 -0  include/eathread/gcc/eathread_sync_gcc.h
  42. +29 -0  include/eathread/internal/atomic.h
  43. +634 -0  include/eathread/internal/config.h
  44. +241 -0  include/eathread/internal/deprecated.h
  45. +15 -0  include/eathread/internal/dllinfo.h
  46. +143 -0  include/eathread/internal/eathread_atomic.h
  47. +36 -0  include/eathread/internal/eathread_atomic_standalone.h
  48. +199 -0  include/eathread/internal/eathread_atomic_standalone_gcc.h
  49. +249 -0  include/eathread/internal/eathread_atomic_standalone_msvc.h
  50. +32 -0  include/eathread/internal/eathread_global.h
  51. +50 -0  include/eathread/internal/timings.h
  52. +559 -0  include/eathread/powerpc/eathread_atomic_powerpc.h
  53. +31 -0  include/eathread/powerpc/eathread_sync_powerpc.h
  54. +431 -0  include/eathread/shared_array_mt.h
  55. +472 -0  include/eathread/shared_ptr_mt.h
  56. +47 -0  include/eathread/version.h
  57. +462 -0  include/eathread/x86-64/eathread_atomic_x86-64.h
  58. +108 -0  include/eathread/x86-64/eathread_sync_x86-64.h
  59. +742 -0  include/eathread/x86/eathread_atomic_x86.h
  60. +89 -0  include/eathread/x86/eathread_sync_x86.h
  61. +21 -0  source/android/com_ea_EAThread_EAThread.h
  62. +10 -0  source/android/eathread_android.cpp
  63. +84 -0  source/android/eathread_fake_atomic_64.cpp
  64. +226 -0  source/android/eathread_semaphore_android.cpp
  65. +829 -0  source/apple/eathread_callstack_apple.cpp
  66. +163 -0  source/apple/eathread_semaphore_apple.cpp
  67. +412 -0  source/arm/eathread_callstack_arm.cpp
  68. +217 -0  source/cpp11/eathread_cpp11.cpp
  69. +97 -0  source/cpp11/eathread_mutex_cpp11.cpp
  70. +5 -0  source/cpp11/eathread_semaphore_cpp11.cpp
  71. +488 -0  source/cpp11/eathread_thread_cpp11.cpp
  72. +42 -0  source/deprecated.cpp
  73. +254 -0  source/eathread.cpp
  74. +170 -0  source/eathread_atomic.cpp
  75. +194 -0  source/eathread_barrier.cpp
  76. +36 -0  source/eathread_callstack.cpp
  77. +271 -0  source/eathread_condition.cpp
  78. +335 -0  source/eathread_futex.cpp
  79. +144 -0  source/eathread_mutex.cpp
  80. +711 -0  source/eathread_pool.cpp
  81. +263 -0  source/eathread_rwmutex.cpp
  82. +361 -0  source/eathread_rwmutex_ip.cpp
  83. +351 -0  source/eathread_semaphore.cpp
  84. +354 -0  source/eathread_storage.cpp
  85. +262 -0  source/eathread_thread.cpp
  86. +178 -0  source/kettle/eathread_barrier_kettle.cpp
  87. +557 -0  source/kettle/eathread_callstack_kettle.cpp
  88. +121 -0  source/kettle/eathread_condition_kettle.cpp
  89. +393 -0  source/kettle/eathread_kettle.cpp
  90. +199 -0  source/kettle/eathread_mutex_kettle.cpp
  91. +61 -0  source/kettle/eathread_pthread_stack_info.cpp
  92. +177 -0  source/kettle/eathread_semaphore_kettle.cpp
  93. +799 -0  source/kettle/eathread_thread_kettle.cpp
  94. +687 -0  source/libunwind/eathread_callstack_libunwind.cpp
  95. +122 -0  source/null/eathread_callstack_null.cpp
  96. +140 -0  source/openkode/eathread_semaphore_openkode.cpp
  97. +536 -0  source/pc/eathread_callstack_win32.cpp
  98. +622 -0  source/pc/eathread_callstack_win64.cpp
  99. +221 -0  source/pc/eathread_mutex_pc.cpp
  100. +911 -0  source/pc/eathread_pc.cpp

+ 1 - 0
.p4ignore

@@ -0,0 +1 @@
+tags

+ 21 - 0
3RDPARTYLICENSES.TXT

@@ -0,0 +1,21 @@
+Additional licenses also apply to this software package as detailed below.
+
+--------------------------------------------------------------------------
+Copyright (c) 2015 Jeff Preshing
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgement in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+--------------------------------------------------------------------------

+ 20 - 0
CONTRIBUTING.md

@@ -0,0 +1,20 @@
+## Contributing
+
+Before you can contribute, EA must have a Contributor License Agreement (CLA) on file that has been signed by each contributor.
+You can sign here: [Go to CLA](https://electronicarts.na1.echosign.com/public/esignWidget?wid=CBFCIBAA3AAABLblqZhByHRvZqmltGtliuExmuV-WNzlaJGPhbSRg2ufuPsM3P0QmILZjLpkGslg24-UJtek*)
+
+### Pull Request Policy
+
+All code contributions to EAThread are submitted as [Github pull requests](https://help.github.com/articles/using-pull-requests/). All pull requests will be reviewed by an EAThread maintainer according to the guidelines found in the next section.
+
+Your pull request should:
+
+* merge cleanly
+* come with tests
+	* tests should be minimal and stable
+	* fail before your fix is applied
+* pass the test suite
+* follow the code formatting encoded in the clang-format file
+	* limit clang-format usage to new code
+	* do not deviate from the style already established in the files
+

+ 55 - 0
doc/AdditionalReading.html

@@ -0,0 +1,55 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>EAThread Additional Reading</title>
+  
+  <meta name="description" content="Provides references to additional documents regarding multithreading."></head><body>
+  <h1>Additional Reading</h1>
+  <p>One of the best ways to become familiar with the many facets of
+  multithreading is to read the current literature. This literature
+  consists of books, technical papers, online discussions and FAQs. Here
+  are a few significant links regarding multithreaded programming in
+  general. Note that much of the online discussion regarding
+  multithreading is Unix-centric, and authors are sometimes
+  unreasonably hostile to non-Unix multithreading paradigms.</p>
+  <h3>Books about threading</h3>
+  <p><a href="http://www.amazon.com/exec/obidos/tg/detail/-/0131900676">Thread Time: The MultiThreaded Programming Guide</a>, by Norton and DiPasquale<br>
+    <a href="http://www.amazon.com/exec/obidos/tg/detail/-/0201633922/002-3929909-2459261?v=glance">Programming&nbsp; with Posix Threads</a>, by Butenhof<br>
+    <a href="http://www.amazon.com/exec/obidos/tg/detail/-/0134436989/">Threads Primer: A Guide to Multithreaded Programming</a>, by Lewis and Berg<br>
+    <span class="sans"><a href="http://www.amazon.com/exec/obidos/tg/detail/-/0201310090">Concurrent Programming in Java(TM): Design Principles and Pattern</a>, by Lea</span></p>
+  <h3>Online Posix threading documentation</h3>
+  <p><a href="http://www.opengroup.org/onlinepubs/007904975/basedefs/pthread.h.html">http://www.opengroup.org/onlinepubs/007904975/basedefs/pthread.h.html</a></p>
+  <h3>Windows threading</h3>
+  <p><a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/processes_and_threads.asp">http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/processes_and_threads.asp</a></p>
+  <h3>Linux threading</h3>
+  <p><a href="http://kerneltrap.org/node/422">http://kerneltrap.org/node/422</a></p>
+  <h3>Macintosh threading </h3>
+  <p><a href="http://developer.apple.com/macosx/multithreadedprogramming.html">http://developer.apple.com/macosx/multithreadedprogramming.html</a><br>
+    <a href="http://developer.apple.com/technotes/tn/tn2028.html">http://developer.apple.com/technotes/tn/tn2028.html </a> </p>
+  <h3>Discussions</h3>
+  <p><a href="news://comp.programming.threads">news://comp.programming.threads</a> (note that this discussion group is marred by the presence of some trolls) <br>
+    <a href="http://www.talkaboutprogramming.com/group/comp.programming.threads/">http://www.talkaboutprogramming.com/group/comp.programming.threads/</a> <small>(same as usenet but more accessible for some)</small></p>
+  <h3>FAQs</h3>
+  <p><a href="http://www.openmp.org/index.cgi?faq">http://www.openmp.org/index.cgi?faq</a><br>
+    <a href="http://www.lambdacs.com/cpt/MFAQ.html">http://www.lambdacs.com/cpt/MFAQ.html</a><br>
+    <a href="http://www.lambdacs.com/cpt/FAQ.html">http://www.lambdacs.com/cpt/FAQ.html</a></p>
+  <h3>Third-Party Threading Libraries</h3>
+  <p>ACE: <a href="http://www.cs.wustl.edu/%7Eschmidt/ACE-overview.html">http://www.cs.wustl.edu/~schmidt/ACE-overview.html</a><br>
+    Boost: <a href="http://www.boost.org/libs/thread/doc/index.html">http://www.boost.org/libs/thread/doc/index.html</a><br>
+    ZThreads: <a href="http://zthread.sourceforge.net/html/hierarchy.html">http://zthread.sourceforge.net/html/hierarchy.html</a></p>
+  <h3>Miscellaneous Reading</h3>
+  <p><a href="http://www-106.ibm.com/developerworks/java/library/j-king.html">http://www-106.ibm.com/developerworks/java/library/j-king.html</a><br>
+    <br>
+  </p>
+  <hr style="width: 100%; height: 2px;">End of document<br>
+
+
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+</body></html>

+ 159 - 0
doc/Callstack.html

@@ -0,0 +1,159 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+    <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
+    <title>EACallstack</title>    
+	<link type="text/css" rel="stylesheet" href="UTFDoc.css">
+    <meta name="author" content="Paul Schlegel">
+    <style type="text/css">
+<!--
+.style1 {color: #0033CC}
+-->
+    </style>
+</head>
+<body bgcolor="#FFFFFF">
+<h1>EACallstack</h1>
+<h2>Introduction</h2>
+<p>The EACallstack package includes functionality to retrieve callstacks at runtime, convert callstacks to symbol names, locations, or source code, disassemble code, read PS3 (and eventually other platforms') crash dumps, read machine thread contexts, and related functionality. It supports code in DLLs (or other platform equivalents).</p>
+<p>EACallstack works on the following platforms:
+<ul>
+<li>PS3</li>
+<li>XBox 360</li>
+<li>Wii</li>
+<li>Windows / x86, x64</li>
+<li>Macintosh / x86, x64</li>
+<li>Linux / x86, x64</li>
+<li>Mobile / ARM</li>
+<li>PS2</li>
+</ul>
+</p>
+<p>The Callstack namespace is the primary namespace of the EACallstack package. 
+It defines the methods used to initialize and shut down EACallstack, and it 
+defines methods for the following functions that are fundamental to all 
+EACallstack operations:</p>
+<ul>
+	<li>To get the value of the current instruction pointer</li>
+	<li>To get the callstack, expressed as an array of instruction pointers, for 
+	any thread</li>
+</ul>
+<p>All of these functions return instruction pointers. In order to obtain 
+human-readable debugging information, an instruction pointer can be passed to 
+other classes within EACallstack, such as <a href="EAAddressRep.html">
+EAAddressRep</a> or <a href="EADasm.html">EADasm</a>.
+<a href="EAAddressRep.html">EAAddressRep</a> can be used for looking up symbol 
+information such as the function name, source file, line number, and source code 
+text associated with a program address. <a href="EADasm.html">EADasm</a> can be used 
+for disassembling machine code at a program address.</p>
+<p>By using the functions defined in the Callstack namespace in conjunction 
+with other classes defined in EACallstack, it is possible for an executable 
+running on a console platform to construct failure reports with a wide variety 
+of debugging information that can be immediately useful to a developer, without 
+the need for external symbol lookup tools or other translation steps.</p>
+<h2>Example usage </h2>
+<p>Here's example usage for how to use GetCallstack:</p>
+<pre class="code-example">void YourApp::Initialize()
+{
+    #ifdef EA_DEBUG // EACallstack is likely for debug-only configurations.
+<span class="style1">        Callstack::InitCallstack();
+<font color="#000000">    #endif</font></span>
+}
+
+void YourApp::OutputCallstack()
+{
+    void*  pCallstack[32];
+    size_t nCallstackDepth = <font color="#0033CC">GetCallstack(pCallstack, 32, NULL); </font>// NULL == use current thread context
+
+    for(size_t i = 0; i &lt; nCallstackDepth; ++i)
+    {
+        const void* const pCallstackAddress = pCallstack[i];
+
+        // Get the symbol information for pCallstackAddress via GetAddressRep here.
+        // See the documentation for <a href="EAAddressRep.html">EAAddressRep</a>.
+        //
+        // ... and/or ...
+        //
+        // Get the machine code disassembly for pCallstackAddress via EADasm here.
+        // See the documentation for <a href="EADasm.html">EADasm</a>.
+     }
+}
+
+void YourApp::Shutdown()
+{
+    #ifdef EA_DEBUG
+<span class="style1">        Callstack::ShutdownCallstack();
+<font color="#000000">    #endif</font></span>
+}</pre>
+<h2>Interface</h2>
+<p>Methods for the initialization and shutdown of the EACallstack package, 
+defined in the EACallstack.h header file:</p>
+<pre class="code-example"><span class="code-example-comment">/// InitCallstack
+///
+/// Allows the user to explicitly initialize the callstack mechanism.
+/// Only the first call to InitCallstack will have effect. Calls to 
+/// InitCallstack must be matched by calls to ShutdownCallstack.
+///
+</span>void InitCallstack();
+
+<span class="code-example-comment">/// ShutdownCallstack
+///
+/// Allows the user to explicitly shutdown the callstack mechanism.
+/// Calls to InitCallstack must be matched by calls to ShutdownCallstack.
+/// The last call to ShutdownCallstack will shutdown and free the callstack mechanism.
+///
+</span>void ShutdownCallstack();</pre>
+<p>The GetCallstack function:</p>
+<pre class="code-example"><span class="code-example-comment">/// GetCallstack
+///
+/// Gets the addresses of the calling instructions of a call stack.
+/// If the CallstackContext parameter is used, then that execution context is used;
+/// otherwise the current execution context is used.
+/// The return value is the number of entries written to the callstack array.
+/// The item at callstack[0] is always the address of the instruction calling the
+/// GetCallstack function. This is conceptually identical to placing a breakpoint in
+/// a debugger at the point where the GetCallstack function is called.
+/// The maxDepth parameter must be at least one.
+///
+</span>size_t GetCallstack(void* callstack[], size_t maxDepth, CallstackContext* pContext = NULL);</pre>
+<p>The GetCallstackContext function, for use when obtaining the callstack of a 
+particular thread:</p>
+<pre class="code-example"><span class="code-example-comment">/// GetCallstackContext
+///
+/// Gets the CallstackContext associated with the given thread.
+/// The thread must be in a non-running state.
+/// If the threadID is EAThread::kThreadIdInvalid, the current thread context is retrieved.
+/// The threadId parameter is the same type as an EAThread ThreadId. It is important to 
+/// note that an EAThread ThreadId under Microsoft platforms is a thread handle and not what 
+/// Microsoft calls a thread id. This is by design as Microsoft thread ids are second class
+/// citizens and likely wouldn't exist were it not for quirks in the evolution of the Windows API.
+///
+</span>bool GetCallstackContext(CallstackContext&amp; context, intptr_t threadId = 0);</pre>
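+<p>Below is a minimal sketch of combining these two functions to capture the 
+callstack of another thread. The thread ID value is hypothetical, and per the 
+note above the target thread must be in a non-running state:</p>
+<pre class="code-example">CallstackContext context;
+
+if(GetCallstackContext(context, otherThreadId)) // otherThreadId: an EAThread ThreadId obtained elsewhere
+{
+    void*        addresses[32];
+    const size_t depth = GetCallstack(addresses, 32, &amp;context);
+
+    // addresses[0] .. addresses[depth - 1] can now be passed to
+    // EAAddressRep or EADasm for symbol lookup or disassembly.
+}</pre>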
+<p>The EAGetInstructionPointer macro:</p>
+<pre class="code-example"><span class="code-example-comment">/// EAGetInstructionPointer
+///
+/// Returns the current instruction pointer (a.k.a. program counter).
+/// This function is implemented as a macro, it acts as if its declaration 
+/// were like so:
+/// void EAGetInstructionPointer(void*&amp; p);
+///
+/// For portability, this function should only be used as a standalone 
+/// statement on its own line.
+///
+/// Example usage:
+///     void* pInstruction;
+///     EAGetInstructionPointer(pInstruction);
+///
+</span>&lt;... implementation not shown ...&gt;</pre>
+<hr>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p>&nbsp;</p>
+<p></p>
+</body>
+</html>
+
+
+

+ 13 - 0
doc/ClassDocumentation.html

@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html><head>
+<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
+<meta http-equiv="Refresh" content="0; URL=doxygen/html/index.html"><title>Class Documentation</title>
+
+<link href="doxygen.css" rel="stylesheet" type="text/css"></head>
+
+<body>
+<!-- Generated by Doxygen 1.3.3 -->
+<h1>Class Documentation</h1>
+<p>This page is set to redirect you to the <a href="doxygen/html/index.html">EAThread doxygen documentation</a>. If you are not redirected, you can manually click the link yourself. If the documentation is not present, you need to generate it with doxygen, as not all EAThread distributions ship with built Doxygen documentation. Simply run &quot;doxygen.exe doxygen/eathread.doxygen.config&quot; to build the documentation. </p>
+<p>&nbsp;</p>
+</body></html>

+ 85 - 0
doc/DesignConsiderations.html

@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>Design Considerations</title>
+<style type="text/css">
+<!--
+.style1 {font-family: "Courier New", Courier, mono}
+-->
+</style>
+</head><body>
+<h1>Design Considerations</h1>
+<h3>Design</h3>
+<p>Many of the design criteria for EA::Thread are based on the design of
+  the Posix threading standard. The Posix threading standard is designed
+  to work portably on a wide range of operating systems and hardware,
+  including embedded systems and realtime environments. As such, Posix
+  threads generally represent a competent model to follow where possible.
+  Windows and various other platforms have independent multi-threading
+  systems which are taken into account here as well. If something exists
+  in Windows but doesn't exist here (e.g. Thread suspend/resume), there
+  is a decent chance that it is by design and for some good reason.</p>
+<h3>C++</h3>
+<p>There are a number of C++ libraries devoted to multithreading. Usually
+  the goal of these libraries is to provide a platform-independent interface
+  which simplifies the most common usage patterns and helps prevent
+  common errors. Some of these libraries are basic wrappers around
+  existing C APIs while others (e.g. ZThreads) provide a new and
+  different paradigm. We take the former approach here, as it provides
+  more or less the same functionality but does so in a
+  straightforward way that is easily approached by those familiar with
+  platform-specific APIs. This approach has been referred to as the "Wrapper Facade Pattern".</p>
+<h3>Condition Variables / Monitors</h3>
+<p>Posix condition variables are implemented via the Condition class.
+  For all practical purposes, "monitor" is the Java and C# name for Posix' condition variables. To
+  some, a condition variable may seem similar to a Win32 Signal. In
+  actuality they are similar but there is one critical difference: a
+  Signal does not atomically unlock a mutex as part of the signaling
+  process. This results in problematic race conditions that make reliable
+  producer/consumer systems impossible to implement correctly.</p>
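+<p>A condensed producer/consumer sketch of the pattern this enables, using 
+EAThread's Condition and Mutex as shown in the User Guide (the queue type 
+and its methods are hypothetical):</p>
+<p class="style1">// Consumer<br>
+mutex.Lock();<br>
+while(queue.IsEmpty())<br>
+&nbsp;&nbsp;&nbsp; condition.Wait(&amp;mutex); // atomically unlocks the mutex while waiting; relocks before returning<br>
+item = queue.Pop();<br>
+mutex.Unlock();<br>
+<br>
+// Producer<br>
+mutex.Lock();<br>
+queue.Push(item);<br>
+mutex.Unlock();<br>
+condition.Signal(); // safe: no waiter can miss this, thanks to the atomic unlock in Wait</p>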
+<h3>Events / Signals</h3>
+<p>EAThread doesn't have an Event or Signal because they are not useful for
+  most practical situations; you usually want a Semaphore or Condition
+  instead. An Event as defined by Windows is not the same thing as a
+  Condition (condition variable) and cannot be safely used in its place,
+  primarily due to race conditions. There may nevertheless be some use for
+  events, though they won't be implemented in EAThread until and unless
+  deemed useful. Given that Posix threading has undergone numerous
+  scrutinized revisions without adding an event system, it is arguable
+  that events are not necessary. A publicly available discussion on the
+  topic of implementing events under Posix threads, which could be applied
+  to EAThread, is here: <a href="http://developers.sun.com/solaris/articles/waitfor_api.html">http://developers.sun.com/solaris/articles/waitfor_api.html</a>. Check the EAThread package's scrap directory for a possible implementation of events in EAThread in the future.</p>
+<h3>Timeouts</h3>
+<p>Timeouts are specified as absolute times and not relative times. This
+  may not be how Win32 threading works but it is what's proper and is how
+  Posix threading works. From the OpenGroup <a href="http://www.opengroup.org/onlinepubs/007904975/functions/pthread_cond_wait.html">online</a> pthread (Posix) documentation:<br>
+</p>
+<div style="margin-left: 40px;"> An absolute time measure was chosen for
+specifying the timeout parameter for two reasons. First, a relative
+time measure can be easily implemented on top of a function that
+specifies absolute time, but there is a race condition associated with
+specifying an absolute timeout on top of a function that specifies
+relative timeouts. For example, assume that clock_gettime() returns the
+current time and cond_relative_timed_wait() uses relative timeouts:<br>
+<br>
+<span class="style1">&nbsp;&nbsp; clock_gettime(CLOCK_REALTIME, &amp;now);<br style="font-family: monospace;">
+&nbsp;&nbsp; reltime = sleep_til_this_absolute_time - now;<br style="font-family: monospace;">
+&nbsp;&nbsp; cond_relative_timed_wait(c, m, &amp;reltime);<br>
+</span><br style="font-family: monospace;">
+If the thread is preempted between the first statement and the
+last statement, the thread blocks for too long. Blocking, however, is
+irrelevant if an absolute timeout is used. An absolute timeout also
+need not be recomputed if it is used multiple times in a loop, such as
+that enclosing a condition wait. For cases when the system clock is
+advanced discontinuously by an operator, it is expected that
+implementations process any timed wait expiring at an intervening time
+as if that time had actually occurred.<br>
+</div>
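+<p>The same idea expressed against EAThread's interfaces, as a minimal sketch 
+(GetThreadTime is declared in eathread.h and is assumed here to return the 
+current absolute time in milliseconds):</p>
+<p class="style1">&nbsp;&nbsp; const EA::Thread::ThreadTime timeoutAbsolute = EA::Thread::GetThreadTime() + 1000; // one second from now<br>
+&nbsp;&nbsp; condition.Wait(&amp;mutex, timeoutAbsolute); // the absolute deadline can be reused across retries without drift</p>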
+<br>
+<br>
+<br>
+
+
+</body></html>

+ 39 - 0
doc/Readme.html

@@ -0,0 +1,39 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head><title>ReadMe</title></head><body>
+<h1>ReadMe</h1>
+<h3>What is EAThread</h3>
+<p>EAThread is a package that implements a unified cross-platform
+  interface for multithreaded programming on various platforms. The
+  implementation is clean, efficient, and comprehensive. The package is
+  suitable for basic threading needs such as the creation of multiple
+  threads and mutual exclusion. It additionally contains features that
+  are suitable for advanced threading needs required by next generation
+  console platforms and advanced uses with existing PC and server
+  platforms.</p>
+<h3>Legal</h3>
+<p>EAThread is usable for all uses within Electronic Arts, both internally
+  and in shipping products for all platforms. All source code was written
+  by a single EA engineer and none of the source code comes from an
+  external source. </p>
+<h3>This Documentation</h3>
+<p>Each of the documents in this directory stands alone, though some of
+  the documents have links to others. Simply open any of the HTML
+  documents with your browser to read the given file. </p>
+<h3>Where to Go Next</h3>
+<p>If this is the first document you are reading then the next document you will want to read is the <a href="./UserGuide.html">User Guide</a>. <br>
+  
+  <br>
+  
+</p>
+<hr style="width: 100%; height: 2px;">End of document<br>
+
+
+
+<br>
+
+
+<br>
+
+
+
+</body></html>

+ 92 - 0
doc/UTFDoc.css

@@ -0,0 +1,92 @@
+body 
+{
+	font-family: Palatino Linotype, Book Antiqua, Times New Roman;
+	font-size: 11pt;
+}
+
+h1
+{
+	font-family: Verdana;
+	display: block;
+	background-color: #FFF0B0;
+	border: solid 2px black;
+	font-size: 16pt;
+	font-weight: bold;
+	padding: 6px;
+}
+
+h2 
+{
+	font-size: 14pt;
+	font-family: Verdana;
+	border-bottom: 2px solid black;
+}
+
+h3
+{
+	font-family: Verdana;
+	font-size: 13pt;
+	font-weight: bold;
+}
+
+.code-example 
+{
+	display: block;
+	background-color: #e0e0f0;
+	margin-left: 3em;
+	margin-right: 3em;
+	margin-top: 1em;
+	margin-bottom: 1em;
+	padding: 8px;
+	border: solid 2px #a0a0d0;
+	font-family: monospace;
+	font-size: 10pt;
+	white-space: pre;
+}
+
+.code-example-span 
+{
+	font-family: monospace;
+	font-size: 10pt;
+	white-space: pre;
+}
+
+.code-example-comment
+{
+	background-color: #e0e0f0; 
+	padding: 0px 0px; 
+	font-family: monospace; 
+	font-size: 10pt; 
+	white-space: pre; 
+	color: #999999; 
+	margin: auto auto; 
+}
+
+
+.faq-question
+{
+	background-color: #D0E0D0;
+	font-size: 12pt;
+	font-weight: bold;
+	margin-bottom: 0.5em;
+	margin-top: 0em;
+	padding-left:8px;
+	padding-right:8px;
+	padding-top:2px;
+	padding-bottom:2px
+}
+
+.faq-answer
+{
+	display: block;
+	margin: 4pt 1em 0.5em 1em;
+}
+
+.box_indent {
+	margin-left: 3em;
+}
+.grayed_text {
+	color: #CCCCCC;
+}
+.unobtrusive_link {
+	text-decoration:none;
+	color:#000066;
+}

+ 364 - 0
doc/UserGuide.html

@@ -0,0 +1,364 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html><head>
+<style type="text/css">
+<!--
+.style1 {font-family: "Courier New", Courier, mono}
+-->
+</style>
+</head>
+<body>
+<h1>User Guide</h1>
+<h3>Introduction</h3>
+<p>This document provides a brief description of the EAThread modules and
+    then provides some basic information on using these modules. You will
+    want to consult documentation for individual modules for more detailed
+information about them. </p>
+<p>All code is in C++ and largely follows the EA coding guidelines as of
+    January of 2004. All classes are in the EA::Thread C++ namespace.
+    Thus, the fully qualified name of the Mutex class is
+    EA::Thread::Mutex. Most of the code is plain C++ and
+    doesn't attempt to be very academic with the language. Thus RTTI is not
+    used, templates are used in only one module (FixedAllocator),
+    exception handling is not used, etc. Unit tests have been set up for
+    most of the functionality and are available with the full package. The
+    headers are heavily commented in Doxygen-savvy format and the source
+code for the primary modules has been heavily commented as well. </p>
+<h3>EAThread Modules<span style="font-weight: bold;"></span>
+</h3>
+<div style="margin-left: 40px;">
+  <table style="text-align: left; width: 100%;" border="1" cellpadding="2" cellspacing="2">
+    <tbody> 
+    <tr>
+      <td style="vertical-align: top;" valign="top"><span style="font-weight: bold;">Module</span><br>
+      </td>
+      <td style="vertical-align: top;" valign="top"><span style="font-weight: bold;">Description</span></td>
+      <td style="vertical-align: top;" valign="top"><span style="font-weight: bold;">Source</span><br>
+      </td>
+      <td style="vertical-align: top;" valign="top"><span style="font-weight: bold;">Dependencies</span><br>
+      </td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Thread<br>
+      </td>
+      <td style="vertical-align: top;" valign="top"> Implements the creation and 
+        control of individual threads. <br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread.h/cpp<br>
+eathread_thread.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Storage<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">Implements thread-specific 
+        storage (a.k.a. thread-local storage). This is a mechanism whereby a given 
+        named global variable exists not once globally but exists once per thread. 
+        Each thread gets its own view of the variable. <br>
+      </td>
+      <td style="vertical-align: top;" valign="top">eathread_storage.h/cpp<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_mutex.h/cpp*<br>
+      <br>
+      </td>
+    </tr>
+<tr>
+      <td style="vertical-align: top;" valign="top">Atomic</td>
+      <td style="vertical-align: top;" valign="top">Implements atomic operations 
+        on integers and pointers. These are useful for doing thread-safe basic 
+        operations and tests on integers or pointers without the cost of more 
+        expensive synchronization primitives such as mutexes.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_atomic.h<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Mutex<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements traditional mutual 
+        exclusion. Mutexes here encompass critical section functionality (a.k.a. 
+        futex) and traditional cross-process exclusion.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_mutex.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+eathread.h/cpp<br>
+</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Futex</td>
+      <td style="vertical-align: top;" valign="top">Implements a fast mutex. A fast mutex is a mutex which can be faster because it acts entirely within user space within the current process and can possibly have some of its code inlined. </td>
+      <td style="vertical-align: top;" valign="top">eathread_futex.h/cpp</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+eathread.h/cpp</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">ReadWriteMutex<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a mutex that allows 
+        multiple concurrent reading threads but only one writing thread. This 
+        is useful for situations where one thread is updating a state but multiple 
+        threads may be reading that state.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_rwmutex.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top"><font size="-2"></font>EABase<br>
+        eathread.h/cpp<br>
+        eathread_atomic.h<br>
+        eathread_condition.h/cpp </td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Semaphore<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a traditional sempahore. 
+        A semaphore has zero or positive count associated with it; a thread can 
+        'grab' the semaphore if the count is greater than zero and grabbing it 
+        reduces its count by one. When the count is zero, threads must wait until 
+        it is incremented, which can be done arbitrarily. The semaphore is the 
+        primitive upon which all other high level primitives can be constructed.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_semaphore.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_atomic.h<br>
+      </td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Condition<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a condition variable, 
+        which is a synchronization primitive that supports the producer/consumer 
+        pattern. It is for all practical purposes also known as a "monitor" in 
+        Java and C#. This primitive is particularly useful for implementing efficient 
+        cross thread-messaging systems or worker thread job implementations.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_condition.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_atomic.h<br>
+        eathread_mutex.h/cpp<br>
+        eathread_semaphore.h/cpp<br>
+</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">Barrier<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">Implements a cyclic barrier 
+        primitive. A barrier is a primitive which coordinates the completion of 
+        work by a predetermined number of threads. A barrier has an integer max 
+        "height" and a current height associated with it. When a thread hits the 
+        barrier, it blocks until the prescribed number of threads hit the barrier, 
+        then all are freed.<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">eathread_barrier.h/cpp<br>
+      </td>
+      <td style="vertical-align: top;" valign="top"> 
+        <p>EABase<br>
+          eathread.h/cpp<br>
+          eathread_atomic.h<br>
+          eathread_semaphore.h/cpp<br>
+        </p>
+        </td>
+    </tr>
+<tr>
+      <td style="vertical-align: top;" valign="top">SpinLock<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a traditional spin 
+        lock. A spin lock is a special kind of mutex that "spins" in a loop waiting 
+        to be able to continue instead of blocking like a mutex. A spinlock is 
+        more efficient than a mutex but it generally doesn't work unless operating 
+        on a true multi-processing system. When it does work on a true multi-processing 
+        system it is inefficient.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_spinlock.h<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_sync.h <br>
+        eathread_atomic.h<br>
+</td>
+    </tr>
+    <tr>
+      <td style="vertical-align: top;" valign="top">ReadWriteSpinLock<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a spinlock that 
+        allows multiple readers but only a single writer. Otherwise it is similar 
+        to a basic spin lock with respect to purpose and applicability. <br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_rwspinlock.h<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_sync.h <br>
+        eathread_atomic.h<br>
+      </td>
+    </tr><tr>
+      <td style="vertical-align: top;" valign="top">ThreadPool<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements a "pool" of worker 
+        threads available for work. These are commonly used by server systems 
+        to spawn off client-server tasks.<br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_pool.h/cpp<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase<br>
+        eathread.h/cpp<br>
+        eathread_thread.h/cpp<br>
+        eathread_condition.h/cpp<br>
+        eathread_atomic.h<br>
+        eathread_list.h</td>
+    </tr><tr>
+      <td style="vertical-align: top;" valign="top">Sync<br>
+</td>
+      <td style="vertical-align: top;" valign="top">Implements memory synchronization 
+        primitives known as "fences" or "barriers" (not to be confused with thread 
+        barrier primitives). These primitives are useful on multiprocessing platforms 
+        for synchronizing the various processors' view of memory, which can become 
+        "unsynchronized" in the presence of per-processor caches. <br>
+</td>
+      <td style="vertical-align: top;" valign="top">eathread_sync.h<br>
+</td>
+      <td style="vertical-align: top;" valign="top">EABase</td>
+    </tr><tr>
+      <td style="vertical-align: top;" valign="top">shared_ptr_mt<br>
+shared_array_mt<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">These are multithread-safe 
+        equivalents to regular smart pointers such as shared_ptr and shared_array. 
+        See the TL (Template Library) for implementations of the regular versions 
+        of these smart pointers.<br>
+      </td>
+      <td style="vertical-align: top;" valign="top">shared_ptr_mt.h<br>
+
+shared_array_mt.h</td>
+      <td style="vertical-align: top;" valign="top"> 
+        <p>eathread_atomic.h<br>
+          eathread_mutex.h <br>
+        </p>
+        </td>
+    </tr>
+
+
+
+  </tbody>
+</table>
+  <p>* May not be required, depending on your platform/configuration.<br>
+  </p>
+</div>
+
+
+<h3><span style="font-weight: bold;"></span>  Examples</h3>
+<p>We present some
+  very basic examples of how to use some of the EAThread modules. These
+  exemplify the simplest uses of these modules and don't go into more
+  advanced or complicated uses. There is more functionality in each of
+  the classes than shown; see the documentation or header files for more
+  information. For clarity, the examples assume that the code has
+  specified the <span style="font-family: monospace;">using namespace EA::Thread;</span> statement.<br>
+  <br>
+How to create a thread.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_thread.h"<br>
+    <br>
+  int ThreadFunction(void* pContext){<br>
+&nbsp;&nbsp; return 0;<br>
+  }<br>
+  <br>
+  Thread thread;<br>
+  thread.Begin(ThreadFunction);</p>
+</blockquote>
+<p>How to use thread-local storage.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_storage.h"<br>
+    <br>
+    ThreadLocalStorage tls;<br>
+    tls.SetValue("hello");<br>
+    const char* pString = (const char*)tls.GetValue();</p>
+</blockquote>
+<p>How to create and use an atomic integer.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_atomic.h"<br>
+    <br>
+    AtomicInteger i = 5;<br>
+    i += 7;<br>
+    --i;<br>
+    if(i.SetValueConditional(3, 6))<br>
+&nbsp;&nbsp; printf("i was 6 and now is 3.");</p>
+</blockquote>
+<p>How to create and use a mutex.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_mutex.h"<br>
+    <br>
+    Mutex mutex(NULL, true);<br>
+    mutex.Lock();<br>
+    mutex.Unlock();</p>
+</blockquote>
+<p>How to create and use a futex.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_futex.h"<br>
+      <br>
+    Futex futex;<br>
+    futex.Lock();<br>
+    futex.Unlock();</p>
+</blockquote>
+<p>How to create and use a semaphore.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_semaphore.h"<br>
+    <br>
+    Semaphore semaphore(NULL, true);<br>
+    semaphore.Post();<br>
+    semaphore.Wait();</p>
+</blockquote>
+<p>How to create and use a condition variable.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_condition.h"<br>
+    <br>
+    Condition condition(NULL, true);<br>
+    Mutex&nbsp;&nbsp;&nbsp;&nbsp; mutex(NULL, true);<br>
+    condition.Signal();<br>
+    condition.Wait(&amp;mutex);</p>
+</blockquote>
+<p>How to create and use a spin lock.</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_spinlock.h"<br>
+    <br>
+    SpinLock spinLock;<br>
+    spinLock.Lock();<br>
+  spinLock.Unlock();</p>
+</blockquote>
+<p>How to create and use a shared_ptr_mt.</p>
+<blockquote>
+  <p><span class="style1">#include "eathread/shared_ptr_mt.h"<br>
+    <br>
+    shared_ptr_mt&lt;SomeClass&gt; pObject(new SomeClass);<br>
+    pObject-&gt;DoSomething();</span></p>
+</blockquote>
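+<p>How to create and use a barrier (a minimal sketch; the constructor arguments shown are assumptions, so see eathread_barrier.h for the exact parameters).</p>
+<blockquote>
+  <p class="style1">#include "eathread/eathread_barrier.h"<br>
+    <br>
+    Barrier barrier(NULL, 4); // coordinates 4 threads<br>
+    barrier.Wait(); // blocks until all 4 threads have arrived</p>
+</blockquote>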
+<hr style="width: 100%; height: 2px;">End of document<br>
+
+
+
+<br>
+
+
+<br>
+
+<br>
+
+<br>
+
+<br>
+
+<br>
+
+<br>
+
+<br>
+<br>
+<br>
+<br>
+</body>
+</html>

+ 210 - 0
doc/doxygen/eathread.doxygen.config

@@ -0,0 +1,210 @@
+# Doxyfile 1.3.3
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = EAThread
+PROJECT_NUMBER         = 1.04.00
+OUTPUT_DIRECTORY       = ./
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = YES
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        = 
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP         = NO
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SHOW_USED_FILES        = YES
+SUBGROUPING            = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = "../../include/eathread/" \
+                         "../../source/"
+FILE_PATTERNS          = *.h* \
+                         *.c*
+RECURSIVE              = YES
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = 
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+CHM_FILE               = 
+HHC_LOCATION           = 
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = YES
+TREEVIEW_WIDTH         = 200
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = NO
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = NO
+XML_OUTPUT             = xml
+XML_SCHEMA             = 
+XML_DTD                = 
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references   
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = NO
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 0
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine   
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
+CGI_NAME               = search.cgi
+CGI_URL                = 
+DOC_URL                = 
+DOC_ABSPATH            = 
+BIN_ABSPATH            = /usr/local/bin/
+EXT_DOC_PATHS          = 

+ 226 - 0
include/eathread/android/eathread_atomic_android.h

@@ -0,0 +1,226 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+#define EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <sys/atomics.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// android_fake_atomic_*
+		///
+		/// 64-bit atomic emulation helpers, needed because Android's legacy
+		/// <sys/atomics.h> builtins only operate on 32-bit values. Implemented
+		/// in source/android/eathread_fake_atomic_64.cpp.
+		int64_t android_fake_atomic_swap_64(int64_t value, volatile int64_t* addr);
+		int android_fake_atomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue, volatile int64_t* addr);
+		int64_t android_fake_atomic_read_64(volatile int64_t* addr);
+
+		/// class AtomicInt
+		/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+		/// and declaration specifications per platform.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue()) {}
+
+			AtomicInt& operator=(const ThisType& x)
+				{ mValue = x.GetValue(); return *this; }
+
+			ValueType GetValue() const
+				{ return mValue; }
+
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			volatile ValueType mValue;
+		};
+
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+			{ return __atomic_swap(n, &mValue); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+			{ return __atomic_swap(n, (volatile int*)&mValue); }
+
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return (__atomic_cmpxchg(condition, n, &mValue) == 0); }
+
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return (__atomic_cmpxchg(condition, n, (volatile int*)&mValue) == 0); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+			{ return __atomic_inc(&mValue) + 1; }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+			{ return __atomic_inc((volatile int*)&mValue) + 1; }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+			{ return __atomic_dec(&mValue) - 1; }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+			{ return __atomic_dec((volatile int*)&mValue) - 1; }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+			{
+				// http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Atomic-Builtins.html
+				return __sync_add_and_fetch(&mValue, n); 
+			}
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+			{ 
+				// http://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Atomic-Builtins.html
+				return __sync_add_and_fetch(&mValue, n); 
+			}
+
+
+		///////////////////////////////////////////////////////////
+		/// 64 bit, simulated
+		///
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+			{ return android_fake_atomic_read_64((volatile int64_t*)&mValue); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+			{ return android_fake_atomic_read_64((volatile int64_t*)&mValue); }
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+		{
+			// android_fake_atomic_swap_64 returns the prior value atomically;
+			// reading mValue separately beforehand would race with other writers.
+			return android_fake_atomic_swap_64((int64_t)n, (volatile int64_t*)&mValue);
+		}
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+		{
+			return (ValueType)android_fake_atomic_swap_64((int64_t)n, (volatile int64_t*)&mValue);
+		}
+
+		template <> inline
+		bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			return android_fake_atomic_cmpxchg_64(condition, n, (volatile int64_t*)&mValue) == 0;
+		}
+
+		template <> inline
+		bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+		{
+			return android_fake_atomic_cmpxchg_64(condition, n, (volatile int64_t*)&mValue) == 0;
+		}
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+		{
+			int64_t old;
+
+			do {
+				old = mValue;
+			}
+			while (android_fake_atomic_cmpxchg_64((int64_t)old, (int64_t)old+n, (volatile int64_t*)&mValue) != 0);
+
+			return old + n; // the value this thread installed; re-reading mValue here would race with other writers
+		}
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+		{
+			uint64_t old;
+
+			do {
+				old = mValue;
+			}
+			while (android_fake_atomic_cmpxchg_64((int64_t)old, (int64_t)old+n, (volatile int64_t*)&mValue) != 0);
+
+			return old + n; // see the note in the int64_t specialization above
+		}
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+			{ return Add(1); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+			{ return Add(1); }
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+			{ return Add(-1); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+			{ return Add(-1); }
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_ATOMIC_ANDROID_H
+
+
+
+
+
+
+
+
+

+ 221 - 0
include/eathread/android/eathread_atomic_android_c11.h

@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_ATOMIC_ANDROID_C11_H
+#define EATHREAD_ATOMIC_ANDROID_C11_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <stdatomic.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+		/// and declaration specifications per platform.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+			typedef _Atomic(T) AtomicValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			AtomicInt(const ThisType& x) 
+				{ SetValue(x.GetValue()); }
+
+			AtomicInt& operator=(const ThisType& x)
+				{ SetValue(x.GetValue()); return *this; }
+
+			ValueType GetValue() const
+				{ return atomic_load_explicit(const_cast<AtomicValueType*>(&mValue), memory_order_relaxed); }
+
+			ValueType GetValueRaw() const
+				{ return atomic_load_explicit(const_cast<AtomicValueType*>(&mValue), memory_order_relaxed); }
+
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			AtomicValueType mValue;
+		};
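+
+		// Usage sketch (illustrative only, not part of the original header):
+		//     EA::Thread::AtomicInt<int32_t> counter(0);
+		//     counter.Increment();                   // atomically 0 -> 1, returns 1
+		//     if (counter.SetValueConditional(5, 1)) // compare-and-swap: succeeds only if the value is still 1
+		//         { /* counter is now 5 */ }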
+
+
+		///////////////////////////////////////////////////////////
+		/// 32 bit
+		///
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+			{ return atomic_exchange_explicit(&mValue, n, memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+			{ return atomic_exchange_explicit(&mValue, n, memory_order_relaxed); }
+
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed); }
+
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+			{ return atomic_fetch_add_explicit(&mValue, 1, memory_order_relaxed) + 1; }  
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+			{ return atomic_fetch_add_explicit(&mValue, 1u, memory_order_relaxed) + 1u; }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+			{ return atomic_fetch_sub_explicit(&mValue, 1, memory_order_relaxed) - 1; }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+			{ return atomic_fetch_sub_explicit(&mValue, 1u, memory_order_relaxed) - 1u; }  
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+			{ return atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed) + n; } 
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+			{ return atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed) + n; }
+
+
+		///////////////////////////////////////////////////////////
+		/// 64 bit
+		///
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+			{ return atomic_load_explicit(const_cast<AtomicValueType*>(&mValue), memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+			{ return atomic_load_explicit(const_cast<AtomicValueType*>(&mValue), memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+			{ return atomic_exchange_explicit(&mValue, n, memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+			{ return atomic_exchange_explicit(&mValue, n, memory_order_relaxed); }
+
+		template <> inline
+		bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed); }
+
+		template <> inline
+		bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return atomic_compare_exchange_strong_explicit(&mValue, &condition, n, memory_order_relaxed, memory_order_relaxed); }
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+			{ return atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed) + n; }  
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+			{ return atomic_fetch_add_explicit(&mValue, n, memory_order_relaxed) + n; } 
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+			{ return Add(1); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+			{ return Add(1); }
+
+		template <> inline
+		AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+			{ return Add(-1); }
+
+		template <> inline
+		AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+			{ return Add(-1); }
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Use of the C11 atomics API on Android is problematic because the platform 
+// implements the atomics API via macro wrappers around its platform-specific
+// functions.  Unfortunately, these macros leak beyond the scope of this header
+// and get applied to unrelated code in undesirable ways.  One instance of this
+// is the C11 atomics colliding with the atomic functions of C++11
+// std::shared_ptr.
+// 
+// We attempt to prevent stdatomic.h from affecting external code by undefining
+// the relevant macros.
+// 
+// Note:  If you #include <stdatomic.h> above an eathread header, this header will undefine those macros.
+//
+// http://en.cppreference.com/w/cpp/memory/shared_ptr
+//
+// std::atomic_compare_exchange_strong(std::shared_ptr)
+// std::atomic_compare_exchange_strong_explicit(std::shared_ptr)
+// std::atomic_compare_exchange_weak(std::shared_ptr)
+// std::atomic_compare_exchange_weak_explicit(std::shared_ptr)
+// std::atomic_exchange(std::shared_ptr)
+// std::atomic_exchange_explicit(std::shared_ptr)
+// std::atomic_is_lock_free(std::shared_ptr)
+// std::atomic_load(std::shared_ptr)
+// std::atomic_load_explicit(std::shared_ptr)
+// std::atomic_store(std::shared_ptr)
+// std::atomic_store_explicit(std::shared_ptr)
+//
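+// A minimal sketch of the collision this guards against (hypothetical user
+// code; assumes the stdatomic.h macros were left defined):
+//
+//     #include <memory>
+//     std::shared_ptr<int> sp = std::make_shared<int>(1);
+//     std::shared_ptr<int> copy = std::atomic_load(&sp); // 'atomic_load' would be macro-expanded
+//                                                        // to the C11 function and fail to compile.
+//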
+
+#undef atomic_compare_exchange_strong
+#undef atomic_compare_exchange_strong_explicit
+#undef atomic_compare_exchange_weak
+#undef atomic_compare_exchange_weak_explicit
+#undef atomic_exchange
+#undef atomic_exchange_explicit
+#undef atomic_is_lock_free
+#undef atomic_load
+#undef atomic_load_explicit
+#undef atomic_store
+#undef atomic_store_explicit
+
+#endif // EATHREAD_ATOMIC_ANDROID_C11_H
+
+
+

+ 259 - 0
include/eathread/apple/eathread_atomic_apple.h

@@ -0,0 +1,259 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_APPLE_EATHREAD_ATOMIC_APPLE_H
+#define EATHREAD_APPLE_EATHREAD_ATOMIC_APPLE_H
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <libkern/OSAtomic.h>
+#include "eathread/internal/atomic.h"
+#include "eathread/internal/eathread_atomic_standalone.h"
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+		/// and declaration specifications per platform.
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{ }
+
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue()) { }
+
+			AtomicInt& operator=(const ThisType& x)
+				{ mValue = x.GetValue(); return *this; }
+
+			ValueType GetValue() const
+				{ return mValue; }
+
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			volatile ValueType mValue;
+		};
+		
+		template <>
+		class AtomicInt<uint64_t>
+		{
+		public:
+			typedef AtomicInt<uint64_t> ThisType;
+			typedef uint64_t          ValueType;
+			
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+			{}
+			
+			AtomicInt(ValueType n)
+			{ SetValue(n); }
+			
+			AtomicInt(const ThisType& x)
+			: mValue(x.GetValue()) {}
+			
+			AtomicInt& operator=(const ThisType& x)
+			{ mValue = x.GetValue(); return *this; }
+			
+			ValueType GetValue() const
+			{ return (uint64_t)AtomicGetValue64((volatile int64_t *)&mValue); }
+			
+			ValueType GetValueRaw() const
+			{ return mValue; }
+			
+			ValueType SetValue(ValueType n)
+			{ return (uint64_t)AtomicSetValue64((volatile int64_t *)&mValue, n); }
+			
+			bool      SetValueConditional(ValueType n, ValueType condition)
+			{ return AtomicSetValueConditional64((volatile int64_t *)&mValue, n, condition); }
+			
+			ValueType Increment()
+			{ return (uint64_t)AtomicAdd64((volatile int64_t *)&mValue, 1); }
+			
+			ValueType Decrement()
+			{ return (uint64_t)AtomicAdd64((volatile int64_t *)&mValue, -1); }
+			
+			ValueType Add(ValueType n)
+			{ return (uint64_t)AtomicAdd64((volatile int64_t *)&mValue, n); }
+			
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+			
+		protected:
+			volatile ValueType mValue;
+		} __attribute__((aligned(8)));
+		
+		template <>
+		class AtomicInt<int64_t>
+		{
+		public:
+			typedef AtomicInt<int64_t> ThisType;
+			typedef int64_t          ValueType;
+			
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+			{}
+			
+			AtomicInt(ValueType n)
+			{ SetValue(n); }
+			
+			AtomicInt(const ThisType& x)
+			: mValue(x.GetValue()) {}
+			
+			AtomicInt& operator=(const ThisType& x)
+			{ mValue = x.GetValue(); return *this; }
+			
+			ValueType GetValue() const
+			{ return AtomicGetValue64((volatile int64_t *)&mValue); }
+			
+			ValueType GetValueRaw() const
+			{ return mValue; }
+			
+			ValueType SetValue(ValueType n)
+			{ return AtomicSetValue64((volatile int64_t *)&mValue, n); }
+			
+			bool      SetValueConditional(ValueType n, ValueType condition)
+			{ return AtomicSetValueConditional64((volatile int64_t *)&mValue, n, condition); }
+			
+			ValueType Increment()
+			{ return AtomicAdd64((volatile int64_t *)&mValue, 1); }
+			
+			ValueType Decrement()
+			{ return AtomicAdd64((volatile int64_t *)&mValue, -1); }
+			
+			ValueType Add(ValueType n)
+			{ return AtomicAdd64((volatile int64_t *)&mValue, n); }
+			
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+			
+		protected:
+			volatile ValueType mValue;
+		} __attribute__((aligned(8)));
+		
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+			{ return OSAtomicAdd32(0, reinterpret_cast<volatile int32_t*>(const_cast<ValueType*>(&mValue))); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+			{ return OSAtomicAdd32(0, reinterpret_cast<volatile int32_t*>(const_cast<ValueType*>(&mValue))); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+		{ 
+			int32_t old;
+			do
+			{
+				old = mValue; 
+			}
+			while ( ! OSAtomicCompareAndSwap32(old, n, reinterpret_cast<volatile int32_t*>(&mValue)));
+			return old; 
+		}
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+		{
+			uint32_t old;
+			do
+			{
+				old = mValue;
+			} while ( ! OSAtomicCompareAndSwap32(old, n, reinterpret_cast<volatile int32_t*>(&mValue)));
+			return old;
+		}
+
+		template <> inline
+		bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return OSAtomicCompareAndSwap32(condition, n, reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+			{ return OSAtomicCompareAndSwap32(condition, n, reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+			{ return OSAtomicIncrement32(reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+			{ return OSAtomicIncrement32(reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+			{ return OSAtomicDecrement32(reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+			{ return OSAtomicDecrement32(reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+			{ return OSAtomicAdd32(n, reinterpret_cast<volatile int32_t*>(&mValue)); }
+
+		template <> inline
+		AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+			{ return OSAtomicAdd32(n, reinterpret_cast<volatile int32_t*>(&mValue)); }
+	} // namespace Thread
+} // namespace EA
+
+#endif // EATHREAD_APPLE_EATHREAD_ATOMIC_APPLE_H

+ 69 - 0
include/eathread/apple/eathread_callstack_apple.h

@@ -0,0 +1,69 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_CALLSTACK_APPLE_H
+#define EATHREAD_CALLSTACK_APPLE_H
+
+
+#include <eathread/eathread_callstack.h>
+
+namespace EA
+{
+namespace Thread
+{
+
+	/// ModuleInfoApple
+	///
+	/// This struct is based on the EACallstack ModuleInfo struct, but that can't be used here because
+	/// this package is a lower level package than EACallstack.
+	///
+	struct ModuleInfoApple
+	{
+		char8_t  mPath[256];        /// File name or file path
+		char8_t  mName[256];        /// Module name. Usually the same as the file name without the extension.
+		uint64_t mBaseAddress;      /// Base address in memory.
+		uint64_t mSize;             /// Module size in memory.
+		char     mType[32];         /// The type field (e.g. __TEXT) from the vmmap output.
+		char     mPermissions[16];  /// The permissions "r--/rwx" kind of string from the vmmap output.
+	};
+
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+	/// GetModuleInfoApple
+	///
+	/// This function exists for the purpose of being a central module/VM map info collecting function,
+	/// used by a couple functions within this package.
+	/// used by a couple of functions within this package.
+	/// Returns the required number of entries, which may be more than the user-supplied capacity in the
+	/// case that the user didn't supply enough.
+	///
+	size_t GetModuleInfoApple(ModuleInfoApple* pModuleInfoAppleArray, size_t moduleInfoAppleArrayCapacity, 
+								const char* pTypeFilter = NULL, bool bEnableCache = true);
+#endif
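+
+	// Usage sketch (illustrative only): if the returned count exceeds the supplied
+	// capacity, only 'capacity' entries were written and a larger array is needed.
+	//
+	//     ModuleInfoApple infoArray[32];
+	//     const size_t required = GetModuleInfoApple(infoArray, 32, "__TEXT");
+	//     const size_t written  = (required < 32) ? required : 32;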
+
+
+} // namespace Thread
+
+} // namespace EA
+
+#endif // Header include guard
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 61 - 0
include/eathread/apple/eathread_sync_apple.h

@@ -0,0 +1,61 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+#define EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+
+
+#include <EABase/eabase.h>
+#include <libkern/OSAtomic.h>
+
+
+#define EA_THREAD_SYNC_IMPLEMENTED
+
+
+// EAProcessorPause
+// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for
+// high performance spinning, as spinning without it incurs a significant performance penalty.
+
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+	#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+#else
+	#define EAProcessorPause()
+#endif
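+
+// Illustrative spin-wait usage (gSharedFlag is a hypothetical, atomically
+// written flag; it is not part of this header):
+//
+//     while (!gSharedFlag)
+//         EAProcessorPause(); // avoids the heavy spin penalty while polling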
+
+
+
+// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
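+//
+// All three map to OSMemoryBarrier, which issues a full barrier; libkern does
+// not expose separate read-only or write-only barriers, so the weaker variants
+// are implemented conservatively as full barriers.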
+
+#define EAReadBarrier      OSMemoryBarrier
+#define EAWriteBarrier     OSMemoryBarrier
+#define EAReadWriteBarrier OSMemoryBarrier
+
+
+
+// EACompilerMemoryBarrier
+
+#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+
+
+
+
+#endif // EATHREAD_APPLE_EATHREAD_SYNC_APPLE_H
+
+
+
+
+
+
+
+

+ 54 - 0
include/eathread/arm/eathread_sync_arm.h

@@ -0,0 +1,54 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Created by Rob Parolin 
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+#define EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+
+#include <EABase/eabase.h>
+
+
+#if defined(EA_COMPILER_CLANG)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#define EAProcessorPause()
+
+	#define EAReadBarrier      __sync_synchronize
+	#define EAWriteBarrier     __sync_synchronize
+	#define EAReadWriteBarrier __sync_synchronize
+
+	#define EACompilerMemoryBarrier() __asm__ __volatile__ ("" : : : "memory")
+
+
+#elif defined(EA_COMPILER_GNUC)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#define EAProcessorPause()
+
+	#if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		#define EAReadBarrier      EACompilerMemoryBarrier
+		#define EAWriteBarrier     EACompilerMemoryBarrier
+		#define EAReadWriteBarrier EACompilerMemoryBarrier
+	#endif
+
+	#define EACompilerMemoryBarrier() __asm__ __volatile__ ("" : : : "memory")
+
+#endif
+
+#endif // EATHREAD_ARM_EATHREAD_SYNC_ARM_H
+

+ 26 - 0
include/eathread/armgcc/eathread_sync_armgcc.h

@@ -0,0 +1,26 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Created by Rob Parolin 
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_ARMGCC_EATHREAD_SYNC_ARMGCC_H
+#define EATHREAD_ARMGCC_EATHREAD_SYNC_ARMGCC_H
+
+// Header file should not be included directly.  Provided here for backwards compatibility.
+// Please use eathread_sync.h
+
+#if defined(EA_PROCESSOR_ARM) 
+	#include <eathread/arm/eathread_sync_arm.h>
+#endif
+
+#endif // EATHREAD_ARMGCC_EATHREAD_SYNC_ARMGCC_H

+ 208 - 0
include/eathread/cpp11/eathread_atomic_cpp11.h

@@ -0,0 +1,208 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_ATOMIC_CPP11_H
+#define EATHREAD_ATOMIC_CPP11_H
+
+EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4574 4731)
+#include <atomic>
+EA_RESTORE_VC_WARNING()
+
+namespace EA
+{
+	namespace Thread
+	{
+		#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+		/// Non-member atomic functions
+		/// These act the same as the class functions below.
+		/// Each T return value is the new value, except for the AtomicSwap function, which returns the swapped-out value.
+		///
+		/// todo: Implement me when we have a platform to test this on.  C++11 atomics are disabled on all platforms. 
+		///
+
+		template <class T>
+		class EATHREADLIB_API AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt() {}
+
+			/// AtomicInt
+			/// Constructs with an intial value. 
+			AtomicInt(ValueType n) : mValue(n) {}
+
+			/// AtomicInt
+			/// Copy ctor. Uses GetValue to read the value, and thus is synchronized. 
+			AtomicInt(const ThisType& x) : mValue(x.GetValue()) {}
+
+			/// AtomicInt
+			/// Assignment operator. Uses GetValue to read the value, and thus is synchronized. 
+			AtomicInt& operator=(const ThisType& x)
+			{ mValue = x.GetValue(); return *this; }
+
+			/// GetValue
+			/// Safely gets the current value. A platform-specific version of 
+			/// this might need to do something more than just read the value.
+			ValueType GetValue() const volatile { return mValue; }
+
+			/// GetValueRaw
+			/// "Unsafely" gets the current value. This is useful for algorithms 
+			/// that want to poll the value in a high performance way before 
+			/// reading or setting the value in a more costly thread-safe way. 
+			/// You should not use this function when attempting to do thread-safe
+			/// atomic operations.
+			ValueType GetValueRaw() const { return mValue; }
+
+			/// SetValue
+			/// Safely sets a new value. Returns the old value. Note that due to 
+			/// expected multithreaded accesses, a call to GetValue after SetValue
+			/// might return a different value then what was set with SetValue.
+			/// might return a different value than what was set with SetValue.
+			ValueType SetValue(ValueType n) { return mValue.exchange(n); }
+
+			/// SetValueConditional
+			/// Safely sets the value to a new value if the original value is equal to 
+			/// a condition value. Returns true if the condition was met and the 
+			/// assignment occurred. The comparison and value setting are done as
+			/// an atomic operation and thus another thread cannot intervene between
+			/// the two as would be the case with simple C code.
+			bool SetValueConditional(ValueType n, ValueType condition) 
+			{ 
+				return mValue.compare_exchange_strong(condition, n); 
+			}
+
+			/// Increment
+			/// Safely increments the value. Returns the new value.
+			/// This function acts the same as the C++ pre-increment operator.
+			ValueType Increment() { return ++mValue; }
+
+
+			/// Decrement
+			/// Safely decrements the value. Returns the new value.
+			/// This function acts the same as the C++ pre-decrement operator.
+			ValueType Decrement() { return --mValue; }
+
+
+			/// Add
+			/// Safely adds a value, which can be negative. Returns the new value.
+			/// You can implement subtraction with this function by using a negative argument.
+			ValueType Add(ValueType n) { return (mValue += n); }
+
+
+			/// operators
+			/// These allow an AtomicInt object to safely act like a built-in type.
+			///
+			/// Note: The operators for AtomicInt behaves differently than standard
+			///         C++ operators in that it will always return a ValueType instead
+			///         of a reference.
+			///
+			/// cast operator
+			/// Returns the AtomicInt value as an integral type. This allows the 
+			/// AtomicInt to behave like a standard built-in integer type.
+			operator const ValueType() const { return mValue; }
+
+			/// operator =
+			/// Assigns a new value and returns the value after the operation.
+			///
+			ValueType operator=(ValueType n) { SetValue(n); return n; }
+
+			/// operator+=
+			/// Adds a value to the AtomicInt and returns the value after the operation.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather the value of the AtomicInt after the  
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the returned value being equal to 
+			/// the previous value + n, as another thread might have modified the AtomicInt 
+			/// immediately after the addition operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator+=(ValueType n)  { mValue += n; return mValue; }
+
+			/// operator-=
+			/// Subtracts a value from the AtomicInt and returns the value after the operation.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather the value of the AtomicInt after the  
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the returned value being equal to 
+			/// the previous value - n, as another thread might have modified the AtomicInt 
+			/// immediately after the subtraction operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator-=(ValueType n) { mValue -= n; return mValue; }
+
+			/// pre-increment operator++
+			/// Increments the AtomicInt. 
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather the value of the AtomicInt after the  
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the returned value being equal to 
+			/// the previous value + 1, as another thread might have modified the AtomicInt 
+			/// immediately after the increment operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator++() { return ++mValue; }
+
+			/// post-increment operator++
+			/// Increments the AtomicInt and returns the value of the AtomicInt before
+			/// the increment operation. 
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather a copy of the AtomicInt value from before the  
+			/// operation completed. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the AtomicInt still being equal to 
+			/// the returned value, as another thread might have modified the AtomicInt 
+			/// immediately after the increment operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator++(int) { return mValue++; }
+
+			/// pre-decrement operator--
+			/// Decrements the AtomicInt.
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather the value of the AtomicInt after the  
+			/// operation is complete. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the returned value being equal to 
+			/// the previous value - 1, as another thread might have modified the AtomicInt 
+			/// immediately after the decrement operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator--() { return --mValue; }
+
+			/// post-decrement operator--
+			/// Decrements the AtomicInt and returns the value of the AtomicInt before
+			/// the decrement operation. 
+			///
+			/// This function doesn't obey the C++ standard in that it does not return 
+			/// a reference, but rather a copy of the AtomicInt value from before the  
+			/// operation completed. It must be noted that this design is motivated by
+			/// the fact that it is unsafe to rely on the AtomicInt still being equal to 
+			/// the returned value, as another thread might have modified the AtomicInt 
+			/// immediately after the decrement operation.  So rather than returning the
+			/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+			/// used in the function.
+			ValueType operator--(int) { return mValue--;}
+
+		private:
+			std::atomic<T> mValue;
+		};
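+
+		// Usage sketch (illustrative only): the operators return values, not
+		// references, so post-increment yields the pre-operation value as a copy.
+		//
+		//     EA::Thread::AtomicInt<int64_t> a(10);
+		//     int64_t before = a++;  // before == 10, a == 11
+		//     int64_t after  = ++a;  // after  == 12, a == 12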
+
+	} // namespace Thread
+} // namespace EA
+
+
+#endif // EATHREAD_ATOMIC_CPP11_H

+ 826 - 0
include/eathread/eathread.h

@@ -0,0 +1,826 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// eathread.h
+//
+// Created by Paul Pedriana, Maxis
+//
+// Provides some base global definitions for the EA::Thread library.
+//
+// Design
+// Many of the design criteria for EA::Thread is based on the design of the 
+// Posix threading standard. The Posix threading standard is designed to 
+// work portably on a wide range of operating systems and hardware, including
+// embedded systems and realtime environments. As such, Posix threads generally
+// represent a competent model to follow where possible. Windows and various
+// other platforms have independent multi-threading systems which are taken
+// into account here as well. If something exists in Windows but doesn't 
+// exist here (e.g. Thread suspend/resume), there is a decent chance that it 
+// is by design and for some good reason.
+// 
+// C++
+// There are a number of C++ libraries devoted to multithreading. Usually the 
+// goal of these libraries is provide a platform independent interface which
+// simplifies the most common usage patterns and helps prevent common errors.
+// Some of these libraries are basic wrappers around existing C APIs while 
+// others provide a new and different paradigm. We take the former approach
+// here, as it provides more or less the same functionality but does so 
+// in a straightforward way that is easily approached by those familiar 
+// with platform-specific APIs. This approach has been referred to as the 
+// "Wrapper Facade Pattern".
+//
+// Condition Variables
+// Posix condition variables are implemented via the Condition class. Condition 
+// is essentially the Java and C# name for Posix' condition variables. For some
+// people, a condition variable may seem similar to a Win32 Signal. In actuality
+// they are similar but there is one critical difference: a Signal does not 
+// atomically unlock a mutex as part of the signaling process. This results in
+// problematic race conditions that make reliable producer/consumer systems
+// impossible to implement.
+//
+// Signals
+// As of this writing, there isn't a Win32-like Signal class. The reason for this
+// is that Semaphore does most or all the duty that Signal does and is a little
+// more portable, given that Signals exist only on Win32 and not elsewhere.
+//
+// Timeouts
+// Timeouts are specified as absolute times and not relative times. This may
+// not be how Win32 threading works but it is what's proper and is how Posix
+// threading works. From the OpenGroup online pthread documentation on this:
+//     An absolute time measure was chosen for specifying the 
+//     timeout parameter for two reasons. First, a relative time 
+//     measure can be easily implemented on top of a function 
+//     that specifies absolute time, but there is a race 
+//     condition associated with specifying an absolute timeout 
+//     on top of a function that specifies relative timeouts. 
+//     For example, assume that clock_gettime() returns the 
+//     current time and cond_relative_timed_wait() uses relative 
+//     timeouts:
+//            clock_gettime(CLOCK_REALTIME, &now);
+//            reltime = sleep_til_this_absolute_time - now;
+//            cond_relative_timed_wait(c, m, &reltime);
+//     If the thread is preempted between the first statement and 
+//     the last statement, the thread blocks for too long. Blocking, 
+//     however, is irrelevant if an absolute timeout is used. 
+//     An absolute timeout also need not be recomputed if it is used 
+//     multiple times in a loop, such as that enclosing a condition wait.
+//     For cases when the system clock is advanced discontinuously by 
+//     an operator, it is expected that implementations process any 
+//     timed wait expiring at an intervening time as if that time had 
+//     actually occurred.
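+// 
+// For example (illustrative; assumes this package's GetThreadTime() accessor
+// and a Mutex::Lock overload that accepts an absolute ThreadTime):
+//     const EA::Thread::ThreadTime timeoutAbsolute = EA::Thread::GetThreadTime() + 500; // 500 ms from now
+//     mutex.Lock(timeoutAbsolute);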
+// 
+// General Threads
+// For detailed information about threads, it is recommended that you read
+// various competent sources of information about multithreading and 
+// multiprocessing.
+//    Programming with POSIX(R) Threads, by David R. Butenhof
+//    http://www.opengroup.org/onlinepubs/007904975/basedefs/pthread.h.html
+//    usenet: comp.programming.threads
+//    http://www.openmp.org/index.cgi?faq
+//    http://www.lambdacs.com/cpt/MFAQ.html
+//    http://www.lambdacs.com/cpt/FAQ.html
+//    http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/processes_and_threads.asp
+//
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_H
+#define EATHREAD_EATHREAD_H
+
+#include <eathread/internal/config.h>
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4574 4946 4350)
+	#include <chrono>
+	#include <thread>
+	EA_RESTORE_VC_WARNING()
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+	#if defined(_YVALS)         // Dinkumware doesn't usually provide gettimeofday or <sys/types.h>
+		#include <time.h>       // clock_gettime
+	#elif defined(EA_PLATFORM_UNIX)
+		#include <sys/time.h>   // gettimeofday
+	#endif
+#endif
+#if defined(EA_PLATFORM_APPLE)
+	#include <mach/mach_types.h>
+#endif
+#if defined(EA_PLATFORM_SONY) 
+	#include "sdk_version.h"
+	#include <kernel.h>
+#endif
+#include <limits.h>
+#include <float.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_THREAD_PREEMPTIVE / EA_THREAD_COOPERATIVE
+//
+// Defined or not defined.
+//
+// EA_THREAD_COOPERATIVE means that threads are not time-sliced by the 
+// operating system. If there exist multiple threads of the same priority 
+// then they will need to wait, sleep, or yield in order for the others 
+// to get time. See enum Scheduling and EATHREAD_SCHED for more info.
+//
+// EA_THREAD_PREEMPTIVE means that threads are time-sliced by the operating 
+// system at runtime. If there exist multiple threads of the same priority 
+// then the operating system will split execution time between them.
+// See enum Scheduling and EATHREAD_SCHED for more info.
+//
+#if !EA_THREADS_AVAILABLE 
+	#define EA_THREAD_COOPERATIVE
+#else
+	#define EA_THREAD_PREEMPTIVE
+#endif
+
+
+/// namespace EA
+///
+/// This is the standard Electronic Arts C++ namespace.
+///
+namespace EA
+{
+	namespace Allocator
+	{
+		class ICoreAllocator;
+	}
+
+	/// namespace Thread
+	///
+	/// This is the standard Electronic Arts Thread C++ namespace.
+	///
+	namespace Thread
+	{
+		/// Scheduling 
+		/// Defines scheduling types supported by the given platform.
+		/// These are defined in detail by the Posix standard, with the 
+		/// exception of Coop, which is added here. FIFO scheduling
+		/// is the most classic for game development, as it allows for 
+		/// thread priorities and well-behaved synchronization primitives,
+		/// but it doesn't do time-slicing. The problem with time slicing
+		/// is that threads are pre-empted in the middle of work and this
+		/// hurts execution performance and cache performance. 
+		///
+		enum Scheduling
+		{
+			kSchedulingFIFO     =  1,    /// There is no automatic time-slicing; thread priorities control execution and context switches.
+			kSchedulingRR       =  2,    /// Same as FIFO but with periodic time-slicing.
+			kSchedulingSporadic =  4,    /// Complex scheduling control. See the Posix standard.
+			kSchedulingTS       =  8,    /// a.k.a. SCHED_OTHER. Usually same as FIFO or RR except that thread priorities and execution can be temporarily modified.
+			kSchedulingCoop     = 16     /// The user must control thread scheduling beyond the use of synchronization primitives.
+		};
+		 
+		#if defined(EA_PLATFORM_UNIX)
+			#define EATHREAD_SCHED    kSchedulingFIFO
+
+		#elif defined(EA_PLATFORM_MICROSOFT)
+			#define EATHREAD_SCHED    kSchedulingRR
+
+		#else
+			#define EATHREAD_SCHED    kSchedulingFIFO
+
+		#endif
+
+
+		// EATHREAD_MULTIPROCESSING_OS
+		//
+		// Defined as 0 or 1. 
+		// Indicates whether the OS supports multiple concurrent processes, which may be in 
+		// addition to supporting multiple threads within a process.
+		// Some platforms support multiple concurrently loaded processes but don't support
+		// running these processes concurrently. We don't currently count this as a
+		// multiprocessing OS.
+		#ifndef EATHREAD_MULTIPROCESSING_OS
+			#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)
+				#define EATHREAD_MULTIPROCESSING_OS 1
+			#else
+				#define EATHREAD_MULTIPROCESSING_OS 0
+			#endif
+		#endif
+		
+		// EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+		// 
+		// Defined as 0 or 1. 
+		// Indicates whether the OS supports setting the thread name from a different
+		// thread (set to 1) or if the name can be set only from the current thread (set to 0).
+		#ifndef EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+			#if defined(EA_PLATFORM_LINUX) || defined(EA_PLATFORM_APPLE)
+				#define EATHREAD_OTHER_THREAD_NAMING_SUPPORTED 0
+			#else
+				#define EATHREAD_OTHER_THREAD_NAMING_SUPPORTED 1
+			#endif
+		#endif
+
+		// Uint / Int
+		// Defines a machine-word sized integer, useful for operations that are as efficient
+		// as possible on the given machine. Note that the C99 int_fastNN_t types aren't sufficient,
+		// as they are defined by compilers in an undesirable way for the processors we work with.
+		#if !defined(EA_PLATFORM_WORD_SIZE) || (EA_PLATFORM_WORD_SIZE == 4)
+			typedef uint32_t Uint;
+			typedef int32_t  Int;
+		#else
+			typedef uint64_t Uint;
+			typedef int64_t  Int;
+		#endif
+
+
+		/// ThreadId
+		/// Uniquely identifies a thread throughout the system and is used by the EAThread API
+		/// to identify threads in a way equal to system provided thread ids. A ThreadId is the 
+		/// same as a system thread id and can be used in direct system threading API calls.
+		#if !EA_THREADS_AVAILABLE
+			typedef int ThreadId;
+		#elif EA_USE_CPP11_CONCURRENCY
+			typedef std::thread::id ThreadId;
+		#elif defined(EA_PLATFORM_SONY)
+			typedef uint64_t ThreadId;
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+			typedef pthread_t ThreadId;
+		#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+			typedef void* ThreadId; // This is really HANDLE, but HANDLE is the same as void* and we can avoid an expensive #include here.
+		#else
+			typedef int ThreadId;
+		#endif
+
+
+		// ThreadId constants
+		#if EA_USE_CPP11_CONCURRENCY
+			const ThreadId kThreadIdInvalid = ThreadId(); /// Special ThreadId indicating an invalid thread identifier.
+		#else
+			const ThreadId kThreadIdInvalid  = ThreadId(0);            /// Special ThreadId indicating an invalid thread identifier.
+			const ThreadId kThreadIdCurrent  = ThreadId(INT_MAX);      /// Special ThreadId indicating the current thread.
+			const ThreadId kThreadIdAny      = ThreadId(INT_MAX - 1);  /// Special ThreadId indicating no thread in particular.
+		#endif
+
+		/// SysThreadId
+		/// It turns out that Microsoft operating systems (Windows, XBox, XBox 360)
+		/// have two different ways to identify a thread: HANDLE and DWORD. Some API
+		/// functions take thread HANDLES, while others take what Microsoft calls
+		/// thread ids (DWORDs). EAThread ThreadId is a HANDLE, as that is used for 
+		/// more of the core threading APIs. However, some OS-level APIs accept instead   
+		/// the DWORD thread id. 
+		#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE && !EA_USE_CPP11_CONCURRENCY
+			typedef uint32_t SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#elif defined(EA_PLATFORM_SONY)
+			typedef ScePthread SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = { 0 }; /// Special SysThreadId indicating an invalid thread identifier.
+		#elif defined(EA_PLATFORM_APPLE)
+			typedef thread_act_t SysThreadId; // thread_act_t is useful for calling mach APIs such as thread_policy_set() with. 
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#elif EA_USE_CPP11_CONCURRENCY
+			typedef std::thread::native_handle_type SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = { 0 }; /// Special SysThreadId indicating an invalid thread identifier.
+			
+			// For MSVC, native_handle_type is not a primitive type so we define operator== and operator!= for convenience.
+			// We use an auto converting proxy type for comparisons to avoid errors when native_handle_type is a built in type.
+			bool Equals(const SysThreadId& a, const SysThreadId& b);
+			struct SysThreadIdProxy 
+			{ 
+				SysThreadIdProxy(const SysThreadId& id_) : id(id_) {}
+				SysThreadId id;
+			};
+			inline bool operator==(const SysThreadId& lhs, const SysThreadIdProxy& rhs) { return Equals(lhs, rhs.id); }
+			inline bool operator!=(const SysThreadId& lhs, const SysThreadIdProxy& rhs) { return !Equals(lhs, rhs.id); }
+		#else
+			typedef ThreadId SysThreadId;
+			const SysThreadId kSysThreadIdInvalid = SysThreadId(0); /// Special SysThreadId indicating an invalid thread identifier.
+		#endif
+
+		/// ThreadUniqueId
+		/// Uniquely identifies a thread throughout the system, but in a way that is not 
+		/// necessarily compatible with system thread id identification. Sometimes it is 
+		/// costly to work with system thread ids whereas all you want is some integer 
+		/// that is unique between threads and you don't need to use it for system calls.
+		/// See the EAThreadGetUniqueId macro/function for usage.
+		typedef Uint ThreadUniqueId;
+
+		// ThreadUniqueId constants
+		const ThreadUniqueId kThreadUniqueIdInvalid = 0; /// Special ThreadUniqueId indicating an invalid thread identifier.
+
+
+		// Time constants
+		// Milliseconds are the units of time in EAThread. While every generation of computers
+		// results in faster computers and thus milliseconds become an increasingly large number
+		// compared to the computer speed, computer multithreading is still largely done at the 
+		// millisecond level, due to it still being a small value relative to human perception.
+		// We may reconsider this some time in the future and provide an option to have ThreadTime
+		// be specified in finer units, such as microseconds.
+		#if EA_USE_CPP11_CONCURRENCY
+			typedef std::chrono::milliseconds::rep ThreadTime;                               /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = std::chrono::milliseconds::zero().count();/// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone = std::chrono::milliseconds::max().count();      /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield = std::chrono::milliseconds::zero().count();    /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#elif defined(EA_PLATFORM_SONY) && EA_THREADS_AVAILABLE
+			typedef double ThreadTime;  // SceKernelUseconds maps to unsigned int 
+			static_assert(sizeof(ThreadTime) >= sizeof(unsigned int), "ThreadTime not large enough for uint32_t representation of milliseconds for platform portability");
+
+			const ThreadTime kTimeoutImmediate = 0;
+			const ThreadTime kTimeoutNone = DBL_MAX;
+			const ThreadTime kTimeoutYield = 0.000001; // 1 nanosecond in terms of a millisecond
+
+			#define EA_THREADTIME_AS_UINT_MICROSECONDS(t)  ((unsigned int)((t) * 1000.0))                           /// Returns the milliseconds time as uint in microseconds.           
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))                                                       /// Returns the unconverted milliseconds time as a int64_t.
+			#define EA_THREADTIME_AS_DOUBLE(t) (t)                                                                  /// Returns the time as double milliseconds. May include a fraction component.
+			#define EA_TIMESPEC_AS_UINT(t)  ((unsigned int)(((t).tv_sec * 1000) + ((t).tv_nsec / 1000000)))         /// Returns the time as uint in milliseconds.            
+			#define EA_TIMESPEC_AS_DOUBLE_IN_MS(t)  ( (((t).tv_sec * 1000000000ull) + ((t).tv_nsec))/1000000.0)     /// Returns the time as double milliseconds.            
+
+		#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+			struct ThreadTime : public timespec
+			{
+				typedef int seconds_t;  // To consider: change to uint64_t or maybe long.
+				typedef int nseconds_t;
+				
+				ThreadTime()                                            { tv_sec  = 0;                         tv_nsec  = 0; }
+				ThreadTime(const timespec& ts)                          { tv_sec  = ts.tv_sec;                 tv_nsec  = ts.tv_nsec; }
+				ThreadTime(seconds_t nSeconds, nseconds_t nNanoseconds) { tv_sec  = (long)nSeconds;            tv_nsec  = (long)nNanoseconds; }
+				ThreadTime(const int64_t& nMilliseconds)             { tv_sec  = (long)(nMilliseconds / 1000); tv_nsec  = (long)((nMilliseconds - (tv_sec * 1000)) * 1000000); }
+				ThreadTime& operator+=(const int64_t& nMilliseconds) { long lTemp((long)nMilliseconds / 1000); tv_sec  += lTemp; tv_nsec += (long)((nMilliseconds - (lTemp * 1000)) * 1000000); if(tv_nsec >= 1000000000){ tv_sec++; tv_nsec -= 1000000000; } return *this; }
+				ThreadTime& operator-=(const int64_t& nMilliseconds) { long lTemp((long)nMilliseconds / 1000); tv_sec  -= lTemp; tv_nsec -= (long)((nMilliseconds - (lTemp * 1000)) * 1000000); if(tv_nsec < 0)          { tv_sec--; tv_nsec += 1000000000; } return *this; }
+				ThreadTime& operator+=(const ThreadTime& tt)         { tv_sec += tt.tv_sec;                    tv_nsec += tt.tv_nsec; if(tv_nsec >= 1000000000){ tv_sec++; tv_nsec -= 1000000000; } return *this; }
+				ThreadTime& operator-=(const ThreadTime& tt)         { tv_sec -= tt.tv_sec;                    tv_nsec -= tt.tv_nsec; if(tv_nsec < 0)          { tv_sec--; tv_nsec += 1000000000; } return *this; }
+			};
+			inline ThreadTime operator+ (const ThreadTime& tt1, const ThreadTime& tt2)       { ThreadTime ttR(tt1); ttR += tt2;           return ttR; }
+			inline ThreadTime operator+ (const ThreadTime& tt,  const int64_t& nMilliseconds){ ThreadTime ttR(tt);  ttR += nMilliseconds; return ttR; }
+			inline ThreadTime operator- (const ThreadTime& tt1, const ThreadTime& tt2)       { ThreadTime ttR(tt1); ttR -= tt2;           return ttR; }
+			inline ThreadTime operator- (const ThreadTime& tt,  const int64_t& nMilliseconds){ ThreadTime ttR(tt);  ttR -= nMilliseconds; return ttR; }
+			inline bool       operator==(const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_nsec == tt2.tv_nsec) && (tt1.tv_sec == tt2.tv_sec); } // These comparisons assume that the nsec value is normalized (always between 0 && 1000000000).
+			inline bool       operator!=(const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_nsec != tt2.tv_nsec) || (tt1.tv_sec != tt2.tv_sec); }
+			inline bool       operator< (const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_sec == tt2.tv_sec) ? (tt1.tv_nsec <  tt2.tv_nsec) : (tt1.tv_sec <  tt2.tv_sec); }
+			inline bool       operator> (const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_sec == tt2.tv_sec) ? (tt1.tv_nsec >  tt2.tv_nsec) : (tt1.tv_sec >  tt2.tv_sec); }
+			inline bool       operator<=(const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_sec == tt2.tv_sec) ? (tt1.tv_nsec <= tt2.tv_nsec) : (tt1.tv_sec <= tt2.tv_sec); }
+			inline bool       operator>=(const ThreadTime& tt1, const ThreadTime& tt2) { return (tt1.tv_sec == tt2.tv_sec) ? (tt1.tv_nsec >= tt2.tv_nsec) : (tt1.tv_sec >= tt2.tv_sec); }
+
+			const  ThreadTime kTimeoutImmediate(0, 0);            /// Used to specify to functions to return immediately if the operation could not be done.
+			const  ThreadTime kTimeoutNone(INT_MAX, INT_MAX);     /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const  ThreadTime kTimeoutYield(0, 0);                /// Used to specify to ThreadSleep to yield to threads of equivalent priority.
+
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(((t).tv_sec * 1000) + ((t).tv_nsec / 1000000)))                   /// Returns the time as int64_t milliseconds.
+			#define EA_THREADTIME_AS_INT64_MICROSECONDS(t)  ((int64_t)(((t).tv_sec * 1000000) + (((t).tv_nsec / 1000))))    /// Returns the time as int64_t microseconds.
+			#define EA_THREADTIME_AS_DOUBLE(t) (((t).tv_sec * 1000.0) + ((t).tv_nsec / 1000000.0))                          /// Returns the time as double milliseconds.
+
+		#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+			typedef uint64_t   ThreadTime;                        /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = 0;             /// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone      = UINT64_MAX;    /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield     = 0;             /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#else
+			typedef unsigned   ThreadTime;                        /// Current storage mechanism for time used by thread timeout functions. Units are milliseconds.
+			const   ThreadTime kTimeoutImmediate = 0;             /// Used to specify to functions to return immediately if the operation could not be done.
+			const   ThreadTime kTimeoutNone      = UINT_MAX;      /// Used to specify to functions to block without a timeout (i.e. block forever).
+			const   ThreadTime kTimeoutYield     = 0;             /// This is used with ThreadSleep to minimally yield to threads of equivalent priority.
+
+			#define EA_THREADTIME_AS_INT64(t)  ((int64_t)(t))
+			#define EA_THREADTIME_AS_DOUBLE(t) ((double)(t))
+
+		#endif
+
+		#if defined(EA_PLATFORM_MICROSOFT)                        /// Can be removed from C++11 Concurrency builds once full C++11 implementation is completed
+			uint32_t RelativeTimeoutFromAbsoluteTimeout(ThreadTime absoluteTimeout);
+		#endif
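+
+		// Illustrative sketch (not from the original header): using the timeout
+		// constants above with a hypothetical EA::Thread::Mutex, whose Lock function
+		// is assumed to accept these constants as well as absolute times.
+		//     mutex.Lock(EA::Thread::kTimeoutNone);      // block until the lock is acquired
+		//     mutex.Lock(EA::Thread::kTimeoutImmediate); // try-lock: return at once if contended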
+
+		// Thread priority constants
+		// There is a standardized mechanism to convert system-specific thread
+		// priorities to these platform-independent priorities and back without 
+		// loss of precision or change in behaviour. The convention is that kThreadPriorityDefault 
+		// equates to the system-specific normal thread priority. Thus for Microsoft
+		// APIs a thread with priority kThreadPriorityDefault will be of Microsoft
+		// priority THREAD_PRIORITY_NORMAL. A thread with an EAThread priority 
+		// of kThreadPriorityDefault + 1 will have a Microsoft priority of THREAD_PRIORITY_NORMAL + 1.
+		// The only difference is that with EAThread all platforms are standardized on 
+		// kThreadPriorityDefault as the normal value and that higher EAThread priority
+		// integral values mean higher thread priorities for running threads. This last
+		// item is of significance because Sony platforms natively define lower integers
+		// to mean higher thread priorities. With EAThread you get consistent behaviour
+		// across platforms and thus kThreadPriorityDefault + 1 always results in a
+		// thread that runs at priority of one level higher. On Sony platforms, this + 1
+		// gets translated to a - 1 when calling the Sony native thread priority API.
+		// EAThread priorities have no mandated integral bounds, though
+		// kThreadPriorityMin and kThreadPriorityMax are defined as convenient practical
+		// endpoints for users. Users should generally avoid hard-coded constants to
+		// refer to EAThread priorities, just as it's best not to use hard-coded
+		// constants to refer to platform-specific native thread priorities. Also, users
+		// should avoid manipulating thread priorities where possible and instead use
+		// conventional synchronization primitives to control execution.
+		// Similarly, extreme thread priorities such as +100 are unlikely to
+		// achieve much and are unlikely to be very portable.
+		//
+		const int kThreadPriorityUnknown = INT_MIN;      /// Invalid or unknown priority.
+		const int kThreadPriorityMin     =    -128;      /// Minimum thread priority enumerated by EAThread. In practice, a valid thread priority can be anything other than kThreadPriorityUnknown.
+		const int kThreadPriorityDefault =       0;      /// Default (a.k.a. normal) thread priority.
+		const int kThreadPriorityMax     =     127;      /// Maximum thread priority enumerated by EAThread. In practice, a valid thread priority can be anything other than kThreadPriorityUnknown.
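+
+		// Illustrative sketch: the portable way to request "one level above normal",
+		// per the convention described above (SetThreadPriority is declared below).
+		//     EA::Thread::SetThreadPriority(EA::Thread::kThreadPriorityDefault + 1);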
+
+
+
+		/// kSysThreadPriorityDefault
+		/// Defines the platform-specific default thread priority.
+		#if defined(EA_PLATFORM_SONY)
+			const int kSysThreadPriorityDefault = 700;
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+			const int kSysThreadPriorityDefault = 0; // Some Unix variants use values other than zero, but these are not relevant.
+		#elif defined(EA_PLATFORM_MICROSOFT)
+			const int kSysThreadPriorityDefault = 0; // Same as THREAD_PRIORITY_NORMAL
+		#else
+			const int kSysThreadPriorityDefault = 0;
+		#endif
+
+
+		// The following functions are standalone and not static members of the thread class 
+		// because they are potentially used by multiple threading primitives and we don't 
+		// want to create a dependency of threading primitives on class Thread.
+
+		/// GetThreadId
+		/// Gets the thread ID for the current thread. This thread ID should 
+		/// be unique throughout the system.
+		EATHREADLIB_API ThreadId GetThreadId();
+
+
+		/// GetSysThreadId
+		/// Gets the operating system thread id associated with the given ThreadId.
+		/// It turns out that Microsoft operating systems (Windows, XBox, XBox 360)
+		/// have two different ways to identify a thread: HANDLE and DWORD. Some API
+		/// functions take thread HANDLES, while others take what Microsoft calls
+		/// thread ids (DWORDs). EAThread ThreadId is a HANDLE, as that is used for 
+		/// more of the core threading APIs. However, some OS-level APIs accept instead   
+		/// the DWORD thread id. This function returns the OS thread id for a given 
+		/// EAThread ThreadId. In the case of Microsoft OSs, this returns a DWORD from
+		/// a HANDLE and with other OSs this function simply returns the ThreadId.
+		/// Returns a valid SysThreadId or kSysThreadIdInvalid if the input id is invalid.
+		EATHREADLIB_API SysThreadId GetSysThreadId(ThreadId id);
+
+
+		/// GetThreadId
+		///
+		/// This is a portable function to convert between ThreadIds and SysThreadIds.
+		/// For platforms that do not differentiate between these two types, no conversion is attempted. 
+		EATHREADLIB_API ThreadId GetThreadId(SysThreadId id);
+
+
+		/// GetSysThreadId
+		/// Gets the SysThreadId for the current thread. This thread ID should 
+		/// be unique throughout the system.
+		EATHREADLIB_API SysThreadId GetSysThreadId();
+
+
+		/// GetThreadPriority
+		/// Gets the priority of the current thread.
+		/// This function can return any int except for kThreadPriorityUnknown, as the 
+		/// current thread's priority will always be knowable. A return value of kThreadPriorityDefault
+		/// means that this thread is of normal (a.k.a. default) priority.
+		/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+		/// for more information about thread priority values and behaviour.
+		EATHREADLIB_API int GetThreadPriority();
+
+
+		/// SetThreadPriority
+		/// Sets the priority of the current thread.
+		/// Accepts any integer priority value except kThreadPriorityUnknown.
+		/// On some platforms, this function will automatically convert any invalid 
+		/// priority for that particular platform to a valid one.  A normal (a.k.a. default) thread 
+		/// priority is identified by kThreadPriorityDefault.
+		/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+		/// for more information about thread priority values and behaviour.
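+		///
+		/// Example usage (illustrative sketch; boost, then restore):
+		///     const int nPrevious = GetThreadPriority();
+		///     SetThreadPriority(kThreadPriorityDefault + 1);
+		///     // ... time-critical work ...
+		///     SetThreadPriority(nPrevious);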
+		EATHREADLIB_API bool SetThreadPriority(int nPriority);
+
+
+		/// GetThreadStackBase
+		/// Returns the base address of the current thread's stack.
+		/// Recall that on all supported platforms that the stack grows downward
+		/// and thus that the stack base address is of a higher value than the 
+		/// stack's contents.
+		EATHREADLIB_API void* GetThreadStackBase();
+
+
+		// Thread processor constants
+		const int kProcessorDefault = -1;    /// Use the default processor for the platform. On many platforms, the default is to not be tied to any specific processor, but on others a thread can only ever be bound to a single processor.
+		const int kProcessorAny     = -2;    /// Run the thread on any processor. Many platforms will switch threads between processors dynamically.
+
+
+		/// SetThreadProcessor  
+		/// Sets the processor the current thread should run on. Valid values 
+		/// are kProcessorDefault, kProcessorAny, or a processor
+		/// index in the range of [0, processor count). If the input value
+		/// is >= the processor count, it will be reduced modulo the
+		/// processor count. Any other invalid value will cause the processor
+		/// to be set to zero.
+		/// This function isn't guaranteed to restrict the thread from executing 
+		/// on the given processor for all platforms. Some platforms don't support
+		/// assigning thread processor affinity, while with others (e.g. Windows using 
+		/// SetThreadIdealProcessor) the OS tries to comply but will use a different
+		/// processor when the assigned one is unavailable.
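+		///
+		/// Example usage (illustrative; guards against single-processor systems):
+		///     if(GetProcessorCount() > 1)
+		///         SetThreadProcessor(1);         // prefer the second processor
+		///     SetThreadProcessor(kProcessorAny); // later: let the thread float again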
+		EATHREADLIB_API void SetThreadProcessor(int nProcessor);
+		
+
+		/// GetThreadProcessor
+		/// Returns the (possibly virtual) CPU index that the thread is currently
+		/// running on. Different systems may have differing definitions of what
+		/// a unique processor is. Some CPUs have multiple sub-CPUs (e.g. "cores")
+		/// which are treated as unique processors by the system. 
+		/// Many systems switch threads between processors dynamically; thus it's 
+		/// possible that the thread may be on a different CPU by the time this 
+		/// function returns. 
+		/// Lastly, some systems don't provide the ability to detect what processor
+		/// the current thread is running on; in these cases this function returns 0.
+		EATHREADLIB_API int GetThreadProcessor();
+		
+
+		/// GetProcessorCount
+		/// Returns the (possibly virtual) CPU count that the current system has.
+		/// Some systems (e.g. Posix, Unix) don't expose an ability to tell how 
+		/// many processors there are; in these cases this function returns 1.
+		/// This function returns the number of currently active processors. 
+		/// Some systems can modify the number of active processors dynamically.
+		EATHREADLIB_API int GetProcessorCount();
+
+
+		/// kThreadAffinityMaskAny
+		/// Defines the thread affinity mask that enables the thread 
+		/// to float on all available processors.
+		typedef uint64_t ThreadAffinityMask;
+		const ThreadAffinityMask kThreadAffinityMaskAny = ~ThreadAffinityMask(0); // All 64 bits set; ~0U would set only the low 32 bits.
+
+
+		/// SetThreadAffinityMask
+		/// 
+		/// The nAffinityMask is a bit field where each bit designates a processor.
+		///  
+		/// This function isn't guaranteed to restrict the thread from executing 
+		/// on the given processor for all platforms. Some platforms don't support
+		/// assigning thread processor affinity, while with others (e.g. Windows using 
+		/// SetThreadIdealProcessor) the OS tries to comply but will use a different
+		/// processor when the assigned one is unavailable.
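+		///
+		/// Example usage (illustrative; bits 0 and 2 select processors 0 and 2):
+		///     ThreadAffinityMask mask = (ThreadAffinityMask(1) << 0) | (ThreadAffinityMask(1) << 2);
+		///     SetThreadAffinityMask(mask);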
+		EATHREADLIB_API void SetThreadAffinityMask(ThreadAffinityMask nAffinityMask);
+		EATHREADLIB_API void SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask);
+	
+
+		/// GetThreadAffinityMask
+		///   
+		/// Returns the current thread affinity mask specified by the user.
+		EATHREADLIB_API ThreadAffinityMask GetThreadAffinityMask();
+		EATHREADLIB_API ThreadAffinityMask GetThreadAffinityMask(const EA::Thread::ThreadId& id);
+
+
+		/// GetThreadName
+		/// Returns the name of the thread assigned by the SetThreadName function.
+		/// If the thread was not named by the SetThreadName function, then the name is empty ("").
+		EATHREADLIB_API const char* GetThreadName();
+		EATHREADLIB_API const char* GetThreadName(const EA::Thread::ThreadId& id);
+
+
+		/// SetThreadName
+		///
+		/// Sets a descriptive name for the thread. On some platforms this name is passed on
+		/// to the debugging tools so they can see this name. The name length, including a
+		/// terminating 0 char, is limited to EATHREAD_NAME_SIZE characters. Any characters
+		/// beyond that are ignored.
+		/// 
+		/// You can set the name of a Thread object only if it has already begun.  You can
+		/// also set the name with the Begin function via the ThreadParameters argument to
+		/// Begin. This design exists to simplify the implementation, but being able
+		/// to set ThreadParameters before Begin is something that can be considered in the
+		/// future.
+		///
+		/// Some platforms (e.g. Linux) have the restriction that this function works
+		/// properly only when called by the same thread that you want to name. Given this
+		/// situation, the most portable way to use SetThreadName is to either
+		/// always call it from the thread to be named or to use the ThreadParameters to
+		/// give the thread a name before it is started and let the started thread name
+		/// itself.
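+		///
+		/// Example usage (illustrative; naming the current thread from itself):
+		///     EA::Thread::SetThreadName("RenderWorker");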
+		EATHREADLIB_API void SetThreadName(const char* pName);
+		EATHREADLIB_API void SetThreadName(const EA::Thread::ThreadId& id, const char* pName);
+
+
+		/// ThreadSleep
+		/// Puts the current thread to sleep for an amount of time hinted at 
+		/// by the time argument. The timeout is merely a hint, and the system 
+		/// thread scheduler might wake the thread well before the sleep time has elapsed.
+		/// The input 'timeRelative' refers to a relative time and not an
+		/// absolute time such as used by EAThread mutexes, semaphores, etc. 
+		/// This is for consistency with other threading systems such as Posix and Win32.
+		/// A sleep time of zero has the same effect as simply yielding to other
+		/// available threads.
+		///
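+		/// Example usage (illustrative; a portable yield):
+		///     EA::Thread::ThreadSleep(EA::Thread::kTimeoutYield);
+		///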
+		EATHREADLIB_API void ThreadSleep(const ThreadTime& timeRelative = kTimeoutImmediate);
+
+
+		/// ThreadCooperativeYield
+		/// On platforms that use cooperative multithreading instead of 
+		/// pre-emptive multithreading, this function maps to ThreadSleep(0).
+		/// On pre-emptive platforms, this function is a no-op. The intention
+		/// is to allow cooperative multithreaded systems to yield manually
+		/// in order for other threads to run, but also not to penalize 
+		/// pre-emptive systems that don't need such manual yielding. If you 
+		/// want to forcefully yield on a pre-emptive system, call ThreadSleep(0).
+		#ifdef EA_THREAD_COOPERATIVE
+			#define ThreadCooperativeYield() EA::Thread::ThreadSleep(EA::Thread::kTimeoutYield)
+		#else
+			#define ThreadCooperativeYield()
+		#endif
+
+
+		/// ThreadEnd
+		/// This function provides a way for a thread to end itself.
+		EATHREADLIB_API void ThreadEnd(intptr_t threadReturnValue);
+
+
+		/// GetThreadTime
+		/// Gets the current absolute time in milliseconds.
+		/// This is required for working with absolute timeouts, for example.
+		/// To specify a timeout that is relative to the current time, simply
+		/// add time (in milliseconds) to the return value of GetThreadTime.
+		/// Alternatively, you can use ConvertRelativeTime to calculate an absolute time.
+		EATHREADLIB_API ThreadTime GetThreadTime();
+
+
+		/// ConvertRelativeTime
+		/// Given a relative time (in milliseconds), this function returns an 
+		/// absolute time (in milliseconds).
+		/// Example usage:
+		///     mutex.Lock(ConvertRelativeTime(1000));
+		EATHREADLIB_API inline ThreadTime ConvertRelativeTime(const ThreadTime& timeRelative)
+		{
+			return GetThreadTime() + timeRelative;
+		}
+
+		/// SetAssertionFailureFunction
+		/// Allows the user to specify a callback function to trap assertion failures.
+		/// You can use this to glue your own assertion system into this system.
+		typedef void (*AssertionFailureFunction)(const char* pExpression, void* pContext);
+		EATHREADLIB_API void SetAssertionFailureFunction(AssertionFailureFunction pAssertionFailureFunction, void* pContext);
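+
+		// Illustrative sketch (MyAssertHandler is a hypothetical user function, not
+		// part of this header):
+		//     void MyAssertHandler(const char* pExpression, void* /*pContext*/)
+		//         { printf("EAThread assert: %s\n", pExpression); }
+		//     ...
+		//     EA::Thread::SetAssertionFailureFunction(MyAssertHandler, NULL);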
+
+
+		/// AssertionFailure
+		/// Triggers an assertion failure. This function is generally intended for internal
+		/// use but is available so that related code can use the same system.
+		EATHREADLIB_API void AssertionFailure(const char* pExpression);
+		EATHREADLIB_API void AssertionFailureV(const char* pFormat, ...);
+
+
+
+
+		/// Allocator
+		/// This is the same as (the first four functions of) ICoreAllocator.
+		/// If the allocator is set via SetAllocator, then it must be done before
+		/// any other thread operations which might allocate memory are done. 
+		/// Typically this includes creating objects via factory functions and 
+		/// creating threads whereby you specify that thread resources be allocated for you.
+		class Allocator
+		{
+		public:
+			virtual ~Allocator() {}
+			virtual void* Alloc(size_t size, const char* name = 0, unsigned int flags = 0) = 0;
+			virtual void* Alloc(size_t size, const char* name, unsigned int flags,
+									unsigned int align, unsigned int alignOffset = 0) = 0;
+			virtual void Free(void* block, size_t size=0) = 0;
+		};
+
+		EATHREADLIB_API void       SetAllocator(Allocator* pAllocator);
+		EATHREADLIB_API Allocator* GetAllocator();
+
+		EATHREADLIB_API void SetAllocator(EA::Allocator::ICoreAllocator* pAllocator);
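+
+		// Illustrative sketch (hypothetical MyAllocator; per the note above, it must
+		// be installed before any thread operations that allocate memory):
+		//     class MyAllocator : public EA::Thread::Allocator { /* implement Alloc/Free */ };
+		//     static MyAllocator gEAThreadAllocator;
+		//     EA::Thread::SetAllocator(&gEAThreadAllocator);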
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+/// EAThreadGetUniqueId
+///
+/// Gets a value that is unique per thread but isn't necessarily the system-recognized
+/// thread id. This function is at least as fast as GetThreadId, and on some platforms
+/// is potentially significantly faster due to being implemented in inline asm, which 
+/// avoids a system function call that may cause an instruction cache miss penalty.
+/// This function is useful for creating very fast implementations of some kinds of 
+/// threading constructs. It's implemented as a macro instead of a function in order
+/// to maximize inlining success across all platforms and compilers.
+///
+/// This function is guaranteed to yield a valid value; there are no error conditions.
+///
+/// This macro acts as if it were declared as a function like this:
+///     void EAThreadGetUniqueId(ThreadUniqueId& result);
+///
+/// Example usage:
+///     ThreadUniqueId x;
+///     EAThreadGetUniqueId(x);
+///
+#if EA_USE_CPP11_CONCURRENCY
+	#define EAThreadGetUniqueId(dest) (dest = static_cast<uintptr_t>(std::hash<std::thread::id>()(std::this_thread::get_id())))
+
+#elif defined(EA_PLATFORM_WINDOWS) && defined(_MSC_VER) && !defined(_WIN64)
+
+	// Reference implementation:
+	//extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+	//#define EAThreadGetUniqueId(dest) dest = (ThreadUniqueId)(uintptr_t)GetCurrentThreadId()
+
+	// Fast implementation:
+	extern "C" unsigned long __readfsdword(unsigned long offset);
+	#pragma intrinsic(__readfsdword)
+	#define EAThreadGetUniqueId(dest) dest = (EA::Thread::ThreadUniqueId)(uintptr_t)__readfsdword(0x18)
+
+#elif defined(_MSC_VER) && defined(EA_PROCESSOR_X86_64)
+	#pragma warning(push, 0)
+	#include <intrin.h>
+	#pragma warning(pop)
+	#define EAThreadGetUniqueId(dest) dest = (EA::Thread::ThreadUniqueId)(uintptr_t)__readgsqword(0x30)
+	// Could also use dest = (EA::Thread::ThreadUniqueId)NtCurrentTeb(), but that would require #including <windows.h>, which is very heavy.
+
+#else
+
+	// Reference implementation:
+	#define EAThreadGetUniqueId(dest) dest = (EA::Thread::ThreadUniqueId)(uintptr_t)EA::Thread::GetThreadId()
+
+#endif
+
+
+// EAThreadIdToString
+// Convert a thread id to a string suitable for use with printf like functions, e.g.:
+//      printf("%s", EAThreadIdToString(myThreadId));
+// This macro is intended for debugging purposes and makes no guarantees about performance 
+// or how a thread id is mapped to a string.
+namespace EA
+{
+	namespace Thread
+	{
+		namespace detail
+		{
+			struct EATHREADLIB_API ThreadIdToStringBuffer
+			{
+			public:
+				enum { BufSize = 32 };
+				explicit ThreadIdToStringBuffer(EA::Thread::ThreadId threadId);
+				const char* c_str() const { return mBuf; }
+			private:
+				char mBuf[BufSize];
+			};
+
+			struct EATHREADLIB_API SysThreadIdToStringBuffer
+			{
+			public:
+				enum { BufSize = 32 };
+				explicit SysThreadIdToStringBuffer(EA::Thread::SysThreadId sysThreadId);
+				const char* c_str() const { return mBuf; }
+			private:
+				char mBuf[BufSize];
+			};
+		}
+	}
+}
+
+#if !defined(EAThreadThreadIdToString)
+	#define EAThreadThreadIdToString(threadId)       (EA::Thread::detail::ThreadIdToStringBuffer(threadId).c_str())
+#endif
+#if !defined(EAThreadSysThreadIdToString)
+	#define EAThreadSysThreadIdToString(sysThreadId) (EA::Thread::detail::SysThreadIdToStringBuffer(sysThreadId).c_str())
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Inline functions
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+	// We implement GetSysThreadId in our associated .cpp file.
+#elif defined(EA_PLATFORM_SONY)
+	// We implement GetSysThreadId in our associated .cpp file.
+#elif defined(EA_PLATFORM_APPLE)
+	// We implement GetSysThreadId in our associated .cpp file.
+#elif EA_USE_CPP11_CONCURRENCY
+	// We implement GetSysThreadId in our associated .cpp file.
+#else
+	inline EA::Thread::SysThreadId EA::Thread::GetSysThreadId(ThreadId id)
+	{
+		return id;
+	}
+
+	inline EA::Thread::SysThreadId EA::Thread::GetSysThreadId()
+	{
+		return GetThreadId(); // ThreadId == SysThreadId in this case
+	}
+#endif
+
+#endif // EATHREAD_EATHREAD_H
+
+
+

+ 480 - 0
include/eathread/eathread_atomic.h

@@ -0,0 +1,480 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for thread-safe primitive operations.
+// 
+// EAThread atomics do NOT imply the use of read/write barriers.  This is 
+// partly due to historical reasons and partly due to EAThread's internal 
+// code being optimized for not using barriers.
+//
+// In the future, we are considering migrating to an atomics interface which
+// defaults atomics to using full read/write barriers while allowing users
+// to opt out of full barrier usage.  The C++11 interface already provides
+// similar functionality.
+//
+// http://en.cppreference.com/w/cpp/atomic/memory_order
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+#define EATHREAD_EATHREAD_ATOMIC_H
+
+
+#include <EABase/eabase.h>
+EA_DISABLE_ALL_VC_WARNINGS()
+#include <stddef.h>
+EA_RESTORE_ALL_VC_WARNINGS()
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing. Let the default implementation below be used.
+//#elif defined(EA_USE_CPP11_CONCURRENCY) && EA_USE_CPP11_CONCURRENCY
+//    #include <eathread/cpp11/eathread_atomic_cpp11.h> // CPP11 atomics are currently broken and slow. To be re-enabled for other platforms when VS2013 is released.
+#elif defined(EA_USE_COMMON_ATOMICINT_IMPLEMENTATION) && EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+	#include <eathread/internal/eathread_atomic.h>
+#elif defined(EA_PLATFORM_APPLE)
+	#include <eathread/apple/eathread_atomic_apple.h>
+#elif defined(EA_PROCESSOR_X86) || ((defined(EA_PLATFORM_WINRT) || defined(EA_PLATFORM_WINDOWS_PHONE)) && defined(EA_PROCESSOR_ARM))
+	#include <eathread/x86/eathread_atomic_x86.h>
+#elif defined(EA_PROCESSOR_X86_64)
+	#include <eathread/x86-64/eathread_atomic_x86-64.h>
+#elif defined(EA_PLATFORM_ANDROID)
+	#if EATHREAD_C11_ATOMICS_AVAILABLE
+		#include <eathread/android/eathread_atomic_android_c11.h>  // C11 atomics are only supported on Android API 21+
+	#else
+		#include <eathread/android/eathread_atomic_android.h>
+	#endif
+#elif defined(EA_COMPILER_GCC) || defined(CS_UNDEFINED_STRING)
+	#include <eathread/gcc/eathread_atomic_gcc.h>
+#else
+	#error Platform not supported yet.
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+// EATHREAD_ATOMIC_128_SUPPORTED
+//
+// Defined as 0 or 1. Defined as 1 whenever possible for the given compiler/platform combination.
+// Defines if 128 bit atomic operations are supported.
+// Such operations are only ever supported on 64 bit platforms.
+//
+#ifndef EATHREAD_ATOMIC_128_SUPPORTED           // If not defined by one of the above headers...
+	#define EATHREAD_ATOMIC_128_SUPPORTED 0
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		enum Atomic64Implementation
+		{
+			kAtomic64Emulated,
+			kAtomic64Native
+		};
+
+		/// SetAtomic64Implementation
+		/// Some platforms have multiple implementations, some of which support
+		/// double word atomics and some that don't. For example, certain ARM
+		/// processors will support the ldrexd/strexd atomic instructions but
+		/// others will not. 
+		EATHREADLIB_API void SetAtomic64Implementation(Atomic64Implementation implementation);
+	}
+}
+
+
+#if !defined(EA_THREAD_ATOMIC_IMPLEMENTED) // If there wasn't a processor-specific version already defined...
+
+	// Fail the build if atomics aren't being defined for the given platform/compiler.
+	// If we need to add an exception here, we can add an appropriate ifdef.
+	static_assert(false, "atomic operations must be defined for this platform.");
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// Standalone atomic functions
+			/// These act the same as the class functions below.
+			/// The T return values are the previous value, except for the
+			/// AtomicFetchSwap function which returns the swapped out value.
+			///
+			/// T    AtomicGetValue(volatile T*);
+			/// T    AtomicGetValue(const volatile T*);
+			/// void AtomicSetValue(volatile T*, T value);
+			/// T    AtomicFetchIncrement(volatile T*);
+			/// T    AtomicFetchDecrement(volatile T*);
+			/// T    AtomicFetchAdd(volatile T*, T value);
+			/// T    AtomicFetchSub(volatile T*, T value);
+			/// T    AtomicFetchOr(volatile T*, T value);
+			/// T    AtomicFetchAnd(volatile T*, T value);
+			/// T    AtomicFetchXor(volatile T*, T value);
+			/// T    AtomicFetchSwap(volatile T*, T value);
+			/// T    AtomicFetchSwapConditional(volatile T*, T value, T condition);
+			/// bool AtomicSetValueConditional(volatile T*, T value, T condition);
+
+
+
+			/// class AtomicInt
+			///
+			/// Implements thread-safe access to an integer and primary operations on that integer.
+			/// AtomicIntegers are commonly used as lightweight flags and signals between threads
+			/// or as the synchronization object for spinlocks. Those familiar with the Win32 API
+			/// will find that AtomicInt32 is essentially a platform independent interface to 
+			/// the Win32 InterlockedXXX family of functions. Those familiar with Linux may 
+			/// find that AtomicInt32 is essentially a platform independent interface to atomic_t 
+			/// functionality.
+			///
+			/// Note that the reference implementation defined here is itself not thread-safe.
+			/// A thread-safe version requires platform-specific code.
+			///
+			/// Example usage
+			///     AtomicInt32 i = 0;
+			///
+			///     ++i;
+			///     i--;
+			///     i += 7;
+			///     i -= 3;
+			///     i = 2;
+			///     
+			///     int x = i.GetValue();
+			///     i.Increment();
+			///     bool oldValueWas6 = i.SetValueConditional(3, 6);
+			///     i.Add(4);
+			///
+			template <class T>
+			class EATHREADLIB_API AtomicInt
+			{
+			public:
+				/// ThisType
+				/// A typedef for this class type itself, for usage clarity.
+				typedef AtomicInt<T> ThisType;
+
+
+				/// ValueType
+				/// A typedef for the basic object we work with. 
+				typedef T ValueType;
+
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				AtomicInt()
+					{}
+
+
+				/// AtomicInt
+				/// Constructs with an initial value. 
+				AtomicInt(ValueType n)
+					: mValue(n) {}
+
+
+				/// AtomicInt
+				/// Copy ctor. Uses GetValue to read the value, and thus is synchronized. 
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+
+				/// AtomicInt
+				/// Assignment operator. Uses GetValue to read the value, and thus is synchronized. 
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+
+				/// GetValue
+				/// Safely gets the current value. A platform-specific version of 
+				/// this might need to do something more than just read the value.
+				ValueType GetValue() const
+					{ return mValue; }
+
+
+				/// GetValueRaw
+				/// "Unsafely" gets the current value. This is useful for algorithms 
+				/// that want to poll the value in a high performance way before 
+				/// reading or setting the value in a more costly thread-safe way. 
+				/// You should not use this function when attempting to do thread-safe
+				/// atomic operations.
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+
+				/// SetValue
+				/// Safely sets a new value. Returns the old value. Note that due to 
+				/// expected multithreaded accesses, a call to GetValue after SetValue
+				/// might return a different value than what was set with SetValue.
+				/// This of course depends on your situation.
+				ValueType SetValue(ValueType n)
+				{
+					const ValueType nOldValue(mValue);
+					mValue = n;
+					return nOldValue;
+				}
+
+
+				/// SetValueConditional
+				/// Safely set the value to a new value if the original value is equal to 
+				/// a condition value. Returns true if the condition was met and the 
+				/// assignment occurred. The comparison and value setting are done as
+				/// an atomic operation and thus another thread cannot intervene between
+				/// the two as would be the case with simple C code.
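+				/// Example usage (illustrative sketch of a spin-acquire built on this):
+				///     AtomicInt32 lock(0);
+				///     while(!lock.SetValueConditional(1, 0)) // acquire: swap 0 -> 1 if currently 0
+				///         { /* spin or yield */ }
+				///     // ... critical section ...
+				///     lock.SetValue(0);                      // release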
+				bool SetValueConditional(ValueType n, ValueType condition)
+				{
+					if(mValue == condition) 
+					{
+						mValue = n;
+						return true;
+					}
+					return false;
+				}
+
+
+				/// Increment
+				/// Safely increments the value. Returns the new value.
+				/// This function acts the same as the C++ pre-increment operator.
+				ValueType Increment()
+					{ return ++mValue; }
+
+
+				/// Decrement
+				/// Safely decrements the value. Returns the new value.
+				/// This function acts the same as the C++ pre-decrement operator.
+				ValueType Decrement()
+					{ return --mValue; }
+
+
+				/// Add
+				/// Safely adds a value, which can be negative. Returns the new value.
+				/// You can implement subtraction with this function by using a negative argument.
+				ValueType Add(ValueType n)
+					{ return (mValue += n); }
+
+
+				/// operators
+				/// These allow an AtomicInt object to safely act like a built-in type.
+				///
+				/// Note: The operators for AtomicInt behave differently than standard
+				///         C++ operators in that they will always return a ValueType instead
+				///         of a reference.
+				///
+				/// cast operator
+				/// Returns the AtomicInt value as an integral type. This allows the 
+				/// AtomicInt to behave like a standard built-in integer type.
+				operator const ValueType() const
+					 { return mValue; }
+
+				/// operator =
+				/// Assigns a new value and returns the value after the operation.
+				///
+				ValueType operator=(ValueType n)
+				{
+					 mValue = n;
+					 return n;
+				}
+
+				/// operator+=
+				/// Adds a value to the AtomicInt and returns the value after the operation.
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt after the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value + n, as another thread might have modified the AtomicInt 
+				/// immediately after the addition operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator+=(ValueType n)
+				{
+					 mValue += n;
+					 return mValue;
+				}
+
+				/// operator-=
+				/// Subtracts a value from the AtomicInt and returns the value after the operation.
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt after the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value - n, as another thread might have modified the AtomicInt 
+				/// immediately after the subtraction operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator-=(ValueType n)
+				{
+					 mValue -= n;
+					 return mValue;
+				}
+
+				/// pre-increment operator++
+				/// Increments the AtomicInt. 
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt after the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value + 1, as another thread might have modified the AtomicInt 
+				/// immediately after the increment operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator++()
+					 { return ++mValue; }
+
+				/// post-increment operator++
+				/// Increments the AtomicInt and returns the value of the AtomicInt before
+				/// the increment operation. 
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt before the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value, as another thread might have modified the AtomicInt 
+				/// immediately after the increment operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator++(int)
+					 { return mValue++; }
+
+				/// pre-decrement operator--
+				/// Decrements the AtomicInt.
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt after the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value - 1, as another thread might have modified the AtomicInt 
+				/// immediately after the decrement operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator--()
+					 { return --mValue; }
+
+				/// post-decrement operator--
+				/// Decrements the AtomicInt and returns the value of the AtomicInt before
+				/// the decrement operation. 
+				///
+				/// This function doesn't obey the C++ standard in that it does not return 
+				/// a reference, but rather the value of the AtomicInt before the  
+				/// operation is complete. It must be noted that this design is motivated by
+				/// the fact that it is unsafe to rely on the returned value being equal to 
+				/// the previous value, as another thread might have modified the AtomicInt 
+				/// immediately after the decrement operation.  So rather than returning the
+				/// reference of AtomicInt, the function returns a copy of the AtomicInt value
+				/// used in the function.
+				ValueType operator--(int)
+					 { return mValue--;}
+
+			protected:
+				volatile ValueType mValue; /// May not be the same on all platforms.
+			};
+
+
+		} // namespace Thread
+
+	} // namespace EA
+
+#endif // #if !defined(EA_THREAD_ATOMIC_IMPLEMENTED)
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		// Typedefs
+		typedef AtomicInt<int32_t>  AtomicInt32;   /// int32_t  atomic integer.
+		typedef AtomicInt<uint32_t> AtomicUint32;  /// uint32_t atomic integer.
+		typedef AtomicInt<int64_t>  AtomicInt64;   /// int64_t  atomic integer.
+		typedef AtomicInt<uint64_t> AtomicUint64;  /// uint64_t atomic integer.
+
+		#if !defined(EA_PLATFORM_WORD_SIZE) || (EA_PLATFORM_WORD_SIZE == 4)
+			typedef AtomicInt32  AtomicIWord;
+			typedef AtomicUint32 AtomicUWord;
+		#else
+			typedef AtomicInt64  AtomicIWord;
+			typedef AtomicUint64 AtomicUWord;
+		#endif
+
+		#if !defined(EA_PLATFORM_PTR_SIZE) || (EA_PLATFORM_PTR_SIZE == 4)
+			typedef AtomicInt32  AtomicIntPtr;
+			typedef AtomicUint32 AtomicUintPtr;
+		#else
+			typedef AtomicInt64  AtomicIntPtr;
+			typedef AtomicUint64 AtomicUintPtr;
+		#endif
+
+
+		#ifdef _MSC_VER                  // VC++ yields spurious warnings about void* being cast to an integer type and vice-versa.
+			#pragma warning(push)        // These warnings are baseless because we check for platform pointer size above.
+			#pragma warning(disable: 4311 4312 4251)
+		#endif
+
+
+		/// class AtomicPointer
+		///
+		/// For simplicity of the current implementation, we simply have AtomicPointer map
+		/// to AtomicIntPtr. This is reasonably safe because AtomicIntPtr uses a 
+		/// pointer-sized integer as its ValueType and there are no foreseeable supported 
+		/// platforms in which intptr_t will not exist or be possible as a data type.
+		///
+		class EATHREADLIB_API AtomicPointer : public AtomicIntPtr
+		{
+		public:
+			typedef void* PointerValueType;
+
+			AtomicPointer(void* p = NULL)
+				: AtomicIntPtr(static_cast<ValueType>(reinterpret_cast<uintptr_t>(p))) {}
+
+			AtomicPointer& operator=(void* p) 
+				{ AtomicIntPtr::operator=(static_cast<ValueType>(reinterpret_cast<uintptr_t>(p))); return *this; }
+
+			operator const void*() const // It's debatable whether this should be supported.
+				{ return (void*)AtomicIntPtr::GetValue(); }
+
+			void* GetValue() const
+				{ return (void*)AtomicIntPtr::GetValue(); }
+
+			void* GetValueRaw() const
+				{ return (void*)AtomicIntPtr::GetValueRaw(); }
+
+			void* SetValue(void* p)
+				{ return (void*)AtomicIntPtr::SetValue(static_cast<ValueType>(reinterpret_cast<uintptr_t>(p))); }
+
+			bool SetValueConditional(void* p, void* pCondition)
+				{ return AtomicIntPtr::SetValueConditional(static_cast<ValueType>(reinterpret_cast<uintptr_t>(p)), static_cast<ValueType>(reinterpret_cast<uintptr_t>(pCondition))); }
+		};
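+
+		// Illustrative usage sketch (pNode and pNew are hypothetical pointer values):
+		//     AtomicPointer head(NULL);
+		//     void* pOld = head.SetValue(pNode);                 // atomically publish; returns prior value
+		//     bool  bSet = head.SetValueConditional(pNew, pOld); // CAS: succeeds only if still pOld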
+
+
+		#ifdef _MSC_VER
+			#pragma warning(pop)
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // EATHREAD_EATHREAD_ATOMIC_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 249 - 0
include/eathread/eathread_barrier.h

@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements Posix-style barriers.
+// Note that thread synchronization barriers are different from 
+// memory synchronization barriers (a.k.a. fences).
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_BARRIER_H
+#define EATHREAD_EATHREAD_BARRIER_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress the warning that class 'AtomicInt32' needs to have a
+	// dll-interface to be used by clients of classes which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EABarrierData
+///
+/// This is used internally by class Barrier.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+
+#if defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	// We implement the barrier manually, as not all Posix thread implementations
+	// have barriers and even those that have it lack a timeout wait version.
+	struct EABarrierData{
+		ScePthreadCond  mCV;            // Wait for barrier.
+		ScePthreadMutex mMutex;         // Control access to barrier.
+		int             mnHeight;       // Number of threads required.
+		int             mnCurrent;      // Current number of threads. As threads wait, this value decreases towards zero.
+		unsigned long   mnCycle;        // Cycle count.
+		bool            mbValid;        // True if valid.
+
+		EABarrierData();
+	};
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	#include <pthread.h>
+
+	// We implement the barrier manually, as not all Posix thread implementations 
+	// have barriers and even those that have it lack a timeout wait version.
+	struct EABarrierData{
+		pthread_cond_t  mCV;            // Wait for barrier.
+		pthread_mutex_t mMutex;         // Control access to barrier.
+		int             mnHeight;       // Number of threads required.
+		int             mnCurrent;      // Current number of threads. As threads wait, this value decreases towards zero.
+		unsigned long   mnCycle;        // Cycle count.
+		bool            mbValid;        // True if valid.
+
+		EABarrierData();
+	};
+
+#else // All other platforms
+	#include <eathread/eathread_atomic.h>
+	#include <eathread/eathread_semaphore.h>
+
+	struct EATHREADLIB_API EABarrierData{
+		EA::Thread::AtomicInt32    mnCurrent;       // Current number of threads. As threads wait, this value decreases towards zero.
+		int                        mnHeight;        // Number of threads required.
+		EA::Thread::AtomicInt32    mnIndex;         // Which semaphore we are using.
+		EA::Thread::Semaphore      mSemaphore0;     // First semaphore.     We can't use an array of Semaphores, because that would
+		EA::Thread::Semaphore      mSemaphore1;     // Second semaphore.    interfere with our ability to initialize them our way.
+		EABarrierData();
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		EABarrierData(const EABarrierData& rhs);               // copy constructor
+		EABarrierData& operator=(const EABarrierData& rhs);    // assignment operator
+	};
+
+#endif
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// BarrierParameters
+		/// Specifies barrier settings.
+		struct EATHREADLIB_API BarrierParameters
+		{
+			int  mHeight;        /// Barrier 'height'. Refers to number of threads which must wait before being released.
+			bool mbIntraProcess; /// True if the semaphore is intra-process, else inter-process.
+			bool mbIntraProcess; /// True if the barrier is intra-process, else inter-process.
+
+			BarrierParameters(int height = 0, bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// Barrier
+		/// A Barrier is a synchronization point for a set of threads. A barrier has
+		/// a count associated with it and threads call the wait function until the
+		/// given count of threads have reached the wait point. Then all threads 
+		/// are released. The first thread released is given a special return value
+		/// that identifies it uniquely so that one-time work can be done. 
+		///
+		/// A primary use of barriers is to spread out work between a number of threads 
+		/// and wait until the work is complete. For example, if you want to find and
+		/// count all objects of a given kind in a large grid, you might have four 
+		/// threads each work on a quadrant and wait on the barrier until all are
+		/// finished. This particular example is more practical on SMP systems than
+		/// uniprocessor systems, but there are also uniprocessor uses. It should be
+		/// noted, however, that a Barrier synchronizes the completion of -threads-, 
+		/// and not necessarily the completion of -tasks-. There may or may not be 
+		/// a direct correspondence between the two.
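+		///
+		/// Example usage (illustrative; four workers meet at the barrier and the
+		/// primary thread performs the one-time merge work):
+		///     Barrier gBarrier(4);
+		///     // In each of the four worker threads:
+		///     if(gBarrier.Wait() == Barrier::kResultPrimary)
+		///         MergeResults(); // hypothetical user function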
+		///
+		class EATHREADLIB_API Barrier
+		{
+		public:
+			enum Result{
+				kResultPrimary   =  0,  /// The barrier wait succeeded and this thread is the designated solitary primary thread. Similar to Posix "serial" thread.
+				kResultSecondary =  1,  /// The barrier wait succeeded and this thread is one of the secondary threads.
+				kResultError     = -1,  /// The wait resulted in error, due to various possible reasons.
+				kResultTimeout   = -2   /// The barrier wait timed out.
+			};
+
+			/// Barrier
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Barrier(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Barrier(NULL, false).
+			Barrier(const BarrierParameters* pBarrierParameters = NULL, bool bDefaultParameters = true);
+
+			/// Barrier
+			/// This is a constructor which initializes the Barrier to a specific height 
+			/// This is a constructor which initializes the Barrier to a specific height 
+			/// and initializes the other Barrier parameters to default values. See the
+			Barrier(int height);
+
+			/// ~Barrier
+			/// Destroys an existing Barrier. The Barrier must not be waited on 
+			/// by any thread, otherwise the resulting behaviour is undefined.
+			~Barrier();
+
+			/// Init
+			/// Initializes the Barrier; used in cases where it cannot be initialized
+			/// Initializes the Barrier; used in cases where it cannot be initialized
+			/// via the constructor (as in the case with default construction or 
+			/// array initialization).
+			bool Init(const BarrierParameters* pBarrierParameters);
+
+			/// Wait
+			/// Causes the current thread to wait until the designated number of threads have called Wait. 
+			///
+			/// Returns one of enum Result.
+			///
+			/// A timeout means that the thread gives up its contribution to the height while 
+			/// waiting for the full height to be achieved. A timeout of zero means that a thread 
+			/// only succeeds if it is the final thread (the one which puts the height to full); 
+			/// otherwise the call returns with a timeout.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
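+			/// Example usage (illustrative; note the relative-to-absolute conversion):
+			///     Barrier::Result result = barrier.Wait(EA::Thread::ConvertRelativeTime(1000)); // wait up to ~1000ms
+			///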
+			Result Wait(const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mBarrierData; }
+
+		protected:
+			EABarrierData mBarrierData;
+
+		private:
+			// Objects of this class are not copyable.
+			Barrier(const Barrier&){}
+			Barrier& operator=(const Barrier&){ return *this; }
+		};
+
+
+		/// BarrierFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Barrier.
+		/// A primary use of this would be to allow the Barrier implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API BarrierFactory
+		{
+		public:
+			static Barrier*    CreateBarrier();                    // Internally implemented as: return new Barrier;
+			static void        DestroyBarrier(Barrier* pBarrier);  // Internally implemented as: delete pBarrier;
+
+			static size_t      GetBarrierSize();                   // Internally implemented as: return sizeof(Barrier);
+			static Barrier*    ConstructBarrier(void* pMemory);    // Internally implemented as: return new(pMemory) Barrier;
+			static void        DestructBarrier(Barrier* pBarrier); // Internally implemented as: pBarrier->~Barrier();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+   // re-enable warning(s) disabled above.
+   #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_BARRIER_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 347 - 0
include/eathread/eathread_callstack.h

@@ -0,0 +1,347 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_EATHREAD_CALLSTACK_H
+#define EATHREAD_EATHREAD_CALLSTACK_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <stddef.h>
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// CallstackContext
+		/// 
+		/// This is forward-declared here and fully declared at the bottom of this file.
+		///
+		struct CallstackContext;
+		struct Context;
+
+
+		/// InitCallstack
+		///
+		/// Allows the user to explicitly initialize the callstack mechanism.
+		/// Only the first call to InitCallstack will have effect. Calls to 
+		/// InitCallstack must be matched by calls to ShutdownCallstack.
+		///
+		EATHREADLIB_API void InitCallstack();
+
+
+		/// ShutdownCallstack
+		///
+		/// Allows the user to explicitly shutdown the callstack mechanism.
+		/// Calls to InitCallstack must be matched by calls to ShutdownCallstack.
+		/// The last call to ShutdownCallstack will shutdown and free the callstack mechanism.
+		///
+		EATHREADLIB_API void ShutdownCallstack();
+
+
+		/// GetCallstack
+		///
+		/// Gets the addresses of the calling instructions of a call stack.
+		/// If the CallstackContext parameter is used, then that execution context is used;
+		/// otherwise the current execution context is used.
+		/// The return value is the number of entries written to the callstack array.
+		/// The item at callstack[0] is from the function calling the GetCallstack function.
+		/// For most platforms the addresses reported are the addresses of the instruction 
+		/// that will next be executed upon returning from the function it is calling.
+		/// The maxDepth parameter must be at least one and callstack must be able to hold
+		/// at least one entry (a terminating 0 NULL entry).
+		/// at least one entry (a terminating NULL entry).
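+		///
+		/// Example usage (illustrative; captures the current thread's callstack):
+		///     void* addresses[32];
+		///     size_t depth = GetCallstack(addresses, 32, NULL);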
+		EATHREADLIB_API size_t GetCallstack(void* callstack[], size_t maxDepth, const CallstackContext* pContext = NULL);
+
+
+		/// GetCallstack
+		///
+		/// Gets the callstack based on the thread id as opposed to register context.
+		///
+		#if defined(EA_PLATFORM_SONY)
+			EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, EA::Thread::ThreadId& pthread);
+		#endif
+
+
+
+
+		#if defined(EA_PLATFORM_MICROSOFT)
+			/// Microsoft thread handles are opaque types which are non-unique per thread.
+			/// That is, two different thread handles might refer to the same thread.
+			/// threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+			/// This is not the same as a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+			EATHREADLIB_API bool ThreadHandlesAreEqual(intptr_t threadId1, intptr_t threadId2);
+
+			/// This function is the same as EA::Thread::GetSysThreadId(ThreadId id).
+			/// This function converts from one type of Microsoft thread identifier to another.
+			/// threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+			/// The return value is a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+			/// Upon failure, the return value will be zero.
+			EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId);
+		#endif
+
+
+		/// GetCallstackContext
+		///
+		/// Gets the CallstackContext associated with the given thread.
+		/// The thread must be in a non-running state.
+		/// If the threadID is EAThread::kThreadIdInvalid, the current thread context is retrieved.
+		/// However, it's of little use to get the context of the current thread, since upon return
+		/// from the GetCallstackContext the data will not apply to the current thread any more;
+		/// thus this information is probably useful only for diagnostic purposes.
+		/// The threadId parameter is the same type as an EAThread ThreadId. It is important to 
+		/// note that an EAThread ThreadId under Microsoft platforms is a thread handle and not what 
+		/// Microsoft calls a thread id. This is by design as Microsoft thread ids are second class
+		/// citizens and likely wouldn't exist if it were not for quirks in the Windows API evolution.
+		///
+		/// Note that threadId is the same as EA::Thread::ThreadId and is a Microsoft thread HANDLE. 
+		/// This is not the same as a Microsoft DWORD thread id which is the same as EA::Thread::SysThreadId.
+		///
+		/// EACallstack has a general struct for each CPU type called Context, defined in EACallstack/Context.h. 
+		/// The Context struct contains the entire CPU register context information. In order to walk a thread's 
+		/// callstack, you really need only two or three of the register values from the Context. So there is a 
+		/// mini struct called CallstackContext which is just those registers needed to read a thread's callstack.
+		///
+		#if EA_USE_CPP11_CONCURRENCY
+			EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, EA::Thread::ThreadId threadId);
+		#else
+			EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId = 0);
+		#endif
+
+
+		/// GetCallstackContextSysThreadId
+		///
+		/// This is the same as GetCallstackContext, except it uses what EAThread calls SysThreadId.
+		/// On Microsoft platforms a SysThreadId is a "thread id" whereas ThreadId is "thread handle."
+		/// On non-Microsoft platforms a SysThreadId is defined to be the same as ThreadId and is often
+		/// just an integer or opaque identifier (e.g. pthread).
+		/// This function exists because it may be more convenient to work with SysThreadIds in some cases.
+		/// You can convert from a ThreadId (Microsoft thread handle) to a SysThreadId (Microsoft thread id)
+		/// with the GetThreadIdFromThreadHandle function.
+		EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId = 0);
+
+
+		/// GetCallstackContext
+		///
+		/// Gets the CallstackContext from a full Context struct. Note that the Context struct
+		/// defines the entire machine context, whereas the CallstackContext is a tiny struct
+		/// with just a couple integer members and is all that's needed to describe a callstack.
+		///
+		EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext = NULL);
+
+
+		/// GetModuleFromAddress
+		///
+		/// Given an address, this function tells what module it comes from. 
+		/// The primary use of this is to tell what DLL an instruction pointer comes from.
+		/// Returns the required strlen of the pModuleFileName. If the return value is >= moduleNameCapacity,
+		/// there wasn't enough space. pModuleFileName is written with as many characters as possible
+		/// and will always be zero terminated. moduleNameCapacity must be at least one.
+		///
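+		/// Example usage (illustrative; pCodeAddress might come from GetCallstack):
+		///     char moduleName[256];
+		///     GetModuleFromAddress(pCodeAddress, moduleName, sizeof(moduleName));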
+		EATHREADLIB_API size_t GetModuleFromAddress(const void* pAddress, char* pModuleFileName, size_t moduleNameCapacity);
+
+
+		/// ModuleHandle
+		/// This is a runtime module identifier. For Microsoft Windows-like platforms
+		/// this is the same thing as HMODULE. For other platforms it is a shared library
+		/// runtime library pointer, id, or handle. For Microsoft platforms, each running
+		/// DLL has a module handle.
+		#if defined(EA_PLATFORM_MICROSOFT)
+			typedef void*            ModuleHandle;  // HMODULE, from LoadLibrary()
+		#elif defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE)
+			typedef void*            ModuleHandle;  // void*, from dlopen()
+		#else
+			typedef uintptr_t        ModuleHandle;
+		#endif
+
+
+		/// GetModuleHandleFromAddress
+		///
+		/// Returns the module handle from a code address.
+		/// Returns 0/NULL if no associated module could be found.
+		///
+		EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress);
+
+
+		/// EAGetInstructionPointer
+		///
+		/// Returns the current instruction pointer (a.k.a. program counter).
+		/// This function is implemented as a macro, it acts as if its declaration 
+		/// were like so:
+		///     void EAGetInstructionPointer(void*& p);
+		///
+		/// For portability, this function should only be used as a standalone 
+		/// statement on its own line.
+		///
+		/// Example usage:
+		///    void* pInstruction;
+		///    EAGetInstructionPointer(pInstruction);
+		///
+		#if defined(_MSC_VER) && defined(EA_PROCESSOR_X86)
+			// We implement this via calling the next line of code as a function.
+			// Then we continue as if we were exiting that function but with no
+			// return statement. The result is that the instruction pointer will
+			// be placed on the stack and we merely pop it off the stack and 
+			// into a local variable.
+			#define EAGetInstructionPointer(p)   \
+			{                                    \
+				uintptr_t eip;                   \
+				__asm {                          \
+					__asm call GetEIP            \
+					__asm GetEIP:                \
+					__asm pop eip                \
+				}                                \
+				p = (void*)eip;                  \
+			}
+
+			EA_DISABLE_VC_WARNING(4740) 
+			inline void GetInstructionPointer(void*& p) 
+				{EAGetInstructionPointer(p);}
+			EA_RESTORE_VC_WARNING()
+
+		#elif defined(_MSC_VER) && (defined(EA_PROCESSOR_X86_64) || defined(EA_PROCESSOR_ARM))
+
+			EATHREADLIB_API EA_NO_INLINE void GetInstructionPointer(void*& p);
+
+			#define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+
+		#elif defined(__ARMCC_VERSION) // ARM compiler
+
+			// Even if there are compiler intrinsics that let you get the instruction pointer, 
+			// this function can still be useful. For example, on ARM platforms this function
+			// returns the address with the 'thumb bit' set if it's thumb code. We need this info sometimes.
+			EATHREADLIB_API void GetInstructionPointer(void*& p);
+
+			// The ARM compiler provides a __current_pc() intrinsic, which returns an unsigned integer type.
+			#define EAGetInstructionPointer(p) { uintptr_t pc = (uintptr_t)__current_pc(); p = reinterpret_cast<void*>(pc); }
+
+		//#elif defined(EA_COMPILER_CLANG) // Disabled until implemented. The GCC code below works under clang, though it wouldn't if compiler extensions were disabled.
+		//    EATHREADLIB_API void GetInstructionPointer(void*& p);
+		//
+		//    // To do: implement this directly instead of via a call to GetInstructionPointer.
+		//    #define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+			
+		#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG) // This covers EA_PLATFORM_UNIX, EA_PLATFORM_OSX 
+
+			// Even if there are compiler intrinsics that let you get the instruction pointer, 
+			// this function can still be useful. For example, on ARM platforms this function
+			// returns the address with the 'thumb bit' set if it's thumb code. We need this info sometimes.
+			EATHREADLIB_API void GetInstructionPointer(void*& p) __attribute__((noinline));
+
+			// It turns out that GCC has an extension that allows you to take the address 
+			// of a label. The code here looks a little wacky, but that's how it's done.
+			// Basically, this generates a global variable called 'label' and the assignment
+			// to 'p' reads that variable into p. One possible downside to this technique is
+			// that it relies on registers and global memory not being corrupted, yet one of the
+			// reasons why we might want to be getting the instruction pointer is in dealing
+			// with some sort of processor exception which may be due to memory corruption.
+			// To consider: Make a version of this which calculates the value dynamically via asm.
+			#define EAGetInstructionPointer(p) EA::Thread::GetInstructionPointer(p)
+		#else
+			#error EAGetInstructionPointer is not implemented for this compiler/platform.
+		#endif
+
+
+		/// EASetStackBase / SetStackBase / GetStackBase / GetStackLimit
+		///
+		/// EASetStackBase is a macro and acts as if its declaration were like so:
+		///     void EASetStackBase();
+		/// 
+		/// EASetStackBase sets the current stack pointer as the bottom (beginning)
+		/// of the stack. Depending on the platform, the "bottom" may be up or down
+		/// in memory, depending on whether the stack grows upward or downward (usually
+		/// it grows downward, and so "bottom" actually refers to an address that is
+		/// above child stack frames in memory).
+		/// This function is intended to be called on application startup as early as 
+		/// possible, and in each created thread, as early as possible. Its purpose 
+		/// is to record the beginning stack pointer because the platform doesn't provide
+		/// APIs to tell what it is, and we need to know it (e.g. so we don't overrun
+		/// it during stack unwinds). 
+		///
+		/// For portability, EASetStackBase should be used only as a standalone 
+		/// statement on its own line, as it may include statements that can't work otherwise.
+		///
+		/// Example usage:
+		///    int main(int argc, char** argv) {
+		///       EASetStackBase();
+		///       . . .
+		///    }
+		///
+		/// SetStackBase is a function which lets you explicitly set a stack bottom instead
+		/// of doing it automatically with EASetStackBase. If you pass NULL for pStackBase
+		/// then the function uses its stack location during its execution, which will be 
+		/// a little less optimal than calling EASetStackBase.
+		///
+		/// GetStackBase returns the stack bottom set by EASetStackBase or SetStackBase.
+		/// It returns NULL if no stack bottom was set or could be set.
+		///
+		/// GetStackLimit returns the current stack "top", which will be lower than the stack
+		/// bottom in memory if the platform grows its stack downward.
+
+		EATHREADLIB_API void  SetStackBase(void* pStackBase);
+		inline          void  SetStackBase(uintptr_t pStackBase){ SetStackBase((void*)pStackBase); }
+		EATHREADLIB_API void* GetStackBase();
+		EATHREADLIB_API void* GetStackLimit();
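+
+		/// Example usage (an illustrative sketch; assumes a platform whose stack
+		/// grows downward, so used space is the base minus the current limit):
+		///    void*  pBase     = GetStackBase();
+		///    void*  pLimit    = GetStackLimit();
+		///    size_t stackUsed = (size_t)((uintptr_t)pBase - (uintptr_t)pLimit);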
+
+
+		#if defined(_MSC_VER) && defined(EA_PROCESSOR_X86)
+			#define EASetStackBase()               \
+			{                                      \
+				void* esp;                         \
+				__asm { mov esp, ESP }             \
+				::EA::Thread::SetStackBase(esp);   \
+			}                               
+
+		#elif defined(_MSC_VER) && (defined(EA_PROCESSOR_X86_64) || defined(EA_PROCESSOR_ARM))
+			// This implementation uses SetStackBase(NULL), which internally retrieves the stack pointer.
+			#define EASetStackBase()                     \
+			{                                            \
+				::EA::Thread::SetStackBase((void*)NULL); \
+			}                                            \
+
+		#elif defined(__ARMCC_VERSION)          // ARM compiler
+
+			#define EASetStackBase()  \
+				::EA::Thread::SetStackBase((void*)__current_sp())
+
+		#elif defined(__GNUC__) // This covers EA_PLATFORM_UNIX, EA_PLATFORM_OSX
+
+			#define EASetStackBase()  \
+				::EA::Thread::SetStackBase((void*)__builtin_frame_address(0));
+
+		#else
+			// This implementation uses SetStackBase(NULL), which internally retrieves the stack pointer.
+			#define EASetStackBase()                     \
+			{                                            \
+				::EA::Thread::SetStackBase((void*)NULL); \
+			}                                            \
+
+		#endif
+
+		#if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_SONY)
+			// GetPthreadStackInfo
+			//
+			// With some implementations of pthread, the stack base is returned by pthread as NULL if it's the main thread,
+			// or possibly if it's a thread you created but didn't call pthread_attr_setstack manually to provide your 
+			// own stack. It's impossible for us to tell here whether there will be such a NULL return value, so we just
+			// do what we can, and the user needs to be aware that a NULL return value means that the system doesn't
+			// provide the given information for the current thread. This function returns false and sets pBase and
+			// pLimit to NULL in the case that the stack base and limit weren't returned by the system or were returned as NULL.
+
+			bool GetPthreadStackInfo(void** pBase, void** pLimit);
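+
+			// Example usage (an illustrative sketch; pthread platforms only):
+			//    void* pBase;
+			//    void* pLimit;
+			//    if(GetPthreadStackInfo(&pBase, &pLimit))
+			//        { /* pBase/pLimit describe the current thread's stack. */ }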
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // Header include guard.
+
+
+

+ 524 - 0
include/eathread/eathread_callstack_context.h

@@ -0,0 +1,524 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_EATHREAD_CALLSTACK_CONTEXT_H
+#define EATHREAD_EATHREAD_CALLSTACK_CONTEXT_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <stddef.h>
+
+EA_DISABLE_VC_WARNING(4201)
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// CallstackContext
+		///
+		/// Processor-specific information that's needed to walk a call stack.
+		///
+		enum CallstackContextType
+		{
+			CALLSTACK_CONTEXT_UNKNOWN = 0,
+			CALLSTACK_CONTEXT_POWERPC,
+			CALLSTACK_CONTEXT_X86,
+			CALLSTACK_CONTEXT_X86_64,
+			CALLSTACK_CONTEXT_ARM,
+			CALLSTACK_CONTEXT_ARM64,
+			CALLSTACK_CONTEXT_MIPS,
+			CALLSTACK_CONTEXT_SPU,
+			NUMBER_OF_CALLSTACK_CONTEXT_TYPES
+		};
+
+		// NOTE: These context structures were moved to this header as of EAThread version 1.17.02
+		// TODO: We should evaluate if these really do belong here.
+
+		// The following are base values required for processor-agnostic offline stack dumping. 
+		// Not all implementations will fill them in, and most of the time only the stack base 
+		// and stack pointer will be filled in. Also, most of the specific contexts will have 
+		// a member with the same value as the stack pointer, e.g. mESP on x86.
+		struct CallstackContextBase
+		{
+			uintptr_t mStackBase;       /// Used to help tell what the valid stack range is. 0 if not used.
+			uintptr_t mStackLimit;      /// "
+			uintptr_t mStackPointer;    /// "
+
+			CallstackContextBase() : mStackBase(0), mStackLimit(0), mStackPointer(0) {}
+		};
+
+			struct CallstackContextPowerPC : public CallstackContextBase
+			{
+				uintptr_t mGPR1;        /// General purpose register 1.
+				uintptr_t mIAR;         /// Instruction address pseudo-register.
+				
+				CallstackContextPowerPC() : mGPR1(0), mIAR(0) {}
+			};
+
+			struct CallstackContextX86 : public CallstackContextBase
+			{
+				uint32_t mEIP;      /// Instruction pointer.
+				uint32_t mESP;      /// Stack pointer.
+				uint32_t mEBP;      /// Base pointer.
+
+				CallstackContextX86() : mEIP(0), mESP(0), mEBP(0) {}
+			};
+
+		#if defined(EA_PROCESSOR_X86)
+			struct CallstackContext : public CallstackContextX86 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_X86;
+			};
+		#endif
+
+			struct CallstackContextX86_64 : public CallstackContextBase
+			{
+				uint64_t mRIP;      /// Instruction pointer.
+				uint64_t mRSP;      /// Stack pointer.
+				uint64_t mRBP;      /// Base pointer.
+
+				CallstackContextX86_64() : mRIP(0), mRSP(0), mRBP(0) {}
+			};
+
+		#if defined(EA_PROCESSOR_X86_64)
+			struct CallstackContext : public CallstackContextX86_64 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_X86_64;
+			};
+		#endif
+
+			struct CallstackContextARM : public CallstackContextBase
+			{
+				uint32_t mFP;   /// Frame pointer; register 11 for ARM instructions, register 7 for Thumb instructions.
+				uint32_t mSP;   /// Stack pointer; register 13
+				uint32_t mLR;   /// Link register; register 14
+				uint32_t mPC;   /// Program counter; register 15
+				CallstackContextARM() : mFP(0), mSP(0), mLR(0), mPC(0) {}
+			};
+
+		#if defined(EA_PROCESSOR_ARM32)
+			struct CallstackContext : public CallstackContextARM 
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_ARM;
+			};
+		#endif
+
+			struct CallstackContextARM64 : public CallstackContextBase
+			{
+				uint64_t mFP;   /// Frame pointer; register 29 
+				uint64_t mSP;   /// Stack pointer; register SP 
+				uint64_t mLR;   /// Link register; register 30 
+				uint64_t mPC;   /// Program counter; register PC 
+				CallstackContextARM64() : mFP(0), mSP(0), mLR(0), mPC(0) {}
+			};
+
+		#if defined(EA_PROCESSOR_ARM64)
+			struct CallstackContext : public CallstackContextARM64
+			{ 
+				static const CallstackContextType kType = CALLSTACK_CONTEXT_ARM64;
+			};
+		#endif
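+
+		// Example usage (an illustrative sketch; CallstackContext resolves to the
+		// variant matching the processor this translation unit is compiled for):
+		//    CallstackContext context;
+		//    GetCallstackContext(context, GetThreadId()); // Declared in eathread_callstack.h.
+		//    if(CallstackContext::kType == CALLSTACK_CONTEXT_ARM64)
+		//        { /* context has mFP/mSP/mLR/mPC members on this platform. */ }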
+
+			struct CallstackContextMIPS : public CallstackContextBase
+			{
+				uintptr_t mPC;      /// Program counter.
+				uintptr_t mSP;      /// Stack pointer.
+				uintptr_t mFP;      /// Frame pointer.
+				uintptr_t mRA;      /// Return address.
+
+				CallstackContextMIPS() : mPC(0), mSP(0), mFP(0), mRA(0) {}
+			};
+
+
+			struct CallstackContextSPU : public CallstackContextBase
+			{
+				uint32_t mGPR0;    /// General purpose register 0, word 0: return address. If this is zero then we can still read a call stack, but simply lose the first entry.
+				uint32_t mGPR1;    /// General purpose register 1, word 0: caller stack frame address. This is required to be set in order to read the call stack properly.
+
+				CallstackContextSPU() : mGPR0(0), mGPR1(0) {}
+			};
+
+
+		union VMXRegister
+		{
+			uint8_t  mByte    [16 / sizeof(uint8_t )];
+			uint16_t mHalfword[16 / sizeof(uint16_t)];
+			uint32_t mWord    [16 / sizeof(uint32_t)];
+			uint64_t mDword   [16 / sizeof(uint64_t)];  // Some VMX implementations don't support 64 bit integers.
+			float    mFloat   [16 / sizeof(float)];
+			double   mDouble  [16 / sizeof(double)];    // Some VMX implementations don't support 64 bit doubles.
+		};
+
+
+
+
+		/// ContextPowerPC32
+		///
+		/// This is a generic 32 bit PowerPC with VMX context.
+		///
+		struct ContextPowerPC32
+		{
+			uint32_t    mGpr[32];    // General registers 0..31
+			uint32_t    mCr;         // Condition register
+			uint32_t    mXer;        // Fixed point exception register
+			uint32_t    mLr;         // Link register
+			uint32_t    mCtr;        // Count register low
+			uint32_t    mCtrHigh;    // Count register high
+			uint32_t    mIar;        // Instruction address register
+			uint32_t    mMsr;        // Machine status register
+			double      mFpr[32];    // Floating registers 0..31
+			double      mFpscr;      // Floating point status/control reg
+			VMXRegister mVr[32];     // Vector registers 0..31
+			VMXRegister mVscr;       // Vector status/control register
+
+		}; // ContextPowerPC32
+
+
+
+
+		/// ContextPowerPC64
+		///
+		/// This is a generic 64 bit PowerPC with VMX context.
+		///
+		struct ContextPowerPC64
+		{
+			uint64_t    mGpr[32];    // General registers 0..31
+			uint64_t    mCr;         // Condition register
+			uint64_t    mXer;        // Fixed point exception register
+			uint64_t    mLr;         // Link register
+			uint64_t    mCtr;        // Count register
+			uint64_t    mIar;        // Instruction address register
+			uint64_t    mMsr;        // Machine status register
+			double      mFpr[32];    // Floating registers 0..31
+			double      mFpscr;      // Floating point status/control reg
+			VMXRegister mVr[32];     // Vector registers 0..31
+			VMXRegister mVscr;       // Vector status/control register
+
+		}; // ContextPowerPC64
+
+
+
+
+		/// ContextX86
+		///
+		/// Generic Intel x86 context.
+		/// This is a duplicate of the CONTEXT structure defined by Microsoft in WinNT.h.
+		///
+		struct ContextX86
+		{
+			uint32_t   ContextFlags;
+
+			uint32_t   Dr0;
+			uint32_t   Dr1;
+			uint32_t   Dr2;
+			uint32_t   Dr3;
+			uint32_t   Dr6;
+			uint32_t   Dr7;
+
+			// FLOATING_SAVE_AREA
+			uint32_t   ControlWord;
+			uint32_t   StatusWord;
+			uint32_t   TagWord;
+			uint32_t   ErrorOffset;
+			uint32_t   ErrorSelector;
+			uint32_t   DataOffset;
+			uint32_t   DataSelector;
+			uint8_t    RegisterArea[80];
+			uint32_t   Cr0NpxState;
+
+			uint32_t   SegGs;
+			uint32_t   SegFs;
+			uint32_t   SegEs;
+			uint32_t   SegDs;
+
+			uint32_t   Edi;
+			uint32_t   Esi;
+			uint32_t   Ebx;
+			uint32_t   Edx;
+			uint32_t   Ecx;
+			uint32_t   Eax;
+
+			uint32_t   Ebp;
+			uint32_t   Eip;
+			uint32_t   SegCs;
+			uint32_t   EFlags;
+			uint32_t   Esp;
+			uint32_t   SegSs;
+
+			uint8_t    ExtendedRegisters[512];
+
+		}; // ContextX86
+
+		#ifdef EA_PROCESSOR_X86 // Win32, Linux, OSX.
+			struct Context : public ContextX86
+			{
+				// Empty
+			};
+		#endif
+
+
+
+		/// ContextX86_64
+		///
+		/// Generic Intel x86-64 context.
+		/// This is a duplicate of the CONTEXT structure defined 
+		/// by Microsoft in WinNT.h in VC8 and later.
+		///
+		EA_PREFIX_ALIGN(16)
+		struct M128A_
+		{
+			uint64_t Low;
+			int64_t  High;
+		} EA_POSTFIX_ALIGN(16);
+
+		struct XMM_SAVE_AREA32_
+		{
+			uint16_t  ControlWord;
+			uint16_t  StatusWord;
+			uint8_t   TagWord;
+			uint8_t   Reserved1;
+			uint16_t  ErrorOpcode;
+			uint32_t  ErrorOffset;
+			uint16_t  ErrorSelector;
+			uint16_t  Reserved2;
+			uint32_t  DataOffset;
+			uint16_t  DataSelector;
+			uint16_t  Reserved3;
+			uint32_t  MxCsr;
+			uint32_t  MxCsr_Mask;
+			M128A_    FloatRegisters[8];
+			M128A_    XmmRegisters[16];
+			uint8_t   Reserved4[96];
+		};
+
+		EA_PREFIX_ALIGN(16) struct ContextX86_64
+		{
+			uint64_t P1Home;
+			uint64_t P2Home;
+			uint64_t P3Home;
+			uint64_t P4Home;
+			uint64_t P5Home;
+			uint64_t P6Home;
+
+			uint32_t ContextFlags;
+			uint32_t MxCsr;
+
+			uint16_t SegCs;
+			uint16_t SegDs;
+			uint16_t SegEs;
+			uint16_t SegFs;
+			uint16_t SegGs;
+			uint16_t SegSs;
+			uint32_t EFlags;
+
+			uint64_t Dr0;
+			uint64_t Dr1;
+			uint64_t Dr2;
+			uint64_t Dr3;
+			uint64_t Dr6;
+			uint64_t Dr7;
+
+			uint64_t Rax;
+			uint64_t Rcx;
+			uint64_t Rdx;
+			uint64_t Rbx;
+			uint64_t Rsp;
+			uint64_t Rbp;
+			uint64_t Rsi;
+			uint64_t Rdi;
+			uint64_t R8;
+			uint64_t R9;
+			uint64_t R10;
+			uint64_t R11;
+			uint64_t R12;
+			uint64_t R13;
+			uint64_t R14;
+			uint64_t R15;
+
+			uint64_t Rip;
+
+			union {
+				XMM_SAVE_AREA32_ FltSave;
+
+				struct {
+					M128A_ Header[2];
+					M128A_ Legacy[8];
+					M128A_ Xmm0;
+					M128A_ Xmm1;
+					M128A_ Xmm2;
+					M128A_ Xmm3;
+					M128A_ Xmm4;
+					M128A_ Xmm5;
+					M128A_ Xmm6;
+					M128A_ Xmm7;
+					M128A_ Xmm8;
+					M128A_ Xmm9;
+					M128A_ Xmm10;
+					M128A_ Xmm11;
+					M128A_ Xmm12;
+					M128A_ Xmm13;
+					M128A_ Xmm14;
+					M128A_ Xmm15;
+				} DUMMYSTRUCTNAME;
+			} DUMMYUNIONNAME;
+
+			M128A_   VectorRegister[26];
+			uint64_t VectorControl;
+
+			uint64_t DebugControl;
+			uint64_t LastBranchToRip;
+			uint64_t LastBranchFromRip;
+			uint64_t LastExceptionToRip;
+			uint64_t LastExceptionFromRip;
+
+		}; // ContextX86_64
+
+		#ifdef EA_PROCESSOR_X86_64
+			struct Context : public ContextX86_64
+			{
+				// Empty
+			};
+		#endif
+
+
+
+
+		union DoubleFloat
+		{
+			double   d64;
+			float    f32[2];
+			uint64_t u64;
+			uint32_t u32[2];
+		};
+
+
+		/// ContextARM
+		///
+		/// Generic ARM processor context.
+		/// There are many variations of ARM processors, so one context can't 
+		/// address them all. We assume an ARM 7 with VFPv3 here, which is the
+		/// latest we use as of 2010.
+		/// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0041c/ch09s02s02.html
+		/// http://www.arm.com/products/processors/technologies/vector-floating-point.php
+		///
+		/// mGpr[0]   Volatile register. Argument1, return value.
+		/// mGpr[1]   Volatile register. Argument2, Second 32-bits if double/int Return Value
+		/// mGpr[2]   Volatile register. Argument3.
+		/// mGpr[3]   Volatile register. Argument4. Further arguments are put on the stack.
+		/// mGpr[4]   Permanent register.
+		/// mGpr[5]   Permanent register.
+		/// mGpr[6]   Permanent register.
+		/// mGpr[7]   Permanent register. Thumb instruction set frame pointer.
+		/// mGpr[8]   Permanent register.
+		/// mGpr[9]   Permanent register. Has platform-specific uses. On iOS it's reserved for the OS.
+		/// mGpr[10]  Permanent register. SL (Stack limit, in some uses)
+		/// mGpr[11]  Permanent register. ARM instruction set frame pointer, except for Apple/iOS where it's general purpose.
+		/// mGpr[12]  Permanent register. IP (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[13]  Permanent register. SP (Stack pointer)
+		/// mGpr[14]  Permanent register. LR (Link register)
+		/// mGpr[15]  Permanent register. PC (Program Counter)
+
+		struct ContextARM
+		{
+			uint32_t    mGpr[16];           // General registers.
+			uint32_t    mCpsr;              // Current program status register.
+			uint32_t    mSpsr;              // Saved program status register.
+			uint32_t    mFpscr;             // Floating point status condition register.
+			DoubleFloat mDoubleFloat[32];   // If these are present, the device will have either 16 (VFPv3-D16) or 32 (VFPv3-D32) registers.
+
+		}; // ContextARM
+
+		#ifdef EA_PROCESSOR_ARM32
+			struct Context : public ContextARM
+			{
+				// Empty
+			};
+		#endif
+
+		/// ContextARM64
+		///
+		/// Generic ARM64 processor context.
+		/// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf (page 14)
+		/// https://en.wikipedia.org/wiki/Aarch64#AArch64 (optional reading)
+		/// http://lxr.free-electrons.com/source/arch/arm64/include/uapi/asm/sigcontext.h
+		///
+		/// mGpr[0]   Volatile register. Argument1, return value.
+		/// mGpr[1]   Volatile register. Argument2. Second 64 bits of a 128-bit return value.
+		/// mGpr[2]   Volatile register. Argument3.
+		/// mGpr[3]   Volatile register. Argument4. 
+		/// mGpr[4]   Volatile register. Argument5. 
+		/// mGpr[5]   Volatile register. Argument6. 
+		/// mGpr[6]   Volatile register. Argument7. 
+		/// mGpr[7]   Volatile register. Argument8. 
+		/// mGpr[8]   Permanent register. syscall number is in r8.
+		/// mGpr[9]   Volatile register. Temporary data.
+		/// mGpr[10]  Volatile register. Temporary data.
+		/// mGpr[11]  Volatile register. Temporary data.
+		/// mGpr[12]  Volatile register. Temporary data.
+		/// mGpr[13]  Volatile register. Temporary data.
+		/// mGpr[14]  Volatile register. Temporary data.
+		/// mGpr[15]  Volatile register. Temporary data.
+		/// mGpr[16]  Permanent register. IP0 (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[17]  Permanent register. IP1 (scratch register/new-sb in inter-link-unit calls)
+		/// mGpr[18]  Permanent register. Has platform-specific uses. On iOS it's reserved for the OS.
+		/// mGpr[19]  Callee-saved register. 
+		/// mGpr[20]  Callee-saved register. 
+		/// mGpr[21]  Callee-saved register. 
+		/// mGpr[22]  Callee-saved register. 
+		/// mGpr[23]  Callee-saved register. 
+		/// mGpr[24]  Callee-saved register. 
+		/// mGpr[25]  Callee-saved register. 
+		/// mGpr[26]  Callee-saved register. 
+		/// mGpr[27]  Callee-saved register. 
+		/// mGpr[28]  Callee-saved register. 
+		/// mGpr[29]  Permanent register. FP (Frame pointer)
+		/// mGpr[30]  Permanent register. LR (Link register)
+		/// mGpr[31]  Permanent register. SP (Stack pointer)
+		///
+		/// Program Counter is not a General Purpose Register 
+		/// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0801a/BABGHBJC.html
+		EA_PREFIX_ALIGN(16)
+		struct ContextARM64
+		{
+			uint64_t    mGpr[32];           // General registers.
+			uint64_t    mPC;                // Program counter.
+			uint64_t    mNzcv;              // Global condition register.
+			uint32_t    mFpsr;              // Floating point status register.
+			uint32_t    mFpcr;              // Floating point condition register.
+			union
+			{
+				uint8_t  mByteArray  [512];                     // Access Neon registers as raw bytes.
+				double   mDoubleArray[512/sizeof(double)];      // Access Neon registers as doubles
+				float    mFloatArray [512/sizeof(float)];       // Access Neon registers as floats
+				uint16_t mUInt16Array[512/sizeof(uint16_t)];    // Access Neon registers as uint16_t's
+				uint32_t mUInt32Array[512/sizeof(uint32_t)];    // Access Neon registers as uint32_t's
+				uint64_t mUInt64Array[512/sizeof(uint64_t)];    // Access Neon registers as uint64_t's
+			} mNeon;
+			uint32_t mPadding[2]; // Required to avoid warning C4324 under VC++.
+		} EA_POSTFIX_ALIGN(16); // ContextARM64
+
+		#ifdef EA_PROCESSOR_ARM64
+			struct Context : public ContextARM64
+			{
+				// Empty
+			};
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+EA_RESTORE_VC_WARNING()
+
+#endif // Header include guard.
+
+
+

+ 254 - 0
include/eathread/eathread_condition.h

@@ -0,0 +1,254 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a condition variable in the style of Posix condition variables
+// and Java and C# thread monitors. (Java objects and C# monitors have built-in
+// locks, whereas Posix condition variables and EAThread Conditions do not.)
+// A Condition is usually the appropriate thread synchronization mechanism for
+// producer/consumer situations whereby one or more threads create data for
+// one or more other threads to work on, such as is the case with a message queue.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_CONDITION_H
+#define EATHREAD_EATHREAD_CONDITION_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_mutex.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'EA::Thread::simple_list<T>' needs to have
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAConditionData
+///
+/// This is used internally by class Condition.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if defined(EA_PLATFORM_SONY)
+	// Condition variables are built into Posix/Unix.
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	struct EAConditionData
+	{
+		ScePthreadCond mCV;
+		EAConditionData();
+	};
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	// Condition variables are built into Posix/Unix.
+	#include <pthread.h>
+
+	struct EAConditionData
+	{
+		pthread_cond_t mCV;
+		EAConditionData();
+	};
+
+#else // All other platforms
+	#include <eathread/eathread_semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	struct EATHREADLIB_API EAConditionData
+	{
+		EA::Thread::AtomicInt32 mnWaitersBlocked;
+		int                     mnWaitersToUnblock;
+		int                     mnWaitersDone;
+		EA::Thread::Semaphore   mSemaphoreBlockQueue;
+		EA::Thread::Semaphore   mSemaphoreBlockLock;
+		EA::Thread::Mutex       mUnblockLock;
+
+		EAConditionData();
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		EAConditionData(const EAConditionData& rhs);             // copy constructor
+		EAConditionData& operator=(const EAConditionData& rhs);  // assignment operator
+	};
+
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+#if defined(EA_PLATFORM_SONY)
+		static const int CONDITION_VARIABLE_NAME_LENGTH_MAX = 31;
+#else
+		static const int CONDITION_VARIABLE_NAME_LENGTH_MAX = 15;
+#endif
+		/// ConditionParameters
+		/// Specifies condition variable settings.
+		struct EATHREADLIB_API ConditionParameters
+		{
+			bool mbIntraProcess;										/// True if the Condition is intra-process, else inter-process.
+			char mName[CONDITION_VARIABLE_NAME_LENGTH_MAX + 1];			/// Condition name, applicable only to platforms that recognize named synchronization objects.
+
+			ConditionParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// Condition
+		/// Implements a condition variable thread synchronization primitive. A condition variable is usually the 
+		/// appropriate thread synchronization mechanism for producer/consumer situations whereby one or more 
+		/// threads create data for one or more other threads to work on, such as is the case with a message queue. 
+		///
+		/// To use a condition variable to wait for resource, you Lock the Mutex for that resource, then (in a loop)
+		/// check and Wait on a condition variable that you associate with the mutex. Upon calling Wait, 
+		/// the Lock will be released so that other threads can adjust the resource. Upon return from Wait,
+		/// the Mutex is re-locked for the caller. To use a Condition to signal a change in something, you simply
+		/// call the Signal function. In the case of Signal(false), one blocking waiter will be released,
+		/// whereas with Signal(true), all blocking waiters will be released. Upon release of single or multiple
+		/// waiting threads, the Lock is contested for by all of them, so in the case of more than one waiter,
+		/// only one will immediately come away with ownership of the lock.
+		class EATHREADLIB_API Condition
+		{
+		public:
+			enum Result
+			{
+				kResultOK      =  0,
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// Condition
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Condition(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Condition(NULL, false).
+			Condition(const ConditionParameters* pConditionParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~Condition
+			/// Destroys the Condition object. If any threads are blocked waiting on 
+			/// the Condition while it is destroyed, the resulting behaviour is undefined.
+			~Condition();
+
+			/// Init
+			/// Initializes the Condition.
+			bool Init(const ConditionParameters* pConditionParameters);
+
+			/// Wait
+			/// Waits for the Condition with timeout. You must have a Mutex 
+			/// (that you conceptually associate with the resource) locked before
+			/// calling this function or else the resulting behaviour is undefined.
+			/// Within a while loop, check the resource state and call Wait if the 
+			/// necessary condition is not met.
+			///
+			/// The call to Wait associates the Condition with your mutex, so it can
+			/// then unlock the mutex/resource (allows another thread to fill the resource).
+			///
+			/// Upon non-error return of Wait, the mutex will be re-locked by the calling 
+			/// thread, even if the result is a timeout. Upon returning from wait, before 
+			/// doing any processing as a result of a Signal, your loop should always re-check
+			/// the resource state. The Posix Wait specification explicitly notes
+			/// that uncommon 'spurious wakeups' are possible and so should be tested
+			/// for. It is impossible to test for a spurious wakeup from within this Wait
+			/// function, as this function can't know the resource state that caused the 
+			/// Signal to occur.
+			///
+			/// It should be noted that upon a kResultOK return from Wait, the user should
+			/// not assume that what the user was waiting on is still available. The signaling
+			/// of a Condition should be considered merely a hint to the waiter that the user
+			/// can probably proceed. Also, the user should usually call Wait only when 
+			/// there is not yet anything to proceed with; the user should check for this before calling Wait.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			Result Wait(Mutex* pMutex, const ThreadTime& timeoutAbsolute = kTimeoutNone);
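+
+			/// Example usage (an illustrative sketch; msgQueue, queueMutex and 
+			/// queueCondition are hypothetical objects guarding a message queue):
+			///    queueMutex.Lock();
+			///    while(msgQueue.empty())               // Loop to handle spurious wakeups.
+			///        queueCondition.Wait(&queueMutex); // Unlocks the mutex while waiting; re-locks on return.
+			///    Message msg = msgQueue.front();
+			///    msgQueue.pop();
+			///    queueMutex.Unlock();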
+
+			/// Signal
+			/// Releases one or all waiters, depending on the input 'bBroadcast' argument.
+			/// The waiters will then contest for the Lock.
+			bool Signal(bool bBroadcast = false);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mConditionData; }
+
+		protected:
+			EAConditionData mConditionData;
+
+		private:
+			// Objects of this class are not copyable.
+			Condition(const Condition&){}
+			Condition& operator=(const Condition&){ return *this; }
+		};
+
+
+		/// ConditionFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Condition.
+		/// A primary use of this would be to allow the Condition implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ConditionFactory
+		{
+		public:
+			static Condition* CreateCondition();                        // Internally implemented as: return new Condition;
+			static void       DestroyCondition(Condition* pCondition);  // Internally implemented as: delete pCondition;
+
+			static size_t     GetConditionSize();                       // Internally implemented as: return sizeof(Condition);
+			static Condition* ConstructCondition(void* pMemory);        // Internally implemented as: return new(pMemory) Condition;
+			static void       DestructCondition(Condition* pCondition); // Internally implemented as: pCondition->~Condition();
+		};
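+
+		/// Example usage (an illustrative sketch of the manual construction path,
+		/// using a hypothetical user-supplied allocator):
+		///    void* pMemory = userAllocator.Alloc(ConditionFactory::GetConditionSize());
+		///    Condition* pCondition = ConditionFactory::ConstructCondition(pMemory);
+		///    // ... use pCondition ...
+		///    ConditionFactory::DestructCondition(pCondition);
+		///    userAllocator.Free(pMemory);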
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+
+#endif // EATHREAD_EATHREAD_CONDITION_H
+
+
+
+
+
+
+
+
+
+

+ 797 - 0
include/eathread/eathread_futex.h

@@ -0,0 +1,797 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a fast, user-space mutex. Also known as a lightweight mutex.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+#define EATHREAD_EATHREAD_FUTEX_H
+
+#include <eathread/eathread.h>
+#include <eathread/eathread_atomic.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_mutex.h>
+#include <stddef.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_MANUAL_FUTEX_ENABLED
+//
+// Defined as 0 or 1.
+// If enabled then Futex is implemented with atomics and semaphores instead of
+// via a direct system-supported lightweight mutex implementation.
+//
+#ifndef EATHREAD_MANUAL_FUTEX_ENABLED
+	#if defined(EA_PLATFORM_MICROSOFT)              // VC++ has CriticalSection, which is a futex.
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 0     // Currently 0 because that's best. Can be set to 1.
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 0    // Allows us to have a spin count.        
+	#else
+		#define EATHREAD_MANUAL_FUTEX_ENABLED 1     // Set to 1 until we can resolve any dependencies such as PPMalloc.
+	#endif
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_FUTEX_SPIN_COUNT
+//
+#ifndef EATHREAD_FUTEX_SPIN_COUNT
+	#define EATHREAD_FUTEX_SPIN_COUNT 256 
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// Futex data
+///
+/// This is used internally by class Futex.
+/// Note that we don't use an EAThread semaphore, as the native semaphore
+/// we use here is more direct and avoids inefficiencies that result from 
+/// the possibility of EAThread semaphores being optimized for being 
+/// standalone.
+/// 
+#if !EA_THREADS_AVAILABLE
+	#define EA_THREAD_NONTHREADED_FUTEX 1
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		struct EAFutexSemaphore
+		{
+			int mnCount;
+		};
+	#endif
+
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_VC_WARNING(4265 4365 4836 4571 4625 4626 4628 4193 4127 4548)
+	#include <mutex>
+	EA_RESTORE_VC_WARNING()
+
+#elif defined(__APPLE__)
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <eathread/eathread_semaphore.h>
+		typedef EA::Thread::Semaphore EAFutexSemaphore;
+	#endif
+
+#elif defined(EA_PLATFORM_SONY)
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <kernel/semaphore.h>
+		#include <eathread/internal/timings.h>
+
+		typedef SceKernelSema EAFutexSemaphore;        
+	#endif
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		#include <semaphore.h>
+		typedef sem_t EAFutexSemaphore;
+	#endif
+
+#elif defined(EA_PLATFORM_MICROSOFT)
+
+	// We avoid #including heavy system headers, as this file is a common header itself.
+
+		extern "C"
+		{
+			#if defined(EA_COMPILER_GNUC)
+				// Mingw declares these slightly differently.
+				struct _CRITICAL_SECTION;
+				__declspec(dllimport) int           __stdcall InitializeCriticalSectionAndSpinCount(_CRITICAL_SECTION* pCriticalSection, unsigned long dwSpinCount);
+				__declspec(dllimport) void          __stdcall InitializeCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall DeleteCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall EnterCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall LeaveCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) int           __stdcall TryEnterCriticalSection(_CRITICAL_SECTION* pCriticalSection);
+			#else
+				#if !defined _Must_inspect_result_ 
+					#define _Must_inspect_result_
+				#endif
+
+				struct _RTL_CRITICAL_SECTION;
+				__declspec(dllimport) _Must_inspect_result_ int           __stdcall InitializeCriticalSectionAndSpinCount(_Out_ _RTL_CRITICAL_SECTION* pCriticalSection, _In_ unsigned long dwSpinCount);
+				__declspec(dllimport) void          __stdcall InitializeCriticalSection(_Out_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall DeleteCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall EnterCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) void          __stdcall LeaveCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+				__declspec(dllimport) int           __stdcall TryEnterCriticalSection(_Inout_ _RTL_CRITICAL_SECTION* pCriticalSection);
+			#endif
+
+			__declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+		}
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		typedef void* EAFutexSemaphore; // void* instead of HANDLE to avoid #including windows headers.
+	#endif
+
+#else
+	#define EA_THREAD_NONTHREADED_FUTEX 1
+
+	#if EATHREAD_MANUAL_FUTEX_ENABLED
+		struct EAFutexSemaphore
+		{
+			int mnCount;
+		};
+	#endif
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if defined(_WIN64)
+			static const int FUTEX_PLATFORM_DATA_SIZE = 40; // CRITICAL_SECTION is 40 bytes on Win64.
+		#elif defined(_WIN32)
+			static const int FUTEX_PLATFORM_DATA_SIZE = 32; // CRITICAL_SECTION is 24 bytes on Win32 and 28 bytes on XBox 360; we use 32 to provide headroom for both.
+		#endif
+
+
+		/// class Futex
+		///
+		/// A Futex is a fast user-space mutex. It works by attempting to use
+		/// atomic integer updates for the common case whereby the mutex is
+		/// not already locked. If the mutex is already locked then the futex
+		/// drops down to waiting on a system-level semaphore. The result is 
+		/// that uncontested locking operations can be significantly faster 
+		/// than contested locks. Contested locks are slightly slower than in 
+		/// the case of a formal mutex, but usually not by much.
+		///
+		/// The Futex acts the same as a conventional mutex with respect to  
+		/// memory synchronization. Specifically: 
+		///     - A Lock or successful TryLock implies a read barrier (i.e. acquire).
+		///     - A second lock by the same thread implies no barrier.
+		///     - A failed TryLock implies no barrier.
+		///     - A final unlock by a thread implies a write barrier (i.e. release).
+		///     - A non-final unlock by a thread implies no barrier.
+		///
+		/// Futex limitations relative to Mutexes:
+		///     - Futexes cannot be inter-process.
+		///     - Futexes cannot be named.
+		///     - Futexes cannot participate in condition variables. A special 
+		///       condition variable could be made that works with them, though.
+		///     - Futex lock timeouts are supported, but on some platforms they are 
+		///       emulated by polling and so are less precise than Mutex timeouts.
+		///
+		class EATHREADLIB_API Futex
+		{
+		public:
+			enum Result
+			{
+				kResultTimeout = -2
+			};
+
+			/// Futex
+			///
+			/// Creates a Futex. There are no creation options.
+			///
+			Futex();
+
+			/// ~Futex
+			///
+			/// Destroys an existing futex. The futex must not be locked by any thread
+			/// upon this call, otherwise the resulting behaviour is undefined.
+			///
+			~Futex();
+
+			/// TryLock
+			///
+			/// Tries to lock the futex; returns true if possible.
+			/// This function always returns immediately. It will return false if 
+			/// the futex is locked by another thread, and it will return true 
+			/// if the futex is not locked or is already locked by the current thread.
+			///
+			bool TryLock();
+
+			/// Lock
+			///
+			/// Locks the futex; returns the new lock count.
+			/// If the futex is locked by another thread, this call will block until
+			/// the futex is unlocked. If the futex is not locked or is locked by the
+			/// current thread, this call will return immediately.
+			///
+			void Lock();
+
+			/// Lock
+			///
+			/// Tries to lock the futex until the given time.
+			/// If the futex is locked by another thread, this call will block until
+			/// the futex is unlocked or the given time has passed. If the futex is not locked
+			/// or is locked by the current thread, this call will return immediately.
+			///
+			/// Return value:
+			///     kResultTimeout Timeout
+			///     > 0            The new lock count.
+			int Lock(const ThreadTime& timeoutAbsolute);
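+
+			/// Example usage (an illustrative sketch; 'futex' is a hypothetical instance;
+			/// the absolute timeout follows the EAThread GetThreadTime() + ms convention):
+			///    ThreadTime timeoutAbsolute = GetThreadTime() + 500; // ~500ms from now.
+			///    if(futex.Lock(timeoutAbsolute) != Futex::kResultTimeout)
+			///    {
+			///        // ... we own the futex ...
+			///        futex.Unlock();
+			///    }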
+
+			/// Unlock
+			///
+			/// Unlocks the futex. The futex must already be locked at least once by 
+			/// the calling thread. Otherwise the behaviour is not defined.
+			///
+			void Unlock();
+
+			/// GetLockCount
+			///
+			/// Returns the number of locks on the futex. The return value from this 
+			/// function is only reliable if the calling thread already has one lock on 
+			/// the futex. Otherwise the returned value may not represent the actual value
+			/// at any point in time, as other threads may lock or unlock the futex soon after the call.
+			///
+			int GetLockCount() const;
+
+			/// HasLock
+			/// Returns true if the current thread has the futex locked. 
+			bool HasLock() const;
+
+			/// SetSpinCount
+			/// Specifies how many times we spin while waiting to acquire the lock.
+			void SetSpinCount(Uint spinCount);
+
+		protected:
+			#if EATHREAD_MANUAL_FUTEX_ENABLED
+				void CreateFSemaphore();
+				void DestroyFSemaphore();
+				void SignalFSemaphore();
+				void WaitFSemaphore();
+				bool WaitFSemaphore(const ThreadTime& timeoutAbsolute);
+				void OnLockAcquired(ThreadUniqueId threadUniqueId);
+			#endif
+
+		private:
+			// Objects of this class are not copyable.
+			Futex(const Futex&){}
+			Futex& operator=(const Futex&){ return *this; }
+
+		protected:
+			#if EATHREAD_MANUAL_FUTEX_ENABLED
+				AtomicUWord      mUseCount;         /// Not the same thing as lock count, as waiters increment this value.
+				uint16_t         mRecursionCount;   /// The number of times the lock-owning thread has the mutex. This is currently uint16_t for backward compatibility with PPMalloc.
+				uint16_t         mSpinCount;        /// The number of times we spin while waiting for the lock.   To do: Change these to be uint32_t once PPMalloc is no longer dependent on this.
+				ThreadUniqueId   mThreadUniqueId;   /// Unique id for owning thread; not necessarily same as type ThreadId.
+				EAFutexSemaphore mSemaphore;        /// OS-level semaphore that waiters wait on when lock attempts fail.
+			#else
+
+				#if EA_USE_CPP11_CONCURRENCY
+					std::recursive_timed_mutex mMutex;
+					int mnLockCount;
+					std::thread::id mLockingThread;
+				#elif defined(EA_COMPILER_MSVC) && defined(EA_PLATFORM_MICROSOFT) // In the case of Microsoft platforms, we just use CRITICAL_SECTION, as it is essentially a futex.
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					uint64_t mCRITICAL_SECTION[FUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t)]; // The size differs between Win32 and Win64 via FUTEX_PLATFORM_DATA_SIZE.
+				#elif defined(EA_PLATFORM_SONY)
+					EA::Thread::Mutex mMutex;
+					Uint mSpinCount;
+				#else
+					#define EAT_FUTEX_USE_MUTEX 1
+					EA::Thread::Mutex mMutex;
+				#endif
+			#endif
+		};
+
+
+
+		/// FutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Futex.
+		/// A primary use of this would be to allow the Futex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		class EATHREADLIB_API FutexFactory
+		{
+		public:
+			static Futex*  CreateFutex();                    // Internally implemented as: return new Futex;
+			static void    DestroyFutex(Futex* pFutex);      // Internally implemented as: delete pFutex;
+
+			static size_t  GetFutexSize();                   // Internally implemented as: return sizeof(Futex);
+			static Futex*  ConstructFutex(void* pMemory);    // Internally implemented as: return new(pMemory) Futex;
+			static void    DestructFutex(Futex* pFutex);     // Internally implemented as: pFutex->~Futex();
+		};
+
+
+
+		/// class AutoFutex
+		/// An AutoFutex locks the Futex in its constructor and 
+		/// unlocks the Futex in its destructor (when it goes out of scope).
+		class EATHREADLIB_API AutoFutex
+		{
+		public:
+			AutoFutex(Futex& futex);
+		   ~AutoFutex();
+
+		protected:
+			Futex& mFutex;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoFutex(const AutoFutex&);
+			const AutoFutex& operator=(const AutoFutex&);
+		};
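+
+		/// Example usage (an illustrative sketch; gFutex is a hypothetical global):
+		///    Futex gFutex;
+		///
+		///    void UpdateSharedState()
+		///    {
+		///        AutoFutex lock(gFutex); // Locks here; unlocks when 'lock' goes out of scope.
+		///        // ... modify state shared between threads ...
+		///    }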
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Inlines
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAFutexReadBarrier
+//
+// For futexes, which are intended to be used only in user-space and without 
+// talking to IO devices, DMA memory, or uncached memory, we directly use
+// memory barriers.
+	#define EAFutexReadBarrier      EAReadBarrier
+	#define EAFutexWriteBarrier     EAWriteBarrier
+	#define EAFutexReadWriteBarrier EAReadWriteBarrier
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if EATHREAD_MANUAL_FUTEX_ENABLED
+
+			inline Futex::Futex()
+			  : mUseCount(0), 
+				mRecursionCount(0),
+				mSpinCount(EATHREAD_FUTEX_SPIN_COUNT),
+				mThreadUniqueId(kThreadUniqueIdInvalid),
+				mSemaphore()
+			{
+				CreateFSemaphore();
+			}
+
+
+			inline Futex::~Futex()
+			{
+				EAT_ASSERT(mUseCount == 0);
+
+				DestroyFSemaphore();
+			}
+		
+		
+			inline void Futex::OnLockAcquired(ThreadUniqueId threadUniqueId)
+			{
+				EAFutexReadBarrier();
+				mThreadUniqueId = threadUniqueId;
+				mRecursionCount = 1;
+			}
+
+
+			inline bool Futex::TryLock()
+			{
+				ThreadUniqueId threadUniqueId;
+				EAThreadGetUniqueId(threadUniqueId);
+
+				if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+				{
+					OnLockAcquired(threadUniqueId);
+					return true;
+				}
+
+				// This only happens in the case of recursion on the same thread.
+				// This is thread-safe because the only case where this equality passes
+				// is when this value was set on this thread anyway.
+				if(EATHREAD_LIKELY(mThreadUniqueId == threadUniqueId)) // If it turns out that we already have the lock...
+				{
+					++mUseCount;
+					++mRecursionCount;
+					return true;
+				}
+
+				return false;
+			}
+
+
+			inline void Futex::Lock()
+			{
+				ThreadUniqueId threadUniqueId;
+				EAThreadGetUniqueId(threadUniqueId);
+
+				if(mSpinCount) // If we have spinning enabled (usually true)...
+				{
+					if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+					{
+						OnLockAcquired(threadUniqueId);
+						return;
+					}
+
+					if(mThreadUniqueId != threadUniqueId) // Don't spin if we already have the lock.
+					{
+						for(Uint count = mSpinCount; count > 0; count--) // Implement a spin lock for a number of tries.
+						{
+							// We use GetValueRaw calls below instead of atomics because we don't want atomic behavior.
+							if(mUseCount.GetValueRaw() > 1) // If there are multiple waiters, don't bother spinning any more, as they are already spinning themselves.
+								break;
+
+							if(mUseCount.GetValueRaw() == 0) // If it looks like the lock is now free, try to acquire it.
+							{
+								if(mUseCount.SetValueConditional(1, 0)) // If we could acquire the lock... (set it to 1 if it's 0)
+								{
+									OnLockAcquired(threadUniqueId);
+									return;
+								}
+							}
+
+							EAProcessorPause();
+						}
+					}
+				}
+
+				if(++mUseCount > 1) // If we could not get the lock (previous value of mUseCount was >= 1 and not 0) or we already had the lock...
+				{
+					if(mThreadUniqueId == threadUniqueId) // If we already have the lock...
+					{
+						mRecursionCount++;
+						return;
+					}
+					WaitFSemaphore(); 
+				}
+				// Else the increment was from 0 to 1, and we own the lock.
+				OnLockAcquired(threadUniqueId);
+			}
+
+
+
+
+			inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+			{
+				if(timeoutAbsolute == kTimeoutNone)
+				{
+					Lock();
+					return (int)mRecursionCount;
+				}
+				else if(timeoutAbsolute == kTimeoutImmediate)
+				{
+					if(TryLock())
+						return (int)mRecursionCount;
+					else
+						return kResultTimeout;
+				}
+				else
+				{
+					ThreadUniqueId threadUniqueId;
+					EAThreadGetUniqueId(threadUniqueId);
+
+					if(++mUseCount > 1) // If we could not get the lock (previous value of mUseCount was >= 1 and not 0) or we already had the lock...
+					{
+						if(mThreadUniqueId == threadUniqueId) // If we already have the lock...
+							return (int)++mRecursionCount;
+
+						if(!WaitFSemaphore(timeoutAbsolute))
+						{
+							--mUseCount;
+							return kResultTimeout;
+						}
+					}
+					// Else the increment was from 0 to 1, and we own the lock.
+					OnLockAcquired(threadUniqueId);
+					return 1;  // Return mRecursionCount.
+				}
+			}
+
+
+			inline void Futex::Unlock()
+			{
+				#if EAT_ASSERT_ENABLED
+					ThreadUniqueId threadUniqueId;
+					EAThreadGetUniqueId(threadUniqueId);
+					EAT_ASSERT(mThreadUniqueId == threadUniqueId);
+					EAT_ASSERT((mRecursionCount > 0) && (mUseCount > 0));
+				#endif
+
+				if(EATHREAD_LIKELY(--mRecursionCount == 0))
+				{
+					mThreadUniqueId = kThreadUniqueIdInvalid;
+
+					// after the decrement below we will no longer own the lock
+					EAFutexWriteBarrier();
+					if(EATHREAD_UNLIKELY(--mUseCount > 0))
+						SignalFSemaphore();
+				}
+				else
+				{
+					// this thread still owns the lock, was recursive
+					--mUseCount;
+				}
+			}
+
+
+			inline int Futex::GetLockCount() const
+			{
+				// No atomic operation or memory barrier required, as this function only
+				// has validity if it is being called from the lock-owning thread. However,
+				// we don't at this time choose to assert that mThreadUniqueId == GetThreadId().
+				return (int)mRecursionCount;
+			}
+
+
+			inline bool Futex::HasLock() const
+			{
+				ThreadUniqueId threadUniqueId;
+				EAThreadGetUniqueId(threadUniqueId);
+
+				return (mThreadUniqueId == threadUniqueId);
+			}
+
+
+			inline void Futex::SetSpinCount(Uint spinCount)
+			{
+				mSpinCount = spinCount;
+			}
+
+		#else // #if EATHREAD_MANUAL_FUTEX_ENABLED
+
+			#if EA_USE_CPP11_CONCURRENCY
+
+				inline Futex::Futex() : mnLockCount(0) {}
+
+				inline Futex::~Futex() { EAT_ASSERT(!GetLockCount()); }
+
+				inline bool Futex::TryLock() 
+				{ 
+					if (mMutex.try_lock())
+					{
+						EAT_ASSERT(mnLockCount >= 0);
+						EAT_ASSERT(mnLockCount == 0 || mLockingThread == std::this_thread::get_id());
+						++mnLockCount;
+						mLockingThread = std::this_thread::get_id();
+						return true;
+					}
+
+					return false;
+				}
+
+				inline void Futex::Lock() { mMutex.lock(); mLockingThread = std::this_thread::get_id(); ++mnLockCount; }
+
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				{
+					if (timeoutAbsolute == kTimeoutNone)
+					{
+						if (!mMutex.try_lock())
+						{
+							return kResultTimeout;
+						}
+					}
+					else
+					{
+						std::chrono::milliseconds timeoutAbsoluteMs(timeoutAbsolute);
+						std::chrono::time_point<std::chrono::system_clock> timeout_time(timeoutAbsoluteMs);
+						if (!mMutex.try_lock_until(timeout_time))
+						{
+							return kResultTimeout;
+						}
+					}
+
+					EAT_ASSERT(mnLockCount >= 0);
+					EAT_ASSERT(mnLockCount == 0 || mLockingThread == std::this_thread::get_id());
+					mLockingThread = std::this_thread::get_id();
+					return ++mnLockCount; // This is safe to do because we have the lock.
+				}
+
+				inline void Futex::Unlock()
+				{
+					EAT_ASSERT(HasLock());
+					--mnLockCount;
+					if (mnLockCount == 0)
+						mLockingThread = std::thread::id();
+					mMutex.unlock();
+				}
+
+				inline int Futex::GetLockCount() const { return mnLockCount; }
+
+				inline bool Futex::HasLock() const 
+				{ 
+					if ((mnLockCount > 0) && (std::this_thread::get_id() == mLockingThread))
+						return true;
+					return false;
+				}  
+
+				inline void Futex::SetSpinCount(Uint)
+				{
+					// Not supported
+				}
+
+			#elif defined(EA_COMPILER_MSVC) && defined(EA_PLATFORM_MICROSOFT) // Win32, Win64, etc.
+
+				inline Futex::Futex()
+				{
+					// We use InitializeCriticalSectionAndSpinCount, as that has resulted in improved performance in practice on multiprocessors systems.
+					int rv = InitializeCriticalSectionAndSpinCount((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION, EATHREAD_FUTEX_SPIN_COUNT);
+					EAT_ASSERT(rv != 0);
+					EA_UNUSED(rv);
+				}
+
+				inline Futex::~Futex()
+				{
+					EAT_ASSERT(!GetLockCount());
+					DeleteCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				}
+
+				inline bool Futex::TryLock()
+				{
+					return TryEnterCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION) != 0;
+				}
+
+				inline void Futex::Lock()
+				{
+					EnterCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				}
+
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				{
+					if(timeoutAbsolute == kTimeoutNone)
+					{
+						Lock();
+						return GetLockCount();
+					}
+					else if(timeoutAbsolute == kTimeoutImmediate)
+					{
+						if(TryLock())
+							return GetLockCount();
+						else
+							return kResultTimeout;
+					}
+					else
+					{
+						while(!TryLock())
+						{
+							if(GetThreadTime() >= timeoutAbsolute)
+								return kResultTimeout;
+							ThreadSleep(1);
+						}
+						return GetLockCount();
+					}
+				}
+
+				inline void Futex::Unlock()
+				{
+					EAT_ASSERT(HasLock());
+					LeaveCriticalSection((_RTL_CRITICAL_SECTION*)mCRITICAL_SECTION);
+				} 
+
+				inline int Futex::GetLockCount() const
+				{
+					// Return the RecursionCount member of RTL_CRITICAL_SECTION.
+
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					#if defined(_WIN64)
+						return *((int*)mCRITICAL_SECTION + 3); 
+					#else
+						return *((int*)mCRITICAL_SECTION + 2);
+					#endif
+				}
+
+				inline bool Futex::HasLock() const
+				{
+					// Check the OwningThread member of RTL_CRITICAL_SECTION.
+
+					// We use raw structure math because otherwise we'd expose the user to system headers, 
+					// which breaks code and bloats builds. We validate our math in eathread_futex.cpp.
+					#if defined(_WIN64)
+						return (*((uint32_t*)mCRITICAL_SECTION + 4) == (uintptr_t)GetCurrentThreadId());
+					#else
+						return (*((uint32_t*)mCRITICAL_SECTION + 3) == (uintptr_t)GetCurrentThreadId());
+					#endif
+				}
+
+				inline void Futex::SetSpinCount(Uint)
+				{
+					// Not supported
+				}
+
+			#elif defined(EAT_FUTEX_USE_MUTEX)
+
+				inline Futex::Futex()
+				  { }
+
+				inline Futex::~Futex()
+				  { }
+
+				inline bool Futex::TryLock()
+				  { return mMutex.Lock(EA::Thread::kTimeoutImmediate) > 0; }
+
+				inline void Futex::Lock()
+				  { mMutex.Lock(); }
+
+				inline int Futex::Lock(const ThreadTime& timeoutAbsolute)
+				  { return mMutex.Lock(timeoutAbsolute); }
+
+				inline void Futex::Unlock()
+				  { mMutex.Unlock(); }
+
+				inline int Futex::GetLockCount() const
+				  { return mMutex.GetLockCount(); }
+
+				inline bool Futex::HasLock() const
+				  { return mMutex.HasLock(); }
+
+				inline void Futex::SetSpinCount(Uint)
+				  { }
+
+			#endif // EA_USE_CPP11_CONCURRENCY / EA_COMPILER_MSVC / EAT_FUTEX_USE_MUTEX
+
+		#endif // EATHREAD_MANUAL_FUTEX_ENABLED
+
+
+
+		inline AutoFutex::AutoFutex(Futex& futex) 
+		  : mFutex(futex)
+		{
+			mFutex.Lock();
+		}
+
+		inline AutoFutex::~AutoFutex()
+		{
+			mFutex.Unlock();
+		}
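+
+		// Example usage (a sketch; gFutex is a hypothetical Futex guarding shared state):
+		//     void UpdateSharedState()
+		//     {
+		//         AutoFutex autoFutex(gFutex); // Lock() in the constructor...
+		//         // ... modify the shared state ...
+		//     }                                // ...Unlock() in the destructor.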
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_EATHREAD_FUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 323 - 0
include/eathread/eathread_list.h

@@ -0,0 +1,323 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// This is a small templated list implementation which suffices for our 
+// purposes but is not optimal. It is present in order to avoid dependencies
+// on external libraries.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_LIST_H
+#define EATHREAD_EATHREAD_LIST_H
+
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+#include <stddef.h> // size_t, etc.
+#include <new>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		namespace details
+		{
+			/// Default allocator implementation used by the simple_list class
+			template<typename T>
+			struct ListDefaultAllocatorImpl
+			{
+				template<typename OT>
+				struct rebind { typedef ListDefaultAllocatorImpl<OT> other; };
+
+				T* construct()
+				{
+					Allocator* pAllocator = GetAllocator();
+
+					if(pAllocator)
+						return new(pAllocator->Alloc(sizeof(T))) T;
+					else
+						return new T;
+				}
+
+				void destroy(T* obj)
+				{
+					Allocator* pAllocator = GetAllocator();
+
+					if(pAllocator)
+					{
+						obj->~T();
+						pAllocator->Free(obj);
+					}
+					else
+						delete obj;
+				}
+			};
+		}
+
+
+		/// Simple version of an STL bidirectional list.
+		/// Implemented to avoid dependency on container implementations.
+		///
+		/// This implementation has some methods beyond the standard STL interface, such as find.
+		///            
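+		/// Example usage (a minimal sketch):
+		///     simple_list<int> list;
+		///     list.push_back(17);
+		///     list.push_front(3);
+		///     if(list.find(17) != list.end())
+		///         list.pop_front();
+		///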
+		template<typename T, class Allocator = details::ListDefaultAllocatorImpl<T> >
+		class simple_list
+		{
+			simple_list(const simple_list&);
+			simple_list& operator=(const simple_list&);
+
+		protected:
+			struct list_node
+			{
+				T          mValue;
+				list_node* mpPrev;
+				list_node* mpNext;
+			};
+
+			typedef list_node node_t;
+			typedef typename  Allocator::template rebind<list_node>::other allocator_t;
+			
+			allocator_t      mAllocator;
+			node_t*          mpNodeHead;
+			node_t*          mpNodeTail;
+			size_t           mnSize;
+
+		public:
+			typedef T        value_type;              //< list value type
+			typedef const T  const_value_type;        //< constant list value type
+			typedef const T& const_value_ref_type;    //< constant reference list value type
+			
+			struct         const_iterator;
+			struct         iterator;
+			friend  struct const_iterator;
+			friend  struct iterator;
+
+
+			struct const_iterator
+			{
+				friend class simple_list<T>;
+
+				const_iterator()
+					: mpNode(NULL)
+				{ }
+
+				const_iterator(const const_iterator& rhs)
+					: mpNode(rhs.mpNode)
+				{ }
+
+				const_iterator& operator=(const const_iterator& rhs)
+				{
+					mpNode = rhs.mpNode;
+					return *this;
+				}
+
+				const T& operator*() const
+					{ return mpNode->mValue; }
+
+				const T* operator->() const
+					{ return &**this; }  
+					 
+				bool operator==(const const_iterator& rhs) const
+					{ return rhs.mpNode == mpNode; }
+
+				bool operator!=(const const_iterator& rhs) const
+					{ return rhs.mpNode != mpNode; }
+
+				const_iterator& operator++()
+				{
+					mpNode = mpNode->mpNext;
+					return *this;
+				}
+
+			protected:
+				const node_t* mpNode;
+
+			protected:
+				const_iterator(node_t* pNode)
+					: mpNode(pNode)
+				{ }
+
+				const_iterator& operator=(const node_t* pNode)
+				{
+					mpNode = pNode;
+					return *this;
+				}
+			}; // const_iterator
+
+
+
+			struct iterator : public const_iterator
+			{
+				friend class simple_list<T>;
+
+				iterator()
+					: const_iterator(){ }
+
+				iterator(const const_iterator& rhs)
+					: const_iterator(rhs)
+				{ }
+
+				iterator& operator=(const const_iterator& rhs)
+				{
+					*static_cast<const_iterator*>(this) = rhs;
+					return *this;
+				}
+
+				T& operator*() const
+					{ return const_cast<T&>(**static_cast<const const_iterator*>(this)); }
+
+				T* operator->() const
+					{ return const_cast<T*>(&**static_cast<const const_iterator*>(this)); }
+
+				iterator& operator++()
+				{
+					++(*static_cast<const_iterator*>(this));
+					return *this;
+				}
+
+			protected:
+				iterator(node_t* pNode)
+					: const_iterator(pNode)
+				{ }
+
+				iterator& operator=(node_t* pNode)
+				{
+					this->mpNode = pNode;
+					return *this;
+				}
+			}; // iterator
+
+
+
+			simple_list()
+				: mnSize(0)
+			{
+				mpNodeHead         = mAllocator.construct();
+				mpNodeTail         = mAllocator.construct();
+				mpNodeHead->mpNext = mpNodeTail;
+				mpNodeHead->mpPrev = mpNodeTail;
+				mpNodeTail->mpNext = mpNodeHead;
+				mpNodeTail->mpPrev = mpNodeHead;
+			}
+
+			~simple_list()
+			{
+				clear();
+				mAllocator.destroy(mpNodeHead);
+				mAllocator.destroy(mpNodeTail);
+			}
+
+			bool empty() const
+				{ return mpNodeHead->mpNext == mpNodeTail; }
+
+			void push_back(const T& value)
+			{
+				node_t* const pNode   = mAllocator.construct();
+				pNode->mValue         = value;
+				pNode->mpPrev         = mpNodeTail->mpPrev;                        
+				pNode->mpNext         = mpNodeTail;
+				pNode->mpPrev->mpNext = pNode;
+				mpNodeTail->mpPrev    = pNode;
+				++mnSize;
+			}
+
+			void push_front(const T& value)
+			{
+				node_t* const pNode = mAllocator.construct();
+				pNode->mValue       = value;
+				pNode->mpPrev         = mpNodeHead;
+				pNode->mpNext         = mpNodeHead->mpNext;
+				pNode->mpNext->mpPrev = pNode; // The old first node (or the tail sentinel) must point back to the new node.
+				mpNodeHead->mpNext    = pNode;
+				++mnSize;
+			}
+
+			void pop_front()
+			{
+				if(!empty())
+				{
+					node_t* const pNode   = mpNodeHead->mpNext;
+					mpNodeHead->mpNext    = pNode->mpNext;
+					pNode->mpNext->mpPrev = mpNodeHead;
+					mAllocator.destroy(pNode);
+					--mnSize;
+				}
+			}
+
+			size_t size() const
+				{ return mnSize; }
+
+			iterator erase(iterator& iter)
+			{
+				if(!empty())
+				{
+					node_t* const pNext = iter.mpNode->mpNext;
+					iter.mpNode->mpNext->mpPrev = iter.mpNode->mpPrev;
+					iter.mpNode->mpPrev->mpNext = iter.mpNode->mpNext;
+					--mnSize;
+					mAllocator.destroy(const_cast<node_t*>(iter.mpNode));
+					return pNext;
+				}
+				return end();
+			}
+
+			void clear()
+			{
+				if(!empty())
+				{
+					node_t* pNode = mpNodeHead->mpNext;
+
+					while(pNode != mpNodeTail)
+					{
+						node_t* const pNext   = pNode->mpNext;
+						pNode->mpNext->mpPrev = pNode->mpPrev;
+						pNode->mpPrev->mpNext = pNext;
+						mAllocator.destroy(pNode);
+						pNode = pNext;
+					}
+					mnSize = 0;
+				}
+			}
+
+			T& front() const
+				{ return mpNodeHead->mpNext->mValue; }
+
+			const_iterator begin() const
+				{ return mpNodeHead->mpNext; }
+
+			const_iterator end() const
+				{ return mpNodeTail; }
+
+			/// Returns end() if the element is not found.
+			iterator find(const T& element)
+			{
+				iterator iter = begin();
+				while((iter != end()) && !(element == *iter))
+					 ++iter;
+				return iter;
+			}
+
+		}; // simple_list
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // EATHREAD_EATHREAD_LIST_H
+
+
+
+
+
+
+
+

+ 341 - 0
include/eathread/eathread_mutex.h

@@ -0,0 +1,341 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a lightweight mutex.
+/////////////////////////////////////////////////////////////////////////////
+
+// TODO(rparolin):  Consider adding support for static thread safety analysis.
+// https://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+
+
+#ifndef EATHREAD_EATHREAD_MUTEX_H
+#define EATHREAD_EATHREAD_MUTEX_H
+
+#if defined(_MSC_VER)
+#include <math.h>   // VC++ requires math.h to be #included before some other headers, lest you get a warning.
+#endif
+#include <stddef.h>
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAMutexData
+///
+/// This is used internally by class Mutex.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if !EA_THREADS_AVAILABLE
+	#define EA_THREAD_NONTHREADED_MUTEX 1
+
+	struct EAMutexData
+	{
+		int mnLockCount;
+
+		EAMutexData();
+	};
+
+#elif EA_USE_CPP11_CONCURRENCY
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <mutex>
+	EA_RESTORE_ALL_VC_WARNINGS()
+
+	#if defined EA_PLATFORM_MICROSOFT
+		#ifdef CreateMutex
+			#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+		#endif
+	#endif
+
+	struct EAMutexData
+	{
+		std::recursive_timed_mutex mMutex;
+		int mnLockCount;
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId; // This value is only valid in debug builds.
+		#endif
+
+		EAMutexData();
+
+	private:
+		EAMutexData(const EAMutexData&);
+		EAMutexData& operator=(const EAMutexData&);
+	};
+
+#elif defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+	#include <eathread/internal/timings.h>
+
+	struct EAMutexData
+	{
+		ScePthreadMutex mMutex;
+		int mnLockCount;
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+		#endif
+
+		EAMutexData();
+		void SimulateLock(bool bLock);
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+
+	#if defined(EA_PLATFORM_WINDOWS)
+		#ifdef CreateMutex
+			#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+		#endif
+	#endif
+
+	struct EAMutexData
+	{
+		pthread_mutex_t mMutex;
+		int mnLockCount;
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+		#endif
+
+		EAMutexData();
+		void SimulateLock(bool bLock);
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+
+	#ifdef EA_PROCESSOR_X86_64
+		static const int MUTEX_PLATFORM_DATA_SIZE = 40; // CRITICAL_SECTION is 40 bytes on Win64.
+	#else
+		static const int MUTEX_PLATFORM_DATA_SIZE = 32; // CRITICAL_SECTION is 24 bytes on Win32, 28 bytes on XBox 360.
+	#endif
+
+	#ifdef CreateMutex
+		#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+	#endif
+
+	struct EATHREADLIB_API EAMutexData
+	{
+		uint64_t mData[MUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t)]; // Holds either CRITICAL_SECTION or HANDLE if mbIntraProcess is true or false, respectively.
+		int      mnLockCount;
+		bool     mbIntraProcess;
+		#if EAT_ASSERT_ENABLED
+			EA::Thread::ThreadId mThreadId;    // This value is only valid in debug builds.
+			EA::Thread::SysThreadId mSysThreadId; // This value is only valid in debug builds.
+		#endif
+
+		EAMutexData();
+	};
+
+#else
+	#define EA_THREAD_NONTHREADED_MUTEX 1
+
+	struct EAMutexData
+	{
+		int mnLockCount;
+
+		EAMutexData();
+	};
+
+
+
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// MutexParameters
+		/// Specifies mutex settings.
+		struct EATHREADLIB_API MutexParameters
+		{
+			bool mbIntraProcess; /// True if the mutex is intra-process, else inter-process.
+			char mName[128];      /// Mutex name, applicable only to platforms that recognize named synchronization objects.
+
+			MutexParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class Mutex
+		///
+		/// Mutexes are assumed to always be 'recursive', meaning that a given thread 
+		/// can lock the mutex more than once. If you want a specifically non-recursive 
+		/// mutex, you can use a semaphore with a lock count of 1.
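+		///
+		/// Example usage (a minimal sketch):
+		///     Mutex mutex;
+		///     mutex.Lock();       // First lock; the lock count becomes 1.
+		///     mutex.Lock();       // Recursive lock; the lock count becomes 2.
+		///     mutex.Unlock();     // The lock count drops back to 1.
+		///     mutex.Unlock();     // Fully released.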
+		class EATHREADLIB_API Mutex
+		{
+		public:
+			enum Result
+			{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// Mutex
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Mutex(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Mutex(NULL, false).
+			Mutex(const MutexParameters* pMutexParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~Mutex
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~Mutex();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with Mutex(NULL, false).
+			bool Init(const MutexParameters* pMutexParameters);
+
+			/// Lock
+			/// Locks the mutex, with a timeout specified. This function will
+			/// return immediately if the mutex is not locked or if the calling
+			/// thread already has it locked at least once. If the mutex is 
+			/// locked by another thread, this function will block until the mutex
+			/// is unlocked by the owning thread or until the timeout time has
+			/// passed. This function may return before the specified timeout has passed
+			/// and so should not be implicitly used as a timer. Some platforms may 
+			/// return immediately if the timeout is specified as anything but kTimeoutNone.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			/// Return value:
+			///     kResultError   Error
+			///     kResultTimeout Timeout
+			///     > 0            The new lock count.
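+			///
+			/// Example usage (a sketch; 'mutex' is a Mutex instance, and the absolute
+			/// timeout is assumed to be buildable as the current thread time plus a
+			/// millisecond count):
+			///     int result = mutex.Lock(EA::Thread::GetThreadTime() + 1000);
+			///     if(result > 0)
+			///     {
+			///         // ... use the protected resource ...
+			///         mutex.Unlock();
+			///     }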
+			int Lock(const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked at least once by 
+			/// the calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock.
+			int Unlock();
+
+			/// GetLockCount
+			/// Returns the number of locks on the mutex. The return value from this 
+			/// function is only reliable if the calling thread already has one lock on 
+			/// the critical section. Otherwise the value could be changing as other 
+			/// threads lock or unlock the mutex soon after the call.
+			/// This function is useful in debugging and asserting and useful for backing
+			/// out of recursive locks under the case of exceptions and other abortive 
+			/// situations. This function will not necessarily call memory synchronization 
+			/// primitives (e.g. ReadBarrier) itself on systems that require SMP synchronization.
+			int GetLockCount() const;
+
+
+			/// HasLock
+			/// Returns true if the current thread has the mutex locked. 
+			/// This function is reliable only in a debug build whereby 
+			/// EAT_ASSERT_ENABLED is defined to 1. This function can thus
+			/// only be used in debugging situations whereby you want to 
+			/// assert that you have a mutex locked or not. To make this 
+			/// function work in a non-debug environment would necessitate
+			/// adding an undesirable amount of code and data.
+			bool HasLock() const;
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mMutexData; }
+
+		protected:
+			EAMutexData mMutexData;
+
+		private:
+			// Objects of this class are not copyable.
+			Mutex(const Mutex&){}
+			Mutex& operator=(const Mutex&){ return *this; }
+		};
+
+
+
+		/// MutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Mutex.
+		/// A primary use of this would be to allow the Mutex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API MutexFactory
+		{
+		public:
+			static Mutex*  CreateMutex();                    // Internally implemented as: return new Mutex;
+			static void    DestroyMutex(Mutex* pMutex);      // Internally implemented as: delete pMutex;
+
+			static size_t  GetMutexSize();                   // Internally implemented as: return sizeof(Mutex);
+			static Mutex*  ConstructMutex(void* pMemory);    // Internally implemented as: return new(pMemory) Mutex;
+			static void    DestructMutex(Mutex* pMutex);     // Internally implemented as: pMutex->~Mutex();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoMutex
+		/// An AutoMutex locks the Mutex in its constructor and 
+		/// unlocks the Mutex in its destructor (when it goes out of scope).
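+		/// Example usage (a sketch; gMutex and gCount are hypothetical):
+		///     void IncrementCount()
+		///     {
+		///         AutoMutex autoMutex(gMutex); // Lock() here.
+		///         ++gCount;
+		///     }                                // Unlock() when autoMutex goes out of scope.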
+		class EATHREADLIB_API AutoMutex
+		{
+		public:
+			inline AutoMutex(Mutex& mutex) 
+				: mMutex(mutex)
+				{ mMutex.Lock(); }
+
+			inline ~AutoMutex()
+				{ mMutex.Unlock(); }
+
+		protected:
+			Mutex& mMutex;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoMutex(const AutoMutex&);
+			const AutoMutex& operator=(const AutoMutex&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_EATHREAD_MUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 302 - 0
include/eathread/eathread_pool.h

@@ -0,0 +1,302 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a classic thread pool.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_POOL_H
+#define EATHREAD_EATHREAD_POOL_H
+
+
+#ifndef EATHREAD_EATHREAD_THREAD_H
+	#include <eathread/eathread_thread.h>
+#endif
+#ifndef EATHREAD_EATHREAD_CONDITION_H
+	#include <eathread/eathread_condition.h>
+#endif
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+	#include <eathread/eathread_atomic.h>
+#endif
+#ifndef EATHREAD_EATHREAD_LIST_H
+	#include <eathread/eathread_list.h>
+#endif
+#include <stddef.h>
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'EA::Thread::simple_list<T>' needs to have
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////////
+// EA_THREAD_POOL_MAX_SIZE
+//
+// Defines the maximum number of threads the pool can have.
+// Currently we limit a pool to at most EA_THREAD_POOL_MAX_SIZE threads, in order to 
+// simplify memory management issues.
+//
+#ifndef EA_THREAD_POOL_MAX_SIZE
+	#define EA_THREAD_POOL_MAX_SIZE 16
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// ThreadPoolParameters
+		/// Specifies how a thread pool is initialized
+		struct EATHREADLIB_API ThreadPoolParameters
+		{
+			unsigned         mnMinCount;                /// Default is kDefaultMinCount.
+			unsigned         mnMaxCount;                /// Default is kDefaultMaxCount.
+			unsigned         mnInitialCount;            /// Default is kDefaultInitialCount
+			ThreadTime       mnIdleTimeoutMilliseconds; /// Default is kDefaultIdleTimeout. This is a relative time, not an absolute time. Can be a millisecond value or Thread::kTimeoutNone or Thread::kTimeoutImmediate.
+			unsigned         mnProcessorMask;           /// Default is 0xffffffff (all processors). Controls which processors we are allowed to create threads on.
+			ThreadParameters mDefaultThreadParameters;  /// Currently only the mnStackSize, mnPriority, and mpName fields from ThreadParameters are used.
+
+			ThreadPoolParameters();
+
+		private:
+			// Prevent default generation of these functions by not defining them
+			ThreadPoolParameters(const ThreadPoolParameters& rhs);               // copy constructor
+			ThreadPoolParameters& operator=(const ThreadPoolParameters& rhs);    // assignment operator
+		};
+
+
+		/// class ThreadPool
+		/// 
+		/// Implements a conventional thread pool. Thread pools are useful for situations where
+		/// thread creation and destruction is common and the application speed would improve
+		/// by using pre-made threads that are ready to execute. 
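+		///
+		/// Example usage (a sketch; DoWork is a hypothetical job function, assuming the
+		/// usual RunnableFunction signature of intptr_t DoWork(void* pContext)):
+		///     ThreadPool pool;
+		///     int nJob = pool.Begin(DoWork, NULL);
+		///     if(nJob >= ThreadPool::kResultOK)
+		///         pool.WaitForJobCompletion(nJob);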
+		class EATHREADLIB_API ThreadPool
+		{
+		public:
+			enum Default
+			{
+				kDefaultMinCount      = 0,
+				kDefaultMaxCount      = 4,
+				kDefaultInitialCount  = 0,
+				kDefaultIdleTimeout   = 60000, // Milliseconds
+				kDefaultProcessorMask = 0xffffffff
+			};
+
+			enum Result
+			{
+				kResultOK       =  0,
+				kResultError    = -1,
+				kResultTimeout  = -2,
+				kResultDeferred = -3
+			};
+
+			enum JobWait
+			{
+				kJobWaitNone,    /// Don't wait for any jobs to complete, including those currently running.
+				kJobWaitCurrent, /// Wait for currently proceeding jobs to complete but not those that haven't started.
+				kJobWaitAll      /// Wait for all jobs to complete, including those that haven't yet begun.
+			};
+
+			/// ThreadPool
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use ThreadPool(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to ThreadPool(NULL, false).
+			ThreadPool(const ThreadPoolParameters* pThreadPoolParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~ThreadPool
+			/// Destroys the thread pool. Waits for any busy threads to complete.
+			~ThreadPool();
+
+			/// Init
+			/// Initializes the thread pool with given characteristics. If the thread pool is 
+			/// already initialized, this updates the settings.
+			bool Init(const ThreadPoolParameters* pThreadPoolParameters);
+
+			/// Shutdown
+			/// Disables the thread pool, waits for busy threads to complete, destroys all threads.
+			///
+			/// If jobWait is kJobWaitAll, then Shutdown will wait for all jobs, including
+			/// jobs that haven't been started yet, to complete. Otherwise, only currently 
+			/// proceeding jobs will be completed. 
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			bool Shutdown(JobWait jobWait = kJobWaitAll, const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Begin
+			/// Starts a thread from the pool with the given parameters. 
+			/// Returns kResultError or a job id of >= kResultOK. A return of kResultDeferred is 
+			/// possible if the number of active threads is greater or equal to the max count.
+			/// If input ppThread is non-NULL and return value is >= kResultOK, the returned thread
+			/// will be the thread used for the job. Else the returned thread pointer will be NULL.
+			/// If input bEnableDeferred is false but the max count of active threads has been 
+			/// reached, a new thread is nevertheless created.
+			int Begin(IRunnable*       pRunnable, void* pContext = NULL, Thread** ppThread = NULL, bool bEnableDeferred = false);
+			int Begin(RunnableFunction pFunction, void* pContext = NULL, Thread** ppThread = NULL, bool bEnableDeferred = false);
+
+			/// WaitForJobCompletion
+			/// Waits for an individual job or for all jobs (job id of -1) to complete. 
+			/// If a job id is given which doesn't correspond to any existing job, 
+			/// the job is assumed to have been completed and the wait completes immediately.
+			/// If new jobs are added while the wait is occurring, this function will wait
+			/// for those jobs to complete as well. jobWait is valid only if nJob is -1.
+			/// Note that the timeout is specified in absolute time and not relative time.
+			/// Returns one of enum Result.
+			int WaitForJobCompletion(int nJob = -1, JobWait jobWait = kJobWaitAll, const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Pause
+			/// Enables or disables the activation of threads from the pool. 
+			/// When paused, calls to Begin will return kResultDeferred instead of kResultOK.
+			void Pause(bool bPause);
+
+			/// Locks the thread pool thread list.
+			void Lock();
+			void Unlock();
+
+			struct Job
+			{
+				int              mnJobID;       /// Unique job id.
+				IRunnable*       mpRunnable;    /// User-supplied IRunnable. This is an alternative to mpFunction.
+				RunnableFunction mpFunction;    /// User-supplied function. This is an alternative to mpRunnable.
+				void*            mpContext;     /// User-supplied context.
+
+				Job();
+			};
+
+			struct ThreadInfo
+			{
+				volatile bool mbActive;         /// True if the thread is currently busy working on a job.
+				volatile bool mbQuit;           /// If set to true then this thread should quit at the next opportunity.
+			  //bool          mbPersistent;     /// If true then this thread is never quit at runtime. False by default.
+				Thread*       mpThread;         /// The Thread itself.
+				ThreadPool*   mpThreadPool;     /// The ThreadPool that owns this thread.
+				Job           mCurrentJob;      /// The most recent job a thread is or was working on.
+
+				ThreadInfo();
+			};
+
+			/// AddThread
+			/// Adds a new thread with the given ThreadParameters.
+			/// The return value is not safe to use unless this function is called
+			/// and the result used within a Lock/Unlock pair.
+			/// It's the user's responsibility to supply ThreadParameters that are sane.
+			/// If bBeginThread is true, then the Thread is started via a call to 
+			/// pThreadInfo->mpThread->Begin(ThreadFunction, pThreadInfo, &tp);
+			/// Otherwise the user is expected to manually start the thread.
+			ThreadInfo* AddThread(const ThreadParameters& tp, bool bBeginThread);
+
+			// Gets the ThreadInfo for the nth Thread identified by index. 
+			// You must call this function and use the info within a Lock/Unlock pair 
+			// on the thread pool.
+			ThreadInfo* GetThreadInfo(int index);
+
+			// Unless you call this function while the Pool is locked (via Lock), the return
+			// value may be out of date by the time you read it. 
+			int GetThreadCount();
+
+		protected:
+			typedef EA::Thread::simple_list<Job>         JobList;
+			typedef EA::Thread::simple_list<ThreadInfo*> ThreadInfoList;
+
+			// Member functions
+			static intptr_t ThreadFunction(void* pContext);
+			ThreadInfo*     CreateThreadInfo();
+			void            SetupThreadParameters(ThreadParameters& tp);
+			void            AdjustThreadCount(unsigned nCount);
+			Result          QueueJob(const Job& job, Thread** ppThread, bool bEnableDeferred);
+			void            AddThread(ThreadInfo* pThreadInfo);
+			void            RemoveThread(ThreadInfo* pThreadInfo);
+			void            FixThreads();
+
+			// Member data
+			bool                mbInitialized;              // 
+			uint32_t            mnMinCount;                 // Min number of threads to have available.
+			uint32_t            mnMaxCount;                 // Max number of threads to have available.
+			AtomicInt32         mnCurrentCount;             // Current number of threads available.
+			AtomicInt32         mnActiveCount;              // Current number of threads busy with jobs.
+			ThreadTime          mnIdleTimeoutMilliseconds;  // Timeout before quitting threads that have had no jobs.
+			uint32_t            mnProcessorMask;            // If mask is not 0xffffffff then we manually round-robin assign processors.
+			uint32_t            mnProcessorCount;           // The number of processors currently present.
+			uint32_t            mnNextProcessor;            // Used if we are manually round-robin assigning processors. 
+			AtomicInt32         mnPauseCount;               // A positive value means we pause working on jobs.
+			AtomicInt32         mnLastJobID;                // 
+			ThreadParameters    mDefaultThreadParameters;   // 
+			Condition           mThreadCondition;           // Manages signalling mJobList.
+			Mutex               mThreadMutex;               // Guards manipulation of mThreadInfoList and mJobList.
+			ThreadInfoList      mThreadInfoList;            // List of threads in our pool.
+			JobList             mJobList;                   // List of waiting jobs.
+
+		private:
+			// Prevent default generation of these functions by not defining them
+			ThreadPool(const ThreadPool& rhs);               // copy constructor
+			ThreadPool& operator=(const ThreadPool& rhs);    // assignment operator
+		};
+
+
+
+		/// ThreadPoolFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class ThreadPool.
+		/// A primary use of this would be to allow the ThreadPool implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ThreadPoolFactory
+		{
+		public:
+			static ThreadPool*  CreateThreadPool();                          // Internally implemented as: return new ThreadPool;
+			static void         DestroyThreadPool(ThreadPool* pThreadPool);  // Internally implemented as: delete pThreadPool;
+
+			static size_t       GetThreadPoolSize();                         // Internally implemented as: return sizeof(ThreadPool);
+			static ThreadPool*  ConstructThreadPool(void* pMemory);          // Internally implemented as: return new(pMemory) ThreadPool;
+			static void         DestructThreadPool(ThreadPool* pThreadPool); // Internally implemented as: pThreadPool->~ThreadPool();
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_POOL_H
+
+
+
+
+
+
+
+
+
+
+

+ 221 - 0
include/eathread/eathread_rwmutex.h

@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a lightweight mutex with multiple readers but a single writer.
+// This allows for high performance systems whereby the consumers of data
+// are more common than the producers of data.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWMUTEX_H
+#define EATHREAD_EATHREAD_RWMUTEX_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EARWMutexData
+///
+/// This is used internally by class RWMutex.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_condition.h>
+
+	struct EATHREADLIB_API EARWMutexData
+	{
+		int                   mnReadWaiters;
+		int                   mnWriteWaiters;
+		int                   mnReaders;
+		EA::Thread::ThreadId  mThreadIdWriter;
+		EA::Thread::Mutex     mMutex;
+		EA::Thread::Condition mReadCondition;
+		EA::Thread::Condition mWriteCondition;
+
+		EARWMutexData();
+
+	private:
+		// Prevent default generation of these functions by declaring but not defining them.
+		EARWMutexData(const EARWMutexData& rhs);               // copy constructor
+		EARWMutexData& operator=(const EARWMutexData& rhs);    // assignment operator
+	};
+/////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// RWMutexParameters
+		/// Specifies rwlock settings.
+		struct EATHREADLIB_API RWMutexParameters
+		{
+			bool mbIntraProcess; /// True if the mutex is intra-process, else inter-process.
+			char mName[16];      /// Mutex name, applicable only to platforms that recognize named synchronization objects.
+
+			RWMutexParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class RWMutex
+		/// Implements a multiple reader / single writer mutex.
+		/// This allows for significantly higher performance when data to be protected
+		/// is read much more frequently than written. In this case, a waiting writer
+		/// gets top priority and all new readers block once a writer starts waiting.
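+		///
+		/// Example usage (a minimal sketch):
+		///     RWMutex rwMutex;
+		///     rwMutex.Lock(RWMutex::kLockTypeRead);   // Shared: many readers may hold this at once.
+		///     // ... read the protected data ...
+		///     rwMutex.Unlock();
+		///     rwMutex.Lock(RWMutex::kLockTypeWrite);  // Exclusive: waits for readers to drain.
+		///     // ... write the protected data ...
+		///     rwMutex.Unlock();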
+		class EATHREADLIB_API RWMutex
+		{
+		public:
+			enum Result
+			{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			enum LockType
+			{
+				kLockTypeNone  = 0,
+				kLockTypeRead  = 1,
+				kLockTypeWrite = 2
+			};
+
+			/// RWMutex
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use RWMutex(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to RWMutex(NULL, false).
+			RWMutex(const RWMutexParameters* pRWMutexParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~RWMutex
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~RWMutex();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with RWMutex(NULL, false).
+			bool Init(const RWMutexParameters* pRWMutexParameters);
+
+			/// Lock
+			/// Returns the new lock count for the given lock type.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			int Lock(LockType lockType, const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked by the 
+			/// calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock
+			/// or is one of enum Result.
+			int Unlock();
+
+			/// GetLockCount
+			int GetLockCount(LockType lockType);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mRWMutexData; }
+
+		protected:
+			EARWMutexData mRWMutexData;
+
+		private:
+			// Objects of this class are not copyable.
+			RWMutex(const RWMutex&){}
+			RWMutex& operator=(const RWMutex&){ return *this; }
+		};
+
+
+		/// RWMutexFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWMutex.
+		/// A primary use of this would be to allow the RWMutex implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API RWMutexFactory
+		{
+		public:
+			static RWMutex*  CreateRWMutex();                       // Internally implemented as: return new RWMutex;
+			static void      DestroyRWMutex(RWMutex* pRWMutex);     // Internally implemented as: delete pRWMutex;
+
+			static size_t    GetRWMutexSize();                      // Internally implemented as: return sizeof(RWMutex);
+			static RWMutex*  ConstructRWMutex(void* pMemory);       // Internally implemented as: return new(pMemory) RWMutex;
+			static void      DestructRWMutex(RWMutex* pRWMutex);    // Internally implemented as: pRWMutex->~RWMutex();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoRWMutex
+		/// An AutoRWMutex locks the RWMutex in its constructor and 
+		/// unlocks the RWMutex in its destructor (when it goes out of scope).
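+		/// Example usage (a sketch; gRWMutex is hypothetical):
+		///     {
+		///         AutoRWMutex readLock(gRWMutex, RWMutex::kLockTypeRead);
+		///         // ... read the protected data ...
+		///     } // The read lock is released here.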
+		class AutoRWMutex
+		{
+		public:
+			AutoRWMutex(RWMutex& mutex, RWMutex::LockType lockType) 
+				: mMutex(mutex)
+				{  mMutex.Lock(lockType); }
+
+		  ~AutoRWMutex()
+				{  mMutex.Unlock(); }
+
+		protected:
+			RWMutex& mMutex;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoRWMutex(const AutoRWMutex&);
+			const AutoRWMutex& operator=(const AutoRWMutex&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_EATHREAD_RWMUTEX_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 430 - 0
include/eathread/eathread_rwmutex_ip.h

@@ -0,0 +1,430 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an interprocess mutex with multiple readers but a single writer.
+// This allows for high performance systems whereby the consumers of data
+// are more common than the producers of data.
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef EATHREAD_EATHREAD_RWMUTEX_IP_H
+#define EATHREAD_EATHREAD_RWMUTEX_IP_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <new>
+#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	#pragma warning(push, 0)
+	#include <Windows.h>
+	#pragma warning(pop)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifdef _MSC_VER
+	#pragma warning(push)           // We have to be careful about disabling this warning. Sometimes the warning is meaningful; sometimes it isn't.
+	#pragma warning(disable: 4251)  // class (some template) needs to have dll-interface to be used by clients.
+	#pragma warning(disable: 6054)  // String 'argument 2' might not be zero-terminated
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
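+			/// Shared
+			/// Maps a named, reference-counted instance of T into the process via a
+			/// Windows file mapping, so the same object is visible to every process
+			/// that opens the same name.
+			/// Example usage (a sketch; "ExampleName" is a hypothetical mapping name):
+			///     Shared<SharedData> shared("ExampleName");
+			///     if(shared.IsNew())
+			///         { /* First process to create the mapping: do one-time setup. */ }
+			///     int readers = shared->mnReaders;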
+			template<typename T>
+			class Shared
+			{
+			public:
+				Shared();
+				Shared(const char* pName);
+			   ~Shared();
+
+				bool Init(const char* pName);
+				void Shutdown();
+				bool IsNew() const { return mbCreated; }
+				T*   operator->()  { return static_cast<T*>(mpData); }
+
+			protected:
+				uint32_t& GetRefCount();
+
+				Shared(const Shared&);
+				Shared& operator=(const Shared&);
+
+			protected:
+				HANDLE mMapping;
+				void*  mpData;
+				bool   mbCreated;
+				char   mName[32];
+				T*     mpT;         // For debug purposes only.
+			};
+
+
+			template <typename T>
+			inline Shared<T>::Shared()
+			  : mMapping(NULL)
+			  , mpData(NULL)
+			  , mbCreated(false)
+			  , mpT(NULL)
+			{
+			}
+
+
+			template <typename T>
+			inline Shared<T>::Shared(const char* pName)
+			  : mMapping(NULL)
+			  , mpData(NULL)
+			  , mbCreated(false)
+			  , mpT(NULL)
+			{
+				Init(pName);
+			}
+
+
+			template <typename T>
+			inline Shared<T>::~Shared()
+			{
+				Shutdown();
+			}
+
+
+			template <typename T>
+			inline bool Shared<T>::Init(const char* pName)
+			{
+				bool bReturnValue = false;
+
+				if(pName)
+					strncpy(mName, pName, sizeof(mName));
+				else
+					mName[0] = 0;
+				mName[sizeof(mName) - 1] = 0;
+		 
+				char mutexName[sizeof(mName) + 16];
+				strcpy(mutexName, mName);
+				strcat(mutexName, ".SharedMutex");
+				HANDLE hMutex = CreateMutexA(NULL, FALSE, mutexName);
+				EAT_ASSERT(hMutex != NULL);
+				if(hMutex != NULL)
+				{
+					WaitForSingleObject(hMutex, INFINITE); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+
+					const size_t kDataSize = sizeof(T) + 8; // Add bytes so that we can store a ref-count of our own after the data. 
+					mMapping = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, kDataSize, mName);
+
+					if(mMapping)
+					{
+						mbCreated = (GetLastError() != ERROR_ALREADY_EXISTS);
+						mpData    = MapViewOfFile(mMapping, FILE_MAP_ALL_ACCESS, 0, 0, kDataSize);
+
+						uint32_t& refCount = GetRefCount(); // The ref count is stored at the end of the mapped data.
+
+						if(mbCreated)           // If we were the first one to create this, then construct it.
+						{
+							new(mpData) T;
+							refCount = 1;
+						}
+						else
+							refCount++;
+
+						mpT = static_cast<T*>(mpData); // For debug purposes only.
+
+						bReturnValue = true;
+					}
+
+					ReleaseMutex(hMutex);
+					CloseHandle(hMutex);
+				}
+
+				return bReturnValue;
+			}
+
+
+			template <typename T>
+			inline void Shared<T>::Shutdown()
+			{
+				char mutexName[sizeof(mName) + 16];
+				strcpy(mutexName, mName);
+				strcat(mutexName, ".SharedMutex");
+				HANDLE hMutex = CreateMutexA(NULL, FALSE, mutexName);
+				EAT_ASSERT(hMutex != NULL);
+				if(hMutex != NULL)
+				{
+					WaitForSingleObject(hMutex, INFINITE); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+
+					if(mMapping)
+					{
+						if(mpData)
+						{
+							uint32_t& refCount = GetRefCount(); // The ref count is stored at the end of the mapped data.
+
+							if(refCount == 1)                   // If we are the last to use it, 
+								static_cast<T*>(mpData)->~T();
+							else
+								refCount--;
+
+							UnmapViewOfFile(mpData);
+							mpData = NULL;
+						}
+
+						CloseHandle(mMapping);
+						mMapping = 0;
+					}
+
+					ReleaseMutex(hMutex);
+					CloseHandle(hMutex);
+				} 
+			}
+
+			template <typename T>
+			inline uint32_t& Shared<T>::GetRefCount()
+			{
+				// There will be space after T because we allocated it in Init.
+				uint32_t* pData32 = (uint32_t*)(((uintptr_t)mpData + sizeof(T) + 3) & ~3); // Round up to next 32 bit boundary.
+				return *pData32;
+			}
+
+		#else
+
+			template<typename T>
+			class Shared
+			{
+			public:
+				Shared()               { }
+				Shared(const char*)    { }
+
+				bool Init(const char*) { return true; }
+				void Shutdown()        { }
+				bool IsNew() const     { return true; }
+				T*   operator->()      { return &mT; }
+
+				T mT;
+			};
+
+		#endif // EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// EARWMutexIPData
+		///
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
+			struct EATHREADLIB_API SharedData
+			{
+				int   mnReadWaiters;
+				int   mnWriteWaiters;
+				int   mnReaders;
+				DWORD mThreadIdWriter;      // Need to use a thread id instead of a thread handle.
+
+				SharedData() : mnReadWaiters(0), mnWriteWaiters(0), mnReaders(0), mThreadIdWriter(EA::Thread::kSysThreadIdInvalid) { }
+			};
+
+			struct EATHREADLIB_API EARWMutexIPData
+			{
+				Shared<SharedData> mSharedData;
+				HANDLE             mMutex;
+				HANDLE             mReadSemaphore;
+				HANDLE             mWriteSemaphore;
+
+				EARWMutexIPData();
+			   ~EARWMutexIPData();
+
+				bool Init(const char* pName);
+				void Shutdown();
+
+			private:
+				EARWMutexIPData(const EARWMutexIPData&);
+				EARWMutexIPData& operator=(const EARWMutexIPData&);
+			};
+
+		#else
+
+			struct EATHREADLIB_API EARWMutexIPData
+			{
+				EARWMutexIPData(){}
+
+			private:
+				EARWMutexIPData(const EARWMutexIPData&);
+				EARWMutexIPData& operator=(const EARWMutexIPData&);
+			};
+
+		#endif
+
+
+		/// RWMutexParameters
+		struct EATHREADLIB_API RWMutexIPParameters
+		{
+			bool mbIntraProcess; /// True if the mutex is intra-process, else inter-process.
+			char mName[16];      /// Mutex name, applicable only to platforms that recognize named synchronization objects.
+
+			RWMutexIPParameters(bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class RWMutexIP
+		/// Implements an interprocess multiple reader / single writer mutex.
+		/// This allows for significantly higher performance when the data to be protected
+		/// is read much more frequently than written. In this case, a waiting writer
+		/// gets top priority and all new readers block once a writer starts waiting.
+		class EATHREADLIB_API RWMutexIP
+		{
+		public:
+			enum Result
+			{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			enum LockType
+			{
+				kLockTypeNone  = 0,
+				kLockTypeRead  = 1,
+				kLockTypeWrite = 2
+			};
+
+			/// RWMutexIP
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use RWMutexIP(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to RWMutexIP(NULL, false).
+			RWMutexIP(const RWMutexIPParameters* pRWMutexIPParameters = NULL, bool bDefaultParameters = true);
+
+			/// ~RWMutexIP
+			/// Destroys an existing mutex. The mutex must not be locked by any thread, 
+			/// otherwise the resulting behaviour is undefined.
+			~RWMutexIP();
+
+			/// Init
+			/// Initializes the mutex if not done so in the constructor.
+			/// This should only be called in the case that this class was constructed 
+			/// with RWMutexIP(NULL, false).
+			bool Init(const RWMutexIPParameters* pRWMutexIPParameters);
+
+			/// Lock
+			/// Returns the new lock count for the given lock type.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			int Lock(LockType lockType, const ThreadTime& timeoutAbsolute = EA::Thread::kTimeoutNone);
+
+			/// Unlock
+			/// Unlocks the mutex. The mutex must already be locked by the 
+			/// calling thread. Otherwise the behaviour is not defined.
+			/// Return value is the lock count value immediately upon unlock
+			/// or is one of enum Result.
+			int Unlock();
+
+			/// GetLockCount
+			int GetLockCount(LockType lockType);
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mRWMutexIPData; }
+
+		protected:
+			EARWMutexIPData mRWMutexIPData;
+
+		private:
+			// Objects of this class are not copyable.
+			RWMutexIP(const RWMutexIP&){}
+			RWMutexIP& operator=(const RWMutexIP&){ return *this; }
+		};
+
+
+		/// RWMutexIPFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWMutexIP.
+		/// A primary use of this would be to allow the RWMutexIP implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API RWMutexIPFactory
+		{
+		public:
+			static RWMutexIP*  CreateRWMutexIP();                         // Internally implemented as: return new RWMutexIP;
+			static void        DestroyRWMutexIP(RWMutexIP* pRWMutex);     // Internally implemented as: delete pRWMutex;
+
+			static size_t      GetRWMutexIPSize();                        // Internally implemented as: return sizeof(RWMutexIP);
+			static RWMutexIP*  ConstructRWMutexIP(void* pMemory);         // Internally implemented as: return new(pMemory) RWMutexIP;
+			static void        DestructRWMutexIP(RWMutexIP* pRWMutex);    // Internally implemented as: pRWMutex->~RWMutexIP();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoRWMutexIP
+		/// An AutoRWMutexIP locks the RWMutexIP in its constructor and 
+		/// unlocks the RWMutexIP in its destructor (when it goes out of scope).
+		class AutoRWMutexIP
+		{
+		public:
+			AutoRWMutexIP(RWMutexIP& mutex, RWMutexIP::LockType lockType) 
+				: mMutex(mutex)
+				{  mMutex.Lock(lockType); }
+
+		  ~AutoRWMutexIP()
+				{  mMutex.Unlock(); }
+
+		protected:
+			RWMutexIP& mMutex;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoRWMutexIP(const AutoRWMutexIP&);
+			const AutoRWMutexIP& operator=(const AutoRWMutexIP&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_RWMUTEX_IP_H
+
+
+
+
+
+
+

+ 253 - 0
include/eathread/eathread_rwsemalock.h

@@ -0,0 +1,253 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+//---------------------------------------------------------
+// For conditions of distribution and use, see
+// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE
+//---------------------------------------------------------
+
+#ifndef EATHREAD_EATHREAD_RWSEMALOCK_H
+#define EATHREAD_EATHREAD_RWSEMALOCK_H
+
+#include "eathread_atomic.h"
+#include "eathread_semaphore.h"
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		//---------------------------------------------------------
+		// RWSemaLock
+		//---------------------------------------------------------
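+		// A readers-writer lock built from an atomic status word plus two semaphores,
+		// following Preshing's cpp11-on-multicore implementation (see the license link above).
+		// Example usage (a minimal sketch):
+		//     RWSemaLock semaLock;
+		//     semaLock.ReadLock();    // Any number of readers may enter together.
+		//     // ... read shared state ...
+		//     semaLock.ReadUnlock();
+		//     semaLock.WriteLock();   // Exclusive access.
+		//     // ... write shared state ...
+		//     semaLock.WriteUnlock();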
+		class RWSemaLock
+		{
+		public:
+			RWSemaLock() : mStatus(0) {}
+			RWSemaLock(const RWSemaLock&) = delete;
+			RWSemaLock(RWSemaLock&&) = delete;
+			RWSemaLock& operator=(const RWSemaLock&) = delete;
+			RWSemaLock& operator=(RWSemaLock&&) = delete;
+
+			void ReadLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0)
+					{
+						newStatus.waitToRead++;
+					}
+					else
+					{
+						newStatus.readers++;
+					}
+					// CAS until successful. On failure, oldStatus will be updated with the latest value.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				if (oldStatus.writers > 0)
+				{
+					mReadSema.Wait();
+				}
+			}
+
+			bool ReadTryLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0)
+					{
+						return false;
+					}
+					else
+					{
+						newStatus.readers++;
+					}
+					// CAS until successful. On failure, oldStatus will be updated with the latest value.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				return true;
+			}
+
+			void ReadUnlock()
+			{
+				Status oldStatus;
+				oldStatus.data = mStatus.Add(-Status::kIncrementRead) + Status::kIncrementRead;
+
+				EAT_ASSERT(oldStatus.readers > 0);
+				if (oldStatus.readers == 1 && oldStatus.writers > 0)
+				{
+					mWriteSema.Post();
+				}
+			}
+
+			void WriteLock()
+			{
+				Status oldStatus;
+				oldStatus.data = mStatus.Add(Status::kIncrementWrite) - Status::kIncrementWrite;
+				EAT_ASSERT(oldStatus.writers + 1 <= Status::kMaximum);
+				if (oldStatus.readers > 0 || oldStatus.writers > 0)
+				{
+					mWriteSema.Wait();
+				}
+			}
+
+			bool WriteTryLock()
+			{
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					newStatus.data = oldStatus.data;
+
+					if (oldStatus.writers > 0 || oldStatus.readers > 0)
+					{
+						return false;
+					}
+					else
+					{
+						newStatus.writers++;
+					}
+					// CAS until successful. On failure, the loop re-reads mStatus and retries with its latest value.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				return true;
+			}
+
+			void WriteUnlock()
+			{
+				uint32_t waitToRead = 0;
+				Status oldStatus, newStatus;
+				do
+				{
+					oldStatus.data = mStatus.GetValue();
+					EAT_ASSERT(oldStatus.readers == 0);
+					newStatus.data = oldStatus.data;
+					newStatus.writers--;
+					waitToRead = oldStatus.waitToRead;
+					if (waitToRead > 0)
+					{
+						newStatus.waitToRead = 0;
+						newStatus.readers = waitToRead;
+					}
+					// CAS until successful. On failure, the loop re-reads mStatus and retries with its latest value.
+				}
+				while (!mStatus.SetValueConditional(newStatus.data, oldStatus.data));
+
+				if (waitToRead > 0)
+				{
+					mReadSema.Post(waitToRead);
+				}
+				else if (oldStatus.writers > 1)
+				{
+					mWriteSema.Post();
+				}
+			}
+
+			// NOTE(rparolin): 
+			// Since the RWSemaLock uses atomics to update its status flags before blocking on a semaphore, you cannot
+			// rely on the answer IsReadLocked/IsWriteLocked gives you. It's at best a guess, and you can't rely
+			// on it for any kind of validation check, which limits its usefulness. In addition, the original
+			// implementation from Preshing does not include such functionality. 
+			//
+			// bool IsReadLocked() {...}
+			// bool IsWriteLocked() {...}
+
+		protected:
+			EA_DISABLE_VC_WARNING(4201) // warning C4201: nonstandard extension used: nameless struct/union
+			union Status
+			{
+				enum
+				{
+					kIncrementRead			= 1,
+					kIncrementWaitToRead	= 1 << 10,
+					kIncrementWrite			= 1 << 20,
+					kMaximum				= (1 << 10) - 1,
+				};
+
+				struct 
+				{
+					int readers		: 10; // 10 bits per field => max count of (1 << 10) - 1 = 1023 (kMaximum)
+					int waitToRead	: 10;
+					int writers		: 10;
+					int pad			: 2;
+				};
+
+				int data;
+			};
+			EA_RESTORE_VC_WARNING()
+
+			AtomicInt32 mStatus;
+			Semaphore mReadSema;  // semaphores are non-copyable
+			Semaphore mWriteSema; // semaphores are non-copyable
+		};
+
+
+		//---------------------------------------------------------
+		// AutoSemaReadLock (read lock guard)
+		//---------------------------------------------------------
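+		// Example usage (a minimal sketch):
+		//     int ReadValue(RWSemaLock& lock, const int& sharedValue)
+		//     {
+		//         AutoSemaReadLock readLock(lock); // shared access; blocks only while a writer is active
+		//         return sharedValue;              // ReadUnlock on scope exit
+		//     }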
+		class AutoSemaReadLock
+		{
+		private:
+			RWSemaLock& m_lock;
+
+		public:
+			AutoSemaReadLock(const AutoSemaReadLock&) = delete;
+			AutoSemaReadLock(AutoSemaReadLock&&) = delete;
+			AutoSemaReadLock& operator=(const AutoSemaReadLock&) = delete;
+			AutoSemaReadLock& operator=(AutoSemaReadLock&&) = delete;
+
+			AutoSemaReadLock(RWSemaLock& lock) : m_lock(lock)
+			{
+				m_lock.ReadLock();
+			}
+
+			~AutoSemaReadLock()
+			{
+				m_lock.ReadUnlock();
+			}
+		};
+
+
+		//---------------------------------------------------------
+		// AutoSemaWriteLock (write lock guard)
+		//---------------------------------------------------------
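+		// Example usage (a minimal sketch):
+		//     void WriteValue(RWSemaLock& lock, int& sharedValue, int value)
+		//     {
+		//         AutoSemaWriteLock writeLock(lock); // exclusive access; waits for active readers to drain
+		//         sharedValue = value;               // WriteUnlock on scope exit
+		//     }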
+		class AutoSemaWriteLock
+		{
+		private:
+			RWSemaLock& m_lock;
+
+		public:
+			AutoSemaWriteLock(const AutoSemaWriteLock&) = delete;
+			AutoSemaWriteLock(AutoSemaWriteLock&&) = delete;
+			AutoSemaWriteLock& operator=(const AutoSemaWriteLock&) = delete;
+			AutoSemaWriteLock& operator=(AutoSemaWriteLock&&) = delete;
+
+			AutoSemaWriteLock(RWSemaLock& lock) : m_lock(lock)
+			{
+				m_lock.WriteLock();
+			}
+
+			~AutoSemaWriteLock()
+			{
+				m_lock.WriteUnlock();
+			}
+		};
+	}
+}
+
+#endif // EATHREAD_EATHREAD_RWSEMALOCK_H

+ 408 - 0
include/eathread/eathread_rwspinlock.h

@@ -0,0 +1,408 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock which supports
+// multiple readers but a single writer.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWSPINLOCK_H
+#define EATHREAD_EATHREAD_RWSPINLOCK_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_atomic.h>
+#include <new>
+
+
+#ifdef _MSC_VER
+	 #pragma warning(push)
+	 #pragma warning(disable: 4100) // (Compiler claims pRWSpinLock is unreferenced)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class RWSpinLock
+		///
+		/// A RWSpinLock is like a SpinLock except you can have multiple
+		/// readers but a single exclusive writer. This is very beneficial for 
+		/// situations whereby there are many consumers of some data but only 
+		/// one producer of the data. Unlike many thread-level read/write lock
+		/// implementations, this spin lock follows a lean approach and does not
+		/// do arbitration or fairness. The result is
+		/// that if you have many readers who are constantly locking the read
+		/// lock, write lock attempts may not be able to succeed. So you need to
+		/// be careful in how you use this.
+		///
+		/// We take a page from the Linux kernel here and implement read/write
+		/// locks via a mechanism that uses a 'bias' value and limits the number
+		/// of total readers to 2^24-1, or 16,777,215. This shouldn't be a problem.
+		/// When the spinlock is unlocked, the value is 0x01000000.
+		/// Readers decrement the lock by one each, so when the spinlock is 
+		/// read-locked, the value is between 1 and 0x00ffffff. Writers decrement
+		/// the lock by 0x01000000, so when a spinlock is write-locked, the value
+		/// must be zero. It must be zero because there can only be one writer
+		/// and because there can be no readers when there is a writer. When a 
+		/// reader attempts to get a read-lock, it decrements the lock count and 
+		/// examines the new value. If the new value is < 0, then there was a 
+		/// write-lock present and so the reader immediately increments the lock
+		/// count and tries again later. There are two results that come about due
+		/// to this: 
+		///     1) In the case of 32 bit integers, if by some wild chance of nature
+		///         there are 256 or more reader threads and there is a writer thread
+		///         with a write lock and every one of the reader threads executes 
+		///         the same decrement and compare to < 0 at the same time, then the
+		///         257th thread will mistakenly think that there isn't a write lock.
+		///     2) The logic to test if a write-lock is taken is not to compare
+		///         against zero but to compare against (> -255 and <= 0). This is
+		///         because readers will occasionally be 'mistakenly' decrementing
+		///         the lock while trying to obtain read access.
+		///
+		/// We thus have the following possible values:
+		///     0 < value < 0x01000000    ----> read-locked
+		///     value == 0x01000000       ----> unlocked
+		///     -0x01000000 < value <= 0  ----> write-locked
+		///
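+		/// For example, starting from the unlocked value 0x01000000:
+		///     ReadLock       ----> value becomes 0x00ffffff (one reader)
+		///     ReadLock       ----> value becomes 0x00fffffe (two readers)
+		///     ReadUnlock x2  ----> value returns to 0x01000000 (unlocked)
+		///     WriteLock      ----> value becomes 0x00000000 (write-locked)
+		///
+		/// Example usage (a minimal sketch):
+		///     RWSpinLock gLock;
+		///     int        gSharedValue = 0;
+		///
+		///     int ReadValue() {
+		///         gLock.ReadLock();        // multiple readers may hold this concurrently
+		///         const int value = gSharedValue;
+		///         gLock.ReadUnlock();
+		///         return value;
+		///     }
+		///
+		///     void WriteValue(int value) {
+		///         gLock.WriteLock();       // exclusive; spins until all readers have unlocked
+		///         gSharedValue = value;
+		///         gLock.WriteUnlock();
+		///     }
+		///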
+		class RWSpinLock
+		{
+		public:
+			RWSpinLock();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang. 
+			// This function can be called if the current thread already 
+			// has a read lock, though all read locks must be matched by unlocks. 
+			void ReadLock();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang.
+			// This function can be called if the current thread already 
+			// has a read lock (in which case it will always succeed), 
+			// though all read locks must be matched by unlocks. 
+			bool ReadTryLock();
+
+			// Returns true if any thread currently has a read lock. 
+			// The return value is subject to be out of date by the 
+			// time it is read by the current thread, unless the current
+			// thread has a read lock. If IsReadLocked is true, then 
+			// at that moment IsWriteLocked is necessarily false.
+			// If IsReadLocked is false, IsWriteLock may be either true or false.
+			bool IsReadLocked() const;
+
+			// Unlocks for reading, as a match to ReadLock or a successful
+			// ReadTryLock. All read locks must be matched by ReadUnlock with
+			// the same thread that has the read lock.
+			void ReadUnlock();
+
+			// This function cannot be called while the current thread  
+			// already has a read or write lock, else this function will hang. 
+			void WriteLock();
+
+			// If this function is called while the current thread already 
+			// has a read or write lock, it will always return false.
+			bool WriteTryLock();
+
+			// If this function returns true, then IsReadLocked must at that moment
+			// be false.
+			bool IsWriteLocked() const;
+
+			// Matches WriteLock or a successful WriteTryLock.
+			void WriteUnlock();
+
+			// Returns the address of mValue. This value should be read for 
+			// diagnostic purposes only and should not be written.
+			void* GetPlatformData();
+
+		public:
+			enum Value
+			{
+				kValueUnlocked = 0x01000000
+			};
+
+			AtomicInt32 mValue;
+		};
+
+
+
+		/// RWSpinLockFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWSpinlock.
+		/// A primary use of this would be to allow the RWSpinlock implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
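+		/// Example usage (a sketch of the manual construction pathway; 'pUserAllocator' is a
+		/// hypothetical user-provided allocator):
+		///     void*       pMemory = pUserAllocator->Alloc(RWSpinLockFactory::GetRWSpinLockSize());
+		///     RWSpinLock* pLock   = RWSpinLockFactory::ConstructRWSpinLock(pMemory);
+		///     // ... use pLock ...
+		///     RWSpinLockFactory::DestructRWSpinLock(pLock);
+		///     pUserAllocator->Free(pMemory);
+		///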
+		class EATHREADLIB_API RWSpinLockFactory
+		{
+		public:
+			static RWSpinLock* CreateRWSpinLock();
+			static void        DestroyRWSpinLock(RWSpinLock* pRWSpinLock);
+			static size_t      GetRWSpinLockSize();
+			static RWSpinLock* ConstructRWSpinLock(void* pMemory);
+			static void        DestructRWSpinLock(RWSpinLock* pRWSpinLock);
+		};
+
+
+
+		/// class AutoRWSpinLock
+		///
+		/// Example usage:
+		///     void Function(RWSpinLock& lock) {
+		///         AutoRWSpinLock autoLock(lock, AutoRWSpinLock::kLockTypeRead);
+		///         // Do something
+		///     }
+		///
+		class AutoRWSpinLock
+		{
+		public:
+			enum LockType
+			{
+				kLockTypeRead, 
+				kLockTypeWrite
+			};
+
+			AutoRWSpinLock(RWSpinLock& spinLock, LockType lockType);
+		   ~AutoRWSpinLock();
+
+		protected:
+			RWSpinLock& mSpinLock;
+			LockType    mLockType;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoRWSpinLock(const AutoRWSpinLock&);
+			const AutoRWSpinLock& operator=(const AutoRWSpinLock&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		RWSpinLock::RWSpinLock()
+			: mValue(kValueUnlocked)
+		{
+		}
+
+
+		inline
+		void RWSpinLock::ReadLock()
+		{
+			Top: // Due to modern processor branch prediction, the compiler will optimize better for true branches and so we do a manual goto loop here.
+			if((unsigned)mValue.Decrement() < kValueUnlocked)
+				return;
+			mValue.Increment();
+			while(mValue.GetValueRaw() <= 0){ // It is better to do this polling loop as a first check than to 
+				#ifdef EA_THREAD_COOPERATIVE  // do an atomic decrement repeatedly, as the atomic lock is 
+					ThreadSleep();            // potentially not a cheap thing due to bus locks on some platforms.
+				#else
+					EAProcessorPause();       // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+				#endif
+			}
+			goto Top;
+		}
+
+
+		inline
+		bool RWSpinLock::ReadTryLock()
+		{
+			const unsigned nNewValue = (unsigned)mValue.Decrement();
+			if(nNewValue < kValueUnlocked) // Given that nNewValue is unsigned, we don't need to test for < 0.
+				return true;
+			mValue.Increment();
+			return false;
+		}
+
+
+		inline
+		bool RWSpinLock::IsReadLocked() const
+		{
+			const unsigned nValue = (unsigned)mValue.GetValue();
+			return ((nValue - 1) < (kValueUnlocked - 1)); // Given that nValue is unsigned, this is faster than comparing ((n > 0) && (n < kValueUnlocked)), due to the presence of only one comparison instead of two.
+		}
+
+
+		inline
+		void RWSpinLock::ReadUnlock()
+		{
+			mValue.Increment();
+		}
+
+
+		inline
+		void RWSpinLock::WriteLock()
+		{
+			Top: 
+			if(mValue.Add(-kValueUnlocked) == 0)
+				return;
+			mValue.Add(kValueUnlocked);
+			while(mValue.GetValueRaw() != kValueUnlocked){  // It is better to do this polling loop as a first check than to
+				#ifdef EA_THREAD_COOPERATIVE             // do an atomic decrement repeatedly, as the atomic lock is 
+					ThreadSleep();                       // potentially not a cheap thing due to bus locks on some platforms.
+				#else
+					EAProcessorPause();                  // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+				#endif
+			}
+			goto Top;
+		}
+
+
+		inline
+		bool RWSpinLock::WriteTryLock()
+		{
+			if(mValue.Add(-kValueUnlocked) == 0)
+				return true;
+			mValue.Add(kValueUnlocked);
+			return false;
+		}
+
+
+		inline
+		bool RWSpinLock::IsWriteLocked() const
+		{
+			 return (mValue.GetValue() <= 0); // This fails to work if 127 threads at once are in the middle of a failed write lock attempt.
+		}
+
+
+		inline
+		void RWSpinLock::WriteUnlock()
+		{
+			mValue.Add(kValueUnlocked);
+		}
+
+
+		inline
+		void* RWSpinLock::GetPlatformData() 
+		{
+			return &mValue;
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockFactory
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		RWSpinLock* RWSpinLockFactory::CreateRWSpinLock() 
+		{
+			Allocator* pAllocator = GetAllocator();
+
+			if(pAllocator)
+				return new(pAllocator->Alloc(sizeof(RWSpinLock))) RWSpinLock;
+			else
+				return new RWSpinLock;
+		}
+
+
+		inline
+		void RWSpinLockFactory::DestroyRWSpinLock(RWSpinLock* pRWSpinLock)
+		{
+			Allocator* pAllocator = GetAllocator();
+
+			if(pAllocator)
+			{
+				pRWSpinLock->~RWSpinLock();
+				pAllocator->Free(pRWSpinLock);
+			}
+			else
+				delete pRWSpinLock;
+		}
+
+
+		inline
+		size_t RWSpinLockFactory::GetRWSpinLockSize()
+		{
+			return sizeof(RWSpinLock);
+		}
+
+
+		inline
+		RWSpinLock* RWSpinLockFactory::ConstructRWSpinLock(void* pMemory)
+		{
+			return new(pMemory) RWSpinLock;
+		}
+
+
+		inline
+		void RWSpinLockFactory::DestructRWSpinLock(RWSpinLock* pRWSpinLock)
+		{
+			pRWSpinLock->~RWSpinLock();
+		}
+
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoRWSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		AutoRWSpinLock::AutoRWSpinLock(RWSpinLock& spinLock, LockType lockType) 
+			: mSpinLock(spinLock), mLockType(lockType)
+		{ 
+			if(mLockType == kLockTypeRead)
+				mSpinLock.ReadLock();
+			else
+				mSpinLock.WriteLock();
+		}
+
+
+		inline
+		AutoRWSpinLock::~AutoRWSpinLock()
+		{ 
+			if(mLockType == kLockTypeRead)
+				mSpinLock.ReadUnlock();
+			else
+				mSpinLock.WriteUnlock();
+		}
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_RWSPINLOCK_H
+
+
+
+
+
+

+ 452 - 0
include/eathread/eathread_rwspinlockw.h

@@ -0,0 +1,452 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock which supports
+// multiple simultaneous readers but a single writer, where writers get
+// priority over readers.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_RWSPINLOCKW_H
+#define EATHREAD_EATHREAD_RWSPINLOCKW_H
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_atomic.h>
+#include <new>
+
+
+#ifdef _MSC_VER
+	#pragma warning(push)
+	#pragma warning(disable: 4100) // (Compiler claims pRWSpinLockW is unreferenced)
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class RWSpinLockW
+		///
+		/// This class differs from RWSpinLock in that it gives writers priority.
+		/// In exchange for that feature, this version doesn't allow recursive
+		/// read locks and it becomes inefficient due to excessive spinning if
+		/// there are very many simultaneous readers.
+		/// 
+		/// A RWSpinLockW is like a SpinLock except you can have multiple
+		/// readers but a single exclusive writer. This is very beneficial for 
+		/// situations whereby there are many consumers of some data but only 
+		/// one producer of the data. Unlike many thread-level read/write lock
+		/// implementations, this spin lock follows a lean approach and does not
+		/// do arbitration or fairness. The result is
+		/// that if you have many readers who are constantly locking the read
+		/// lock, write lock attempts may not be able to succeed. So you need to
+		/// be careful in how you use this.
+		///
+		/// Note the usage of GetValueRaw in the source code for this class.
+		/// Use of GetValueRaw instead of GetValue is due to a tradeoff that
+		/// has been chosen. GetValueRaw does not come with memory read barrier
+		/// and thus the read value may be out of date. This is OK because it's 
+		/// only used as a rule of thumb to help decide what synchronization 
+		/// primitive to use next. This results in significantly faster execution
+		/// because only one memory synchronization primitive is typically 
+		/// executed instead of two. The problem with GetValueRaw, however, 
+		/// is that in cases where there is very high locking activity from 
+		/// many threads simultaneously GetValueRaw usage could result in 
+		/// a "bad guess" as to what to do next and can also result in a lot
+		/// of spinning, even infinite spinning in the most pathological case.
+		/// However, on the platforms we target this situation is unlikely
+		/// to the point of being virtually impossible in practice.
+		/// And if it were possible then we recommend the user use a different
+		/// mechanism, such as the regular EAThread RWSpinLock.
+		/// 
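+		/// Example usage (a minimal sketch):
+		///     RWSpinLockW gLock;
+		///     int         gSharedValue = 0;
+		///
+		///     int ReadValue() {
+		///         gLock.ReadLock();        // spins while a writer holds or is waiting for the lock
+		///         const int value = gSharedValue;
+		///         gLock.ReadUnlock();
+		///         return value;
+		///     }
+		///
+		///     void WriteValue(int value) {
+		///         gLock.WriteLock();       // registers as a waiting writer, then takes exclusive ownership
+		///         gSharedValue = value;
+		///         gLock.WriteUnlock();
+		///     }
+		///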
+		class RWSpinLockW
+		{
+		public:
+			RWSpinLockW();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang. 
+			// Nor can this function be called if the current thread  
+			// already has a read lock, as it can result in a hang.
+			void ReadLock();
+
+			// This function cannot be called while the current thread  
+			// already has a write lock, else this function will hang.
+			// Nor can this function be called if the current thread  
+			// already has a read lock, as it can result in a hang. 
+			bool ReadTryLock();
+
+			// If this function returns true, then IsWriteLocked must at that moment
+			// be false.
+			bool IsReadLocked() const;
+
+			void ReadUnlock();
+
+			// This function cannot be called while the current thread  
+			// already has a read or write lock, else this function will hang. 
+			void WriteLock();
+
+			// If this function is called while the current thread already 
+			// has a read or write lock, it will always return false.
+			bool WriteTryLock();
+
+			// If this function returns true, then IsReadLocked must at that moment
+			// be false.
+			bool IsWriteLocked() const;
+
+			void WriteUnlock();
+
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData();
+
+		protected:
+			enum Value
+			{
+				kWriteLockBit       = 0x80000000,
+				kWriteWaitingInc    = 0x00010000,
+				kReadLockInc        = 0x00000001,
+				kWriteWaitingMask   = 0x7FFF0000,
+				kReadLockMask       = 0x0000FFFF,
+				kLockAllMask        = kWriteLockBit | kReadLockMask,
+				kWriteAllMask       = kWriteLockBit | kWriteWaitingMask,
+			};
+
+			AtomicInt32 mValue;
+		};
+
+
+
+		/// RWSpinLockWFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class RWSpinLockW.
+		/// A primary use of this would be to allow the RWSpinLockW implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		///
+		class EATHREADLIB_API RWSpinLockWFactory
+		{
+		public:
+			static RWSpinLockW* CreateRWSpinLockW();
+			static void         DestroyRWSpinLockW(RWSpinLockW* pRWSpinLockW);
+			static size_t       GetRWSpinLockWSize();
+			static RWSpinLockW* ConstructRWSpinLockW(void* pMemory);
+			static void         DestructRWSpinLockW(RWSpinLockW* pRWSpinLockW);
+		};
+
+
+
+		/// class AutoRWSpinLockW
+		///
+		/// Example usage:
+		///     void Function(RWSpinLockW& lock) {
+		///         AutoRWSpinLockW autoLock(lock, AutoRWSpinLockW::kLockTypeRead);
+		///         // Do something
+		///     }
+		///
+		class AutoRWSpinLockW
+		{
+		public:
+			enum LockType
+			{
+				kLockTypeRead, 
+				kLockTypeWrite
+			};
+
+			AutoRWSpinLockW(RWSpinLockW& SpinLockW, LockType lockType);
+		   ~AutoRWSpinLockW();
+
+		protected:
+			RWSpinLockW& mSpinLockW;
+			LockType     mLockType;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoRWSpinLockW(const AutoRWSpinLockW&);
+			const AutoRWSpinLockW& operator=(const AutoRWSpinLockW&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockW
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		RWSpinLockW::RWSpinLockW()
+			: mValue(0)
+		{
+		}
+
+
+		inline
+		void RWSpinLockW::ReadLock()
+		{
+			int32_t currVal = mValue.GetValueRaw(); // See the note above about GetValueRaw usage.
+
+			// If there is no writer nor waiting writers, attempt a read lock.
+			if( (currVal & kWriteAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal + kReadLockInc, currVal ) )
+					return;
+			}
+
+			// Spin until there is no writer, no waiting writers, and no read lockers.
+			// By waiting until there are no read or write lockers, we tend to avoid the case
+			// whereby readers starve out writers. The downside is that a lot of read
+			// activity can cause read parallelism to be reduced and read threads waiting
+			// for each other. 
+			do
+			{
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue();    // or EAReadBarrier(); mValue.GetValueRaw();
+			}while (currVal & kLockAllMask); // or kWriteAllMask
+
+			// At this point, we ignore waiting writers and take the lock if we 
+			// can. Any waiting writers that have shown up right as we execute this 
+			// code aren't given any priority over us, unlike above where they are.
+			for( ;; )
+			{
+				// This code has a small problem in that a large number of simultaneous
+				// frequently locking/unlocking readers can cause this code to spin
+				// a lot (in theory, indefinitely). However, in practice our use cases
+				// and target hardware shouldn't cause this to happen.
+				if( (currVal & kWriteLockBit) == 0 )                                             
+				{
+					if( mValue.SetValueConditional( currVal + kReadLockInc, currVal ) )
+						return;
+				}
+
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue(); // or EAReadBarrier(); mValue.GetValueRaw();
+			}
+		}
+
+
+		inline
+		bool RWSpinLockW::ReadTryLock()
+		{
+			int32_t currVal = mValue.GetValueRaw();
+
+			// If there is no writer nor waiting writers, attempt a read lock.
+			if( (currVal & kWriteAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal + kReadLockInc, currVal ) )
+					return true;
+			}
+
+			return false;
+		}
+
+
+		inline
+		bool RWSpinLockW::IsReadLocked() const
+		{
+			// This return value has only diagnostic meaning. It cannot be used for thread synchronization purposes.
+			return ((mValue.GetValueRaw() & kReadLockMask) != 0);
+		}
+
+
+		inline
+		void RWSpinLockW::ReadUnlock()
+		{
+			EAT_ASSERT(IsReadLocked());  // This can't tell us if the current thread was one of the lockers. But it's better than nothing as a debug test.
+			mValue.Add( -kReadLockInc );
+		}
+
+
+		inline
+		void RWSpinLockW::WriteLock()
+		{
+			int32_t currVal = mValue.GetValueRaw();
+
+			// If there is no writer, waiting writers, nor readers, attempt a write lock.
+			if( (currVal & kLockAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal | kWriteLockBit, currVal ) )
+					return;
+			}
+
+			// Post a waiting write. This will make new readers spin until all existing
+			// readers have released their lock, so that we get an even chance.
+			mValue.Add( kWriteWaitingInc );
+
+			// Spin until we get the lock.
+			for( ;; )
+			{
+				if( (currVal & kLockAllMask) == 0 )                                             
+				{
+					if( mValue.SetValueConditional( (currVal | kWriteLockBit) - kWriteWaitingInc, currVal ) )
+						return;
+				}
+
+				EA_THREAD_DO_SPIN();
+				currVal = mValue.GetValue(); // or EAReadBarrier(); mValue.GetValueRaw();
+			}
+		}
+
+
+		inline
+		bool RWSpinLockW::WriteTryLock()
+		{
+			int32_t currVal = mValue.GetValueRaw();
+
+			// If there is no writer, waiting writers, nor readers, attempt a write lock.
+			if( (currVal & kLockAllMask) == 0 )                                             
+			{
+				if( mValue.SetValueConditional( currVal | kWriteLockBit, currVal ) )
+					return true;
+			}
+
+			return false;
+		}
+
+
+		inline
+		bool RWSpinLockW::IsWriteLocked() const
+		{
+			// This return value has only diagnostic meaning. It cannot be used for thread synchronization purposes.
+			return ( (mValue.GetValueRaw() & kWriteLockBit) != 0 );
+		}
+
+
+		inline
+		void RWSpinLockW::WriteUnlock()
+		{
+			EAT_ASSERT(IsWriteLocked());
+			mValue.Add( -kWriteLockBit );
+		}
+
+
+		inline
+		void* RWSpinLockW::GetPlatformData()
+		{
+			return &mValue;
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// RWSpinLockFactory
+		// RWSpinLockWFactory
+
+		inline
+		RWSpinLockW* RWSpinLockWFactory::CreateRWSpinLockW() 
+		{
+			if(gpAllocator)
+				return new(gpAllocator->Alloc(sizeof(RWSpinLockW))) RWSpinLockW;
+			else
+				return new RWSpinLockW;
+		}
+
+		inline
+		void RWSpinLockWFactory::DestroyRWSpinLockW(RWSpinLockW* pRWSpinLock)
+		{
+			if(gpAllocator)
+			{
+				pRWSpinLock->~RWSpinLockW();
+				gpAllocator->Free(pRWSpinLock);
+			}
+			else
+				delete pRWSpinLock;
+		}
+
+		inline
+		size_t RWSpinLockWFactory::GetRWSpinLockWSize()
+		{
+			return sizeof(RWSpinLockW);
+		}
+
+		inline
+		RWSpinLockW* RWSpinLockWFactory::ConstructRWSpinLockW(void* pMemory)
+		{
+			return new(pMemory) RWSpinLockW;
+		}
+
+		inline
+		void RWSpinLockWFactory::DestructRWSpinLockW(RWSpinLockW* pRWSpinLock)
+		{
+			pRWSpinLock->~RWSpinLockW();
+		}
+
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoRWSpinLockW
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		AutoRWSpinLockW::AutoRWSpinLockW(RWSpinLockW& spinLock, LockType lockType) 
+			: mSpinLockW(spinLock), mLockType(lockType)
+		{ 
+			if(mLockType == kLockTypeRead)
+				mSpinLockW.ReadLock();
+			else
+				mSpinLockW.WriteLock();
+		}
+
+
+		inline
+		AutoRWSpinLockW::~AutoRWSpinLockW()
+		{ 
+			if(mLockType == kLockTypeRead)
+				mSpinLockW.ReadUnlock();
+			else
+				mSpinLockW.WriteUnlock();
+		}
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_RWSPINLOCKW_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 339 - 0
include/eathread/eathread_semaphore.h

@@ -0,0 +1,339 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements a semaphore thread synchronization class.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SEMAPHORE_H
+#define EATHREAD_EATHREAD_SEMAPHORE_H
+
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+//
+// Defined as 0 or 1. Defined as 1 if the OS provides no native semaphore support.
+//
+#ifndef EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+	#define EATHREAD_USE_SYNTHESIZED_SEMAPHORE 0
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_FAST_MS_SEMAPHORE_ENABLED
+//
+// Defined as 0 or 1.
+// Enables the usage of a faster intra-process semaphore on Microsoft platforms.
+// By faster we mean that it is typically 10x or more faster.
+// Has the downside that it is not interchangeable with the SEMAPHORE built-in
+// type and its behaviour won't be strictly identical.
+// Even if this option is enabled, you can still get the built-in behaviour
+// of Microsoft semaphores by specifying the semaphore as inter-process.
+//
+#ifndef EATHREAD_FAST_MS_SEMAPHORE_ENABLED
+	#define EATHREAD_FAST_MS_SEMAPHORE_ENABLED 1
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EASemaphoreData
+///
+/// This is used internally by class Semaphore.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+/// 
+#if !EA_THREADS_AVAILABLE
+	struct EASemaphoreData
+	{
+		volatile int mnCount;
+		int mnMaxCount;
+
+		EASemaphoreData();
+	};
+
+#elif EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+	#include <eathread/eathread_condition.h>
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_atomic.h>
+
+	struct EASemaphoreData
+	{
+		EA::Thread::Condition   mCV;
+		EA::Thread::Mutex       mMutex;
+		EA::Thread::AtomicInt32 mnCount;
+		int                     mnMaxCount;
+		bool                    mbValid;
+
+		EASemaphoreData();
+	};
+
+#elif defined(__APPLE__)
+
+	#include <mach/semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	struct EASemaphoreData
+	{
+		semaphore_t mSemaphore;
+		EA::Thread::AtomicInt32 mnCount;
+		int  mnMaxCount;
+		bool mbIntraProcess;
+		
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_SONY)
+	#include <kernel/semaphore.h>
+	#include <eathread/eathread_atomic.h>
+	#include <eathread/internal/timings.h>
+	struct EASemaphoreData
+	{
+		SceKernelSema mSemaphore;
+
+		int  mnMaxCount;
+		EA::Thread::AtomicInt32 mnCount;
+
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <semaphore.h>
+	#include <eathread/eathread_atomic.h>
+
+	#if defined(EA_PLATFORM_WINDOWS)
+		#ifdef CreateSemaphore
+			#undef CreateSemaphore // Windows #defines CreateSemaphore to CreateSemaphoreA or CreateSemaphoreW.
+		#endif
+	#endif
+
+	struct EASemaphoreData
+	{
+		sem_t mSemaphore;
+		EA::Thread::AtomicInt32 mnCount;
+		int  mnMaxCount;
+		bool mbIntraProcess;
+
+		EASemaphoreData();
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+	#ifdef CreateSemaphore
+		#undef CreateSemaphore // Windows #defines CreateSemaphore to CreateSemaphoreA or CreateSemaphoreW.
+	#endif
+
+	struct EATHREADLIB_API EASemaphoreData
+	{
+		void*   mhSemaphore;    // We use void* instead of HANDLE in order to avoid #including windows.h. HANDLE is typedef'd to (void*) on all Windows-like platforms.
+		int32_t mnCount;        // Number of available posts. Under the fast semaphore pathway, a negative value means there are waiters.
+		int32_t mnCancelCount;  // Used by fast semaphore logic. Is the deferred cancel count.
+		int32_t mnMaxCount;     // 
+		bool    mbIntraProcess; // Used under Windows, which can have multiple processes. Always true for XBox.
+
+		EASemaphoreData();
+		void UpdateCancelCount(int32_t n);
+	};
+
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// SemaphoreParameters
+		/// Specifies semaphore settings.
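+		///
+		/// Example usage (a sketch):
+		///     SemaphoreParameters params(4, true, "JobSema"); // four initial posts, intra-process
+		///     Semaphore           sema(&params);              // custom immediate initialization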
+		struct EATHREADLIB_API SemaphoreParameters
+		{
+			int  mInitialCount;  /// Initial available count
+			int  mMaxCount;      /// Max possible count. Defaults to INT_MAX.
+			bool mbIntraProcess; /// True if the semaphore is intra-process, else inter-process.
+			char mName[16];      /// Semaphore name, applicable only to platforms that recognize named synchronization objects.
+
+			SemaphoreParameters(int initialCount = 0, bool bIntraProcess = true, const char* pName = NULL);
+		};
+
+
+		/// class Semaphore
+		/// A semaphore is an object which has an associated count which is >= 0 and
+		/// a value > 0 means that a thread can 'grab' the semaphore and decrement its
+		/// value by one. A value of 0 means that threads must wait until another thread
+		/// 'un-grabs' the semaphore. Thus a semaphore is like a car rental agency which
+		/// has a limited number of cars for rent and if they are out of cars, you have 
+		/// to wait until one of the renters returns their car.
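+		///
+		/// Example usage (a producer/consumer sketch):
+		///     Semaphore gJobsAvailable(0);   // a count of 0 means consumers block until a Post
+		///
+		///     void Producer() {
+		///         // ... enqueue a job ...
+		///         gJobsAvailable.Post();     // wakes one waiting consumer
+		///     }
+		///
+		///     void Consumer() {
+		///         gJobsAvailable.Wait();     // blocks while the count is 0
+		///         // ... dequeue and process the job ...
+		///     }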
+		class EATHREADLIB_API Semaphore
+		{
+		public:
+			enum Result{
+				kResultError   = -1,
+				kResultTimeout = -2
+			};
+
+			/// Semaphore
+			/// For immediate default initialization, use no args.
+			/// For custom immediate initialization, supply a first argument. 
+			/// For deferred initialization, use Semaphore(NULL, false) then later call Init.
+			/// For deferred initialization of an array of objects, create an empty
+			/// subclass whose default constructor chains back to Semaphore(NULL, false).
+			Semaphore(const SemaphoreParameters* pSemaphoreParameters = NULL, bool bDefaultParameters = true);
+
+			/// Semaphore
+			/// This is a constructor which initializes the Semaphore to a specific count 
+			/// and intializes the other Semaphore parameters to default values. See the
+			/// and initializes the other Semaphore parameters to default values. See the
+			Semaphore(int initialCount);
+
+			/// ~Semaphore
+			/// Destroys an existing semaphore. The semaphore must not be locked 
+			/// by any thread, otherwise the resulting behaviour is undefined.
+			~Semaphore();
+
+			/// Init
+			/// Initializes the semaphore with given parameters.
+			bool Init(const SemaphoreParameters* pSemaphoreParameters);
+
+			/// Wait
+			/// Locks the semaphore (reducing its count by one) or gives up trying to 
+			/// lock it after a given timeout has expired. If the semaphore count is > 0
+			/// then the count will be reduced by one. If the semaphore count is 0, the
+			/// call will block until another thread unlocks it or the timeout expires.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- that the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
+			/// Return value:
+			///     kResultError      The semaphore could not be obtained due to error.
+			///     kResultTimeout    The semaphore could not be obtained due to timeout.
+			///     >= 0              The new count for the semaphore.
+			///
+			/// It's possible that two threads waiting on the same semaphore will return 
+			/// with a result of zero. Thus you cannot rely on the semaphore's return value
+			/// to ascertain which was the last thread to return from the Wait. 
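+			///
+			/// Example usage (a sketch; GetThreadTime() is EAThread's absolute clock, so a relative
+			/// wait is expressed as 'now + interval'):
+			///     Semaphore sema;
+			///     const int result = sema.Wait(EA::Thread::GetThreadTime() + 500); // wait up to ~500ms
+			///     if(result == Semaphore::kResultTimeout)
+			///         { /* handle the timeout */ }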
+			int Wait(const ThreadTime& timeoutAbsolute = kTimeoutNone);
+
+			/// Post
+			/// Increments the signalled value of the semaphore by the count. 
+			/// Returns the available count after the operation has completed. 
+			/// Returns kResultError upon error. A Wait is often eventually 
+			/// followed by a corresponding Post.
+			/// For the case of count being greater than 1, not all platforms
+			/// act the same. If count results in exceeding the max count then
+			/// kResultError is returned. Some platforms return kResultError 
+			/// before any of the count is applied, while others return 
+			/// kResultError after some of count has been applied.
+			int Post(int count = 1);
+
+			/// GetCount
+			/// Returns current number of available locks associated with the semaphore.
+			/// This is useful for debugging and for quick polling checks of the 
+			/// status of the semaphore. This value changes over time as multiple
+			/// threads wait and post to the semaphore. This value cannot be trusted
+			/// to exactly represent the count upon its return if multiple threads are 
+			/// using this Semaphore at the time.
+			int GetCount() const;
+
+			/// GetPlatformData
+			/// Returns the platform-specific data handle for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mSemaphoreData; }
+
+		protected:
+			EASemaphoreData mSemaphoreData;
+
+		private:
+			// Objects of this class are not copyable.
+			Semaphore(const Semaphore&){}
+			Semaphore& operator=(const Semaphore&){ return *this; }
+		};
+
+
+		/// SemaphoreFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Semaphore.
+		/// A primary use of this would be to allow the Semaphore implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API SemaphoreFactory
+		{
+		public:
+			static Semaphore* CreateSemaphore();                        // Internally implemented as: return new Semaphore;
+			static void       DestroySemaphore(Semaphore* pSemaphore);  // Internally implemented as: delete pSemaphore;
+
+			static size_t     GetSemaphoreSize();                       // Internally implemented as: return sizeof(Semaphore);
+			static Semaphore* ConstructSemaphore(void* pMemory);        // Internally implemented as: return new(pMemory) Semaphore;
+			static void       DestructSemaphore(Semaphore* pSemaphore); // Internally implemented as: pSemaphore->~Semaphore();
+		};
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoSemaphore
+		/// An AutoSemaphore grabs the Semaphore in its constructor and posts 
+		/// the Semaphore once in its destructor (when it goes out of scope).
+		class EATHREADLIB_API AutoSemaphore
+		{
+		public:
+			AutoSemaphore(Semaphore& semaphore) 
+				: mSemaphore(semaphore)
+				{ mSemaphore.Wait(); }
+
+			~AutoSemaphore()
+				{ mSemaphore.Post(1); }
+
+		protected:
+			Semaphore& mSemaphore;
+
+			// Prevent copying by default, as copying is dangerous.
+			AutoSemaphore(const AutoSemaphore&);
+			const AutoSemaphore& operator=(const AutoSemaphore&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+#endif // EATHREAD_EATHREAD_SEMAPHORE_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 319 - 0
include/eathread/eathread_spinlock.h

@@ -0,0 +1,319 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Implements an efficient proper multithread-safe spinlock.
+//
+// A spin lock is the lightest form of mutex available. The Lock operation is
+// simply a loop that waits to set a shared variable. SpinLocks are not 
+// recursive (i.e. they can only be locked once by a thread) and are 
+// intra-process only. You have to be careful using spin locks because if you 
+// have a high priority thread that calls Lock while a lower priority thread
+// has the same lock, then on many systems the higher priority thread will 
+// use up all the CPU time waiting for the lock and the lower priority thread
+// will not get the CPU time needed to free the lock.
+//
+// From Usenet:
+//    "A spinlock is a machine-specific "optimized" form of mutex
+//    ("MUTual EXclusion" device). However, you should never use
+//    a spinlock unless you know that you have multiple threads
+//    and that you're running on a multiprocessor. Otherwise, at
+//    best you're wasting a lot of time. A spinlock is great for
+//    "highly parallel" algorithms like matrix decompositions,
+//    where the application (or runtime) "knows" (or at least goes
+//    to lengths to ensure) that the threads participating are all
+//    running at the same time. Unless you know that, (and, if your
+//    code doesn't create threads, you CAN'T know that), don't even
+//    think of using a spinlock."
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SPINLOCK_H
+#define EATHREAD_EATHREAD_SPINLOCK_H
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <new> // include new for placement new operator
+
+#if defined(EA_PROCESSOR_X86)
+	// The reference x86 code works fine, as there is little that assembly
+	// code can do to improve it by much, assuming that the code is compiled
+	// in an optimized way. With VC7 on the PC platform, compiling with 
+	// optimization set to 'minimize size' and most other optimizations 
+	// enabled yielded code that was similar to Intel reference asm code.
+	// However, when the compiler was set to minimize size and enable inlining,
+	// it created an implementation of the Lock function that was less optimal.
+	// #include <eathread/x86/eathread_spinlock_x86.h>
+#elif defined(EA_PROCESSOR_IA64)
+	// The reference code below is probably fine.
+	// #include <eathread/ia64/eathread_spinlock_ia64.h>
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+// The above header files would define EA_THREAD_SPINLOCK_IMPLEMENTED.
+#if !defined(EA_THREAD_SPINLOCK_IMPLEMENTED)
+
+	// We provide an implementation that works for all systems but is less optimal.
+	#include <eathread/eathread_sync.h>
+	#include <eathread/eathread_atomic.h>
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// class SpinLock
+			///
+			/// Spinlocks are high-performance locks designed for special circumstances.
+			/// As such, they are not 'recursive' -- you cannot lock a spinlock twice.
+			/// Spinlocks have no explicit awareness of threading, but they are explicitly
+			/// thread-safe. 
+			///
+			/// You do not want to use spin locks as a *general* replacement for mutexes or
+			/// critical sections, even if you know your mutex use won't be recursive.
+			/// The reason for this is due to thread scheduling and thread priority issues.
+			/// A spinlock is not a kernel- or threading-kernel-level object and thus while
+			/// this gives it a certain amount of speed, it also means that if you have a 
+			/// low priority thread with a spinlock locked and a high priority thread
+			/// waiting for the spinlock, the program will hang, possibly indefinitely,
+			/// because the thread scheduler is giving all its time to the high priority 
+			/// thread which happens to be stuck. 
+			/// 
+			/// On the other hand, when judiciously used, a spin lock can yield significantly
+			/// higher performance than general mutexes, especially on platforms where mutex
+			/// locking is particularly expensive or on multiprocessing systems.
+			///
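+			/// Example usage (a minimal sketch):
+			///     SpinLock gLock;
+			///
+			///     void Increment(int& counter) {
+			///         gLock.Lock();      // busy-waits until the lock is acquired
+			///         ++counter;
+			///         gLock.Unlock();
+			///     }
+			///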
+			class SpinLock
+			{
+			protected: // Declared at the top because otherwise some compilers fail to compile inline functions below.
+				AtomicInt32 mAI;  /// A value of 0 means unlocked, while 1 means locked.
+
+			public:
+				SpinLock();
+
+				void Lock();
+				bool TryLock();
+				bool IsLocked();
+				void Unlock();
+
+				void* GetPlatformData();
+			};
+
+
+			/// SpinLockFactory
+			/// 
+			/// Implements a factory-based creation and destruction mechanism for class Spinlock.
+			/// A primary use of this would be to allow the Spinlock implementation to reside in
+			/// a private library while users of the class interact only with the interface
+			/// header and the factory. The factory provides conventional create/destroy 
+			/// semantics which use global operator new, but also provides manual construction/
+			/// destruction semantics so that the user can provide for memory allocation 
+			/// and deallocation.
+			class EATHREADLIB_API SpinLockFactory
+			{
+			public:
+				static SpinLock* CreateSpinLock();
+				static void      DestroySpinLock(SpinLock* pSpinLock);
+
+				static size_t    GetSpinLockSize();
+				static SpinLock* ConstructSpinLock(void* pMemory);
+
+				static void DestructSpinLock(SpinLock* pSpinLock);
+			};
+
+		} // namespace Thread
+
+	} // namespace EA
+
+
+#endif // EA_THREAD_SPINLOCK_IMPLEMENTED
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AutoSpinLock
+		/// An AutoSpinLock locks the SpinLock in its constructor and 
+		/// unlocks the SpinLock in its destructor (when it goes out of scope).
+		class AutoSpinLock
+		{
+		public:
+			AutoSpinLock(SpinLock& spinLock);
+		   ~AutoSpinLock();
+
+		protected:
+			SpinLock& mSpinLock;
+
+		protected:
+			// Prevent copying by default, as copying is dangerous.
+			AutoSpinLock(const AutoSpinLock&);
+			const AutoSpinLock& operator=(const AutoSpinLock&);
+		};
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// inlines
+///////////////////////////////////////////////////////////////////////////////
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+
+		///////////////////////////////////////////////////////////////////////
+		// SpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		SpinLock::SpinLock() 
+		  : mAI(0)
+		{
+		}
+
+		inline
+		void SpinLock::Lock()
+		{
+			Top: // Due to modern processor branch prediction, the compiler will optimize better for true branches and so we do a manual goto loop here.
+			if(mAI.SetValueConditional(1, 0))
+				return;
+
+			// The loop below is present because the SetValueConditional 
+			// call above is likely to be significantly more expensive and 
+			// thus we benefit by polling before attempting the real thing.
+			// This is a common practice and is recommended by Intel, etc.
+			while (mAI.GetValue() != 0)
+			{
+			#ifdef EA_THREAD_COOPERATIVE
+				ThreadSleep();
+			#else
+				EAProcessorPause();
+			#endif
+			}
+			goto Top;                                          
+		}                                                
+
+		inline
+		bool SpinLock::TryLock()
+		{
+			return mAI.SetValueConditional(1, 0);
+		}
+
+		inline
+		bool SpinLock::IsLocked()
+		{
+			return mAI.GetValueRaw() != 0;
+		}
+
+		inline
+		void SpinLock::Unlock()
+		{
+			EAT_ASSERT(IsLocked());
+			mAI.SetValue(0);
+		}
+
+		inline
+		void* SpinLock::GetPlatformData()
+		{
+			return &mAI;
+		}
+
+
+		///////////////////////////////////////////////////////////////////////
+		// SpinLockFactory
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		SpinLock* SpinLockFactory::CreateSpinLock()
+		{
+			if(gpAllocator)
+				return new(gpAllocator->Alloc(sizeof(SpinLock))) SpinLock;
+			else
+				return new SpinLock;
+		}
+
+		inline
+		void SpinLockFactory::DestroySpinLock(SpinLock* pSpinLock)
+		{
+			if(gpAllocator)
+			{
+				pSpinLock->~SpinLock();
+				gpAllocator->Free(pSpinLock);
+			}
+			else
+				delete pSpinLock;
+		}
+
+		inline
+		size_t SpinLockFactory::GetSpinLockSize()
+		{
+			return sizeof(SpinLock);
+		}
+
+		inline
+		SpinLock* SpinLockFactory::ConstructSpinLock(void* pMemory)
+		{
+			return new(pMemory) SpinLock;
+		}
+
+		EA_DISABLE_VC_WARNING(4100) // Compiler mistakenly claims pSpinLock is unreferenced
+		inline
+		void SpinLockFactory::DestructSpinLock(SpinLock* pSpinLock)
+		{
+			pSpinLock->~SpinLock();
+		}
+		EA_RESTORE_VC_WARNING()
+
+
+		///////////////////////////////////////////////////////////////////////
+		// AutoSpinLock
+		///////////////////////////////////////////////////////////////////////
+
+		inline
+		AutoSpinLock::AutoSpinLock(SpinLock& spinLock) 
+		  : mSpinLock(spinLock)
+		{
+			mSpinLock.Lock();
+		}
+
+		inline
+		AutoSpinLock::~AutoSpinLock()
+		{
+			mSpinLock.Unlock();
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+#endif // EATHREAD_EATHREAD_SPINLOCK_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 362 - 0
include/eathread/eathread_storage.h

@@ -0,0 +1,362 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines thread-local storage and related concepts in a platform-independent
+// and thread-safe manner.
+//
+// As of this writing (10/2003), documentation concerning thread-local 
+// storage implementations under GCC, pthreads, and MSVC/Windows can be found at:
+//    http://gcc.gnu.org/onlinedocs/gcc-3.3.2/gcc/Thread-Local.html#Thread-Local
+//    http://java.icmc.sc.usp.br/library/books/ibm_pthreads/users-33.htm#324811
+//    http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vccore/html/_core_Thread_Local_Storage_.28.TLS.29.asp
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_STORAGE_H
+#define EATHREAD_EATHREAD_STORAGE_H
+
+
+#include <eathread/internal/config.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <stddef.h>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// EA_THREAD_LOCAL
+		/// 
+		/// Documentation (partially culled from online information):
+		/// Thread Local Storage (a.k.a. TLS and Thread Specific Storage) is a 
+		/// mechanism by which each thread in a multithreaded process allocates 
+		/// storage for thread-specific data. In standard multithreaded programs, 
+		/// data is shared among all threads of a given process, whereas thread 
+		/// local storage is the mechanism for allocating per-thread data.
+		///
+		/// The EA_THREAD_LOCAL specifier may be used alone, with the extern or 
+		/// static specifiers, but with no other storage class specifier. 
+		/// When used with extern or static, EA_THREAD_LOCAL must appear 
+		/// immediately after the other storage class specifier.
+		///
+		/// The EA_THREAD_LOCAL specifier may be applied to any global, file-scoped 
+		/// static, function-scoped static, or static data member of a class. 
+		/// It may not be applied to block-scoped automatic or non-static data member.
+		///
+		/// When the address-of operator is applied to a thread-local variable, 
+		/// it is evaluated at run-time and returns the address of the current 
+		/// thread's instance of that variable. An address so obtained may be used 
+		/// by any thread. When a thread terminates, any pointers to thread-local
+		/// variables in that thread become invalid.
+		///
+		/// No static initialization may refer to the address of a thread-local variable.
+		/// In C++, if an initializer is present for a thread-local variable, 
+		/// it must be a constant-expression, as defined in 5.19.2 of the ANSI/ISO C++ standard. 
+		/// 
+		/// Windows has special considerations for using thread local storage in a DLL.  
+		/// 
+		/// Example usage:
+		///    #if defined(EA_THREAD_LOCAL)
+		///        EA_THREAD_LOCAL int n = 0;                       // OK
+		///        extern EA_THREAD_LOCAL struct Data s;            // OK
+		///        static EA_THREAD_LOCAL char* p;                  // OK
+		///        EA_THREAD_LOCAL int i = sizeof(i);               // OK.
+		///        EA_THREAD_LOCAL std::string s("hello");          // Bad -- Can't be used for initialized objects.
+		///        EA_THREAD_LOCAL int Function();                  // Bad -- Can't be used as return value.
+		///        void Function(){ EA_THREAD_LOCAL int i = 0; }    // Bad -- Can't be used in function.
+		///        void Function(EA_THREAD_LOCAL int i){ }          // Bad -- can't be used as argument.
+		///        extern int i; EA_THREAD_LOCAL int i;             // Bad -- Declarations differ.
+		///        int EA_THREAD_LOCAL i;                           // Bad -- Can't be used as a type modifier.
+		///        EA_THREAD_LOCAL int i = i;                       // Bad -- Can't reference self before initialization.
+		///    #else
+		///        Need to use EA::Thread::ThreadLocalStorage.
+		///    #endif
+
+		#if !EA_THREADS_AVAILABLE
+			#define EA_THREAD_LOCAL
+
+		// Disabled until we have at least one C++11 compiler that supports this which can be tested.
+		//#elif (EABASE_VERSION_N >= 20040) && !defined(EA_COMPILER_NO_THREAD_LOCAL)
+		//    #define EA_THREAD_LOCAL thread_local
+
+		#elif EA_USE_CPP11_CONCURRENCY
+			#if defined(EA_COMPILER_MSVC11_0) // VC11 doesn't support C++11 thread_local storage class yet
+				#define EA_THREAD_LOCAL __declspec(thread)
+			#else
+				#define EA_THREAD_LOCAL thread_local
+			#endif
+
+		#elif defined(__APPLE__)
+			// http://clang.llvm.org/docs/LanguageExtensions.html
+			#if __has_feature(cxx_thread_local)
+				#define EA_THREAD_LOCAL thread_local
+			#else
+				#define EA_THREAD_LOCAL 
+			#endif
+		#elif (defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))) && (defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)) // Any of the Unix variants, including Mac OSX.
+			// While GNUC v3.3 is the first version that supports thread local storage
+			// declarators, not all versions of GNUC for all platforms support it, 
+			// as it requires support from other tools and libraries beyond the compiler.
+			#if defined(__CYGWIN__) // Cygwin's branch of the GCC toolchain does not currently support TLS.
+				// Not supported.
+			#else
+				#define EA_THREAD_LOCAL __thread
+			#endif
+
+		#elif defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_BORLAND) || (defined(EA_PLATFORM_WINDOWS) && defined(EA_COMPILER_INTEL))
+			// This appears to be supported by VC++ and Borland C++, and it is 
+			// supported by all compilers for the Windows platform.
+			#define EA_THREAD_LOCAL __declspec(thread)
+
+		#elif defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING)
+			#define EA_THREAD_LOCAL __thread
+
+		#else
+			// Else don't define it as anything. This will result in a compilation 
+			// error reporting the problem. We cannot simply #define away the 
+			// EA_THREAD_LOCAL term, as doing so would defeat the purpose of the 
+			// specifier. Dynamic thread local storage is a more flexible and
+			// portable solution to the problem.
+			// #define EA_THREAD_LOCAL
+		#endif
+		/////////////////////////////////////////////////////////////////////////
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+
+
+/////////////////////////////////////////////////////////////////////////
+/// EAThreadLocalStorageData
+///
+/// This is used internally by class ThreadLocalStorage.
+/// Todo: Consider moving this declaration into a platform-specific 
+/// header file.
+///
+#if defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+
+	struct EAThreadLocalStorageData{
+		ScePthreadKey mKey;     // This is usually a pointer.
+		int           mResult;  // Result of call to scePthreadKeyCreate, so we can know if mKey is valid.
+	};
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && !defined(CS_UNDEFINED_STRING)
+	// In this case we will be using pthread_key_create, pthread_key_delete, pthread_getspecific, pthread_setspecific.
+	#include <pthread.h>
+
+	struct EAThreadLocalStorageData{
+		pthread_key_t mKey;     // This is usually a pointer.
+		int           mResult;  // Result of call to pthread_key_create, so we can know if mKey is valid.
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_WINDOWS_PHONE) && !(defined(EA_PLATFORM_WINDOWS) && !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)) 
+	// In this case we will be using TlsAlloc, TlsFree, TlsGetValue, TlsSetValue.
+	typedef uint32_t EAThreadLocalStorageData;
+
+#elif (!EA_THREADS_AVAILABLE || defined(EA_PLATFORM_CONSOLE)) && !defined(CS_UNDEFINED_STRING)
+	#include <eathread/eathread.h>
+
+	struct EAThreadLocalStorageData
+	{
+		struct ThreadToDataPair
+		{
+			EA::Thread::ThreadUniqueId mThreadID;
+			const void* mpData;
+		};
+		#ifndef EA_TLS_MAX_COUNT
+			#define EA_TLS_MAX_COUNT 16 // This is the max number of threads that might want to use the given thread-local-storage item.
+		#endif
+		ThreadToDataPair* GetTLSEntry(bool bCreateIfNotFound);
+		ThreadToDataPair  mDataArray[EA_TLS_MAX_COUNT];
+		int               mDataArrayCount;
+	};
+
+#else // STL version which uses less memory but uses heap memory.
+
+	// If you use this version, then you want to make sure your STL is using new/delete
+	// by default and then make sure you are globally mapping new/delete to your 
+	// custom allocation system. STLPort, for example, tends to want to use its
+	// own internal allocator which is non-optimal for serious uses.
+
+	EA_DISABLE_VC_WARNING(4574 4350)
+	#include <map> // Note that this dependency on STL map is only present if you use this pathway, which is disabled by default.
+	EA_RESTORE_VC_WARNING()
+
+	#include <eathread/eathread.h>
+	#include <eathread/eathread_futex.h>
+
+	struct EAThreadLocalStorageData
+	{
+		EAThreadLocalStorageData() : mThreadToDataMap(NULL) {}
+		~EAThreadLocalStorageData() { delete mThreadToDataMap; mThreadToDataMap = NULL; }
+		void** GetTLSEntry(bool bCreateIfNotFound);
+		// We allocate this map only when needed
+		// This prevents too early allocations before our allocator initialization
+		std::map<EA::Thread::ThreadUniqueId, const void*> *mThreadToDataMap;
+		EA::Thread::Futex mFutex;
+	private:
+		// Disable copy and assignment
+		EAThreadLocalStorageData(const EAThreadLocalStorageData&);
+		EAThreadLocalStorageData operator=(const EAThreadLocalStorageData&);
+	};
+#endif
+/////////////////////////////////////////////////////////////////////////
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/////////////////////////////////////////////////////////////////////////
+		/// class ThreadLocalStorage
+		///
+		/// This is a class that lets you store a pointer to data uniquely for 
+		/// each thread. It thus allows access to a pointer as if it were local
+		/// but each thread gets its own copy.
+		///
+		/// The implementation behind this class maps to the PThreads API under
+		/// Unix-like systems, maps to the Windows TLS API under Windows, and 
+		/// maps to a custom implementation otherwise. The PThreads API has a 
+		/// mechanism whereby you can set a callback to execute when a thread
+		/// exits; the system will call the callback once for each pointer 
+		/// that was stored in all thread local storage objects. Due to the 
+		/// general weaknesses of the PThread mechanism and due to our interest
+		/// in being as lean as possible, we don't support automatic callbacks
+		/// such as with PThreads. The same effect can be achieved manually 
+		/// when needed.
+		///
+		/// Example usage:
+		///     ThreadLocalStorage tls;
+		///     void* pValue;
+		///     bool bResult;
+		///     
+		///     pValue  = tls.GetValue();              // Return value will be NULL.
+		///     bResult = tls.SetValue(NULL);          // This is fine and bResult should be true.
+		///     pValue  = tls.GetValue();              // Return value will be NULL.
+		///     bResult = tls.SetValue(pSomeObject);   // Set thread-specific value to pSomeObject.
+		///     bResult = tls.SetValue(pOtherObject);  // Set thread-specific value to pOtherObject.
+		///     pValue  = tls.GetValue();              // Return value will be pOtherObject.
+		///     bResult = tls.SetValue(NULL);          // This is fine and bResult should be true.
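+		///
+		/// Manual cleanup sketch (illustrative; WorkerData is a hypothetical type).
+		/// Since there are no automatic per-thread destructor callbacks, a thread 
+		/// that stores heap data should free it and clear the slot before exiting:
+		///     tls.SetValue(new WorkerData);                  // On thread start.
+		///     ...
+		///     delete (WorkerData*)tls.GetValue();            // Before thread exit...
+		///     tls.SetValue(NULL);                            // ...and clear the slot.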
+		///
+		class EATHREADLIB_API ThreadLocalStorage
+		{
+		public:
+			ThreadLocalStorage();
+		   ~ThreadLocalStorage();
+
+			/// GetValue
+			/// Returns the pointer previously stored via SetValue, or returns NULL if 
+			/// there is no stored value or if the user stored NULL.
+			void* GetValue();
+
+			/// SetValue
+			/// Stores a pointer, returns true if the storage was possible. In general,
+			/// the only reason that false would ever be returned is if there wasn't 
+			/// sufficient memory remaining for the operation. When a thread exits, 
+			/// it should call SetValue(NULL), as there is currently no mechanism to 
+			/// automatically detect thread exits on some platforms and thus there is
+			/// no way to automatically clear these values.
+			bool SetValue(const void* pData);
+
+			/// GetPlatformData
+			/// Returns the platform-specific thread local storage handle for debugging
+			/// uses or other cases whereby special (and non-portable) uses are required.
+			void* GetPlatformData()
+				{ return &mTLSData; }
+
+		protected:
+			EAThreadLocalStorageData mTLSData;
+
+		private:
+			// Disable copy and assignment
+			ThreadLocalStorage(const ThreadLocalStorage&);
+			ThreadLocalStorage operator=(const ThreadLocalStorage&);
+		};
+		/////////////////////////////////////////////////////////////////////////
+
+
+
+		/// ThreadLocalStorageFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class ThreadLocalStorage.
+		/// A primary use of this would be to allow the ThreadLocalStorage implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
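+		///
+		/// Usage sketch (illustrative; MyAlloc/MyFree stand in for a user allocator):
+		///     void* pMemory = MyAlloc(ThreadLocalStorageFactory::GetThreadLocalStorageSize());
+		///     ThreadLocalStorage* pTLS = ThreadLocalStorageFactory::ConstructThreadLocalStorage(pMemory);
+		///     // ... use pTLS ...
+		///     ThreadLocalStorageFactory::DestructThreadLocalStorage(pTLS);
+		///     MyFree(pMemory);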
+		class EATHREADLIB_API ThreadLocalStorageFactory
+		{
+		public:
+			static ThreadLocalStorage* CreateThreadLocalStorage();                           // Internally implemented as: return new ThreadLocalStorage;
+			static void                DestroyThreadLocalStorage(ThreadLocalStorage* pTLS);  // Internally implemented as: delete pTLS;
+
+			static size_t              GetThreadLocalStorageSize();                          // Internally implemented as: return sizeof(ThreadLocalStorage);
+			static ThreadLocalStorage* ConstructThreadLocalStorage(void* pMemory);           // Internally implemented as: return new(pMemory) ThreadLocalStorage;
+			static void                DestructThreadLocalStorage(ThreadLocalStorage* pTLS); // Internally implemented as: pTLS->~ThreadLocalStorage();
+		};
+
+
+
+		// ThreadLocalPointer
+		// This is a class that adds pointer type awareness to ThreadLocalStorage.
+		// The interface is designed to look like the standard auto_ptr class.
+		//
+		// The following is disabled until we provide a way to enumerate and delete
+		// the pointers when the object goes out of scope, or to delete the thread-specific 
+		// pointer when the thread ends. Both are required before this class fully acts
+		// as one would expect.
+		//
+		//template <typename T>
+		//class ThreadLocalPointer
+		//{
+		//public:
+		//    T* get()        const { return  static_cast<T*>(mTLS.GetValue()); }
+		//    T* operator->() const { return  static_cast<T*>(mTLS.GetValue()); }
+		//    T& operator*()  const { return *static_cast<T*>(mTLS.GetValue()); }
+		//    void reset(T* pNew = 0){
+		//        T* const pTemp = get();
+		//        if(pNew != pTemp){
+		//            delete pTemp;
+		//            mTLS.SetValue(pNew);
+		//        }
+		//    }
+		//
+		//protected:
+		//    ThreadLocalStorage mTLS;
+		//
+		//private:
+		//    ThreadLocalPointer(const ThreadLocalPointer&);
+		//    const ThreadLocalPointer& operator=(const ThreadLocalPointer&);
+		//};
+		/////////////////////////////////////////////////////////////////////////
+
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#endif // #ifdef EATHREAD_EATHREAD_STORAGE_H
+
+
+
+
+
+
+
+

+ 272 - 0
include/eathread/eathread_sync.h

@@ -0,0 +1,272 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+//
+// Overview (partially taken from Usenet)
+// On all modern hardware, a store instruction does not necessarily result
+// in an immediate write to main memory, or even to the (processor specific)
+// cache. A store instruction simply places a write request in a request
+// queue, and continues. (Future reads in the same processor will check if
+// there is a write to the same address in this queue, and fetch it, rather
+// than reading from memory. Reads from another processor, however, can't
+// see this queue.) Generally, the ordering of requests in this queue is
+// not guaranteed, although some hardware offers stricter guarantees.
+// Thus, you must do something to ensure that the writes actually occur.
+// This is called a write barrier, and generally takes the form of a special
+// instruction.
+// 
+// And of course, just because you have written the data to main memory
+// doesn't mean that some other processor, executing a different thread,
+// doesn't have a stale copy in its cache, and use that for a read. Before
+// reading the variables, you need to ensure that the processor has the
+// most recent copy in its cache. This is called a read barrier, and
+// again, takes the form of a special hardware instruction. A number of
+// architectures (e.g. Intel x86-32) still guarantee read consistency -- 
+// all of the processors "listen" on the main memory bus, and if there is 
+// a write, automatically purge the corresponding data from their cache. 
+// But not all.
+//
+// Note that if you are writing data within an operating system-level 
+// locked mutex, the lock and unlock of the mutex will synchronize memory
+// for you, thus eliminating the need for you to execute read and/or write
+// barriers. However, mutex locking and its associated thread stalling is 
+// a potentially inefficient operation when in some cases you could simply 
+// write the memory from one thread and read it from another without 
+// using mutexes around the data access. Some systems let you write memory 
+// from one thread and read it from another (without you using mutexes)
+// without using memory barriers, but others (notably SMP) will not let you 
+// get away with this, even if you put a mutex around the write. In these
+// cases you need read/write barriers.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_EATHREAD_SYNC_H
+#define EATHREAD_EATHREAD_SYNC_H
+
+
+// Note
+// These functions are not placed in a C++ namespace but instead are standalone.
+// The reason for this is that these are usually implemented as #defines of 
+// C or asm code or implemented as compiler intrinsics. We however document
+// these functions here as if they are simply functions. The actual platform-
+// specific declarations are in the appropriate platform-specific directory.
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing.
+#elif defined(EA_PLATFORM_OSX)
+	#include <eathread/powerpc/eathread_sync_powerpc.h>
+#elif defined(EA_PROCESSOR_X86)
+	#include <eathread/x86/eathread_sync_x86.h>
+#elif defined(EA_PROCESSOR_X86_64)
+	#include <eathread/x86-64/eathread_sync_x86-64.h>
+#elif defined(EA_PROCESSOR_IA64)
+	#include <eathread/ia64/eathread_sync_ia64.h>
+#elif defined(EA_PLATFORM_APPLE)
+	#include <eathread/apple/eathread_sync_apple.h>
+#elif defined(EA_PROCESSOR_ARM) 
+	#include <eathread/arm/eathread_sync_arm.h>
+#endif
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+// EA_THREAD_DO_SPIN
+//     
+// Provides a macro which maps to whatever processor idle functionality the given platform requires.
+// 
+// Example usage:
+//     EA_THREAD_DO_SPIN();
+// 
+#ifndef EA_THREAD_DO_SPIN
+	#ifdef EA_THREAD_COOPERATIVE  
+		 #define EA_THREAD_DO_SPIN() ThreadSleep()               
+	#else
+		 #define EA_THREAD_DO_SPIN() EAProcessorPause() // We don't check for EA_TARGET_SMP here and instead sleep if not defined because you probably shouldn't be using a spinlock on a pre-emptive system unless it is a multi-processing system.     
+	#endif
+#endif
+
+
+
+// The above header files would define EA_THREAD_SYNC_IMPLEMENTED.
+#if !defined(EA_THREAD_SYNC_IMPLEMENTED)
+   // Perhaps it should be considered too serious of an error to allow compilation 
+   // to continue. If so, then we should enable the #error below.
+   // #error EA_THREAD_SYNC_IMPLEMENTED not defined. 
+
+
+   /// EAProcessorPause
+   ///
+   /// \Declaration
+   ///    void EAProcessorPause();
+   ///
+   /// \Description
+   ///    This statement causes the processor to efficiently (as much as possible)
+   ///    execute a no-op (a.k.a. nop or noop). These are particularly useful in 
+   ///    spin-wait loops. Without a proper pause, some processors suffer severe
+   ///    performance penalties while executing spin-wait loops such as those in 
+   ///    simple spin locks. Many processors have specialized pause instructions 
+   ///    (e.g. Intel x86 P4 'pause' or 'asm rep nop') that can be taken advantage 
+   ///    of here.
+   ///
+   /// \Example
+   ///    while (!flag) {
+   ///       EAProcessorPause();
+   ///    }
+   #define EAProcessorPause()
+
+
+
+   /// EAReadBarrier
+   ///
+   /// \Declaration
+   ///    void EAReadBarrier();
+   ///
+   /// \Description
+   ///    A read barrier ensures that neither software nor hardware perform a memory 
+   ///    read prior to the read barrier and that recent writes to main memory are 
+   ///    immediately seen (and not using stale cached data) by the processor executing
+   ///    the read barrier. This generally does not mean a (performance draining) 
+   ///    invalidation of the entire cache but does possibly mean invalidating any cache 
+   ///    that refers to main memory which has changed. Thus, there is a performance 
+   ///    cost but considering the use of this operation, this is the most efficient 
+   ///    way of achieving the effect.
+   ///
+   /// \Example
+   ///    The following function will operate fine on some multiprocessing systems but 
+   ///    hang (possibly indefinitely) on other multiprocessing systems unless the 
+   ///    EAReadBarrier call is present.
+   ///
+   ///    void ThreadFunction() {
+   ///      extern volatile int gFlag;
+   ///      while(gFlag == 0){ // Wait for a separate thread to write to gFlag.
+   ///         EAProcessorPause(); 
+   ///         EAReadBarrier();
+   ///      }
+   ///      // Do memory sharing operations with other threads here.
+   ///    }
+   #define EAReadBarrier()
+
+
+
+
+
+   /// EAWriteBarrier
+   ///
+   /// \Declaration
+   ///    void EAWriteBarrier();
+   ///
+   /// \Description
+   ///    A write barrier ensures that neither software nor hardware delay a memory 
+   ///    write operation past the barrier. If you want your memory write committed
+   ///    to main memory immediately, this statement will have that effect. As such,
+   ///    this is something like a flush of the current processor's write cache.
+   ///    Note that flushing memory from a processor's cache to main memory like this
+   ///    doesn't cause a second processor to immediately see the changed values in 
+   ///    main memory, as the second processor has a read cache between it and main 
+   ///    memory. Thus, a second processor would need to execute a read barrier if it
+   ///    wants to see the updates immediately.
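+   ///
+   /// \Example
+   ///    An illustrative publish/consume sketch (gData and gReady are hypothetical
+   ///    globals, not part of this library):
+   ///
+   ///    // Writer thread:
+   ///    gData = 42;           // Write the payload...
+   ///    EAWriteBarrier();     // ...and commit it to main memory...
+   ///    gReady = 1;           // ...before the flag that the reader polls.
+   ///
+   ///    // Reader thread:
+   ///    while(gReady == 0)
+   ///       EAProcessorPause();
+   ///    EAReadBarrier();      // Avoid reading a stale cached copy of gData.
+   ///    int n = gData;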
+   #define EAWriteBarrier()
+
+
+
+
+
+   /// EAReadWriteBarrier
+   ///
+   /// Declaration
+   ///    void EAReadWriteBarrier();
+   ///
+   /// Description
+   ///    A read/write barrier has the same effect as both a read barrier and a write
+   ///    barrier at once. A read barrier ensures that neither software nor hardware 
+   ///    perform a memory read prior to the read barrier, while a write barrier 
+   ///    ensures that neither software nor hardware delay a memory write operation 
+   ///    past the barrier. A ReadWriteBarrier specifically acts like a WriteBarrier
+   ///    followed by a ReadBarrier, despite the name ReadWriteBarrier being the 
+   ///    other way around.
+   ///
+   ///    EAReadWriteBarrier synchronizes both reads and writes to system memory 
+   ///    between processors and their caches on multiprocessor systems, particularly 
+   ///    SMP systems. This can be useful to ensure the state of global variables at 
+   ///    a particular point in your code for multithreaded applications. Higher-level
+   ///    thread synchronization primitives such as mutexes achieve the same 
+   ///    effect (while providing the additional functionality of synchronizing code
+   ///    execution) but at a significantly higher cost. 
+   ///
+   ///    A two-processor SMP system has two processors, each with its own instruction
+   ///    and data caches. If the first processor writes to a memory location and the 
+   ///    second processor needs to read from that location, the first processor's 
+   ///    write may still be in its cache and not committed to main memory, and the 
+   ///    second processor thus might not see the newly written value. The value
+   ///    will eventually get written from the first cache to main memory, but if you 
+   ///    need to ensure that it is written at a particular time, you would use a 
+   ///    ReadWrite barrier. 
+   ///
+   ///    This function is similar to the Linux kernel mb() function and to the 
+   ///    Windows kernel KeMemoryBarrier function.
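+   ///
+   /// \Example
+   ///    A hedged sketch (gSharedValue and the helper functions are hypothetical):
+   ///
+   ///    gSharedValue = ComputeValue(); // Write that must be visible to other processors.
+   ///    EAReadWriteBarrier();          // Commit the write and refresh subsequent reads.
+   ///    NotifyOtherProcessors();       // Readers that also use a barrier now see the value.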
+   #define EAReadWriteBarrier()
+
+
+
+
+
+   /// EACompilerMemoryBarrier
+   ///
+   /// \Declaration
+   ///    void EACompilerMemoryBarrier();
+   ///
+   /// \Description
+   ///    Provides a barrier for compiler optimization. The compiler will not make
+   ///    assumptions about locations across an EACompilerMemoryBarrier statement.
+   ///    For example, if a compiler has memory values temporarily cached in 
+   ///    registers but you need them to be written to memory, you can execute the
+   ///    EACompilerMemoryBarrier statement. This is somewhat similar in concept to 
+   ///    the C volatile keyword except that it applies to all memory the compiler
+   ///    is currently working with and applies its effect only where you specify
+   ///    and not for every usage as with the volatile keyword. 
+   ///
+   ///    Under GCC, this statement is equivalent to the GCC `asm volatile("":::"memory")` 
+   ///    statement. Under VC++, this is equivalent to a _ReadWriteBarrier statement  
+   ///    (not to be confused with EAReadWriteBarrier above) and equivalent to the Windows
+   ///    kernel function KeMemoryBarrierWithoutFence. This is also known as barrier()
+   ///    under Linux. 
+   ///    
+   ///    EACompilerMemoryBarrier is a compiler-level statement and not a 
+   ///    processor-level statement. For processor-level memory barriers, 
+   ///    use EAReadBarrier, etc.
+   /// 
+   /// \Example
+   ///    Without the compiler memory barrier below, an optimizing compiler might
+   ///    never assign 0 to gValue because gValue is reassigned to 1 later and 
+   ///    because gValue is not declared volatile.
+   ///
+   ///    void ThreadFunction() {
+   ///       extern int gValue; // Note that gValue is intentionally not declared volatile.
+   ///       gValue = 0;
+   ///       EACompilerMemoryBarrier();
+   ///       gValue = 1;
+   ///    }
+   #define EACompilerMemoryBarrier()
+
+
+#endif // EA_THREAD_SYNC_IMPLEMENTED
+
+
+#endif // #ifdef EATHREAD_EATHREAD_SYNC_H
+
+
+
+
+
+
+
+

+ 802 - 0
include/eathread/eathread_thread.h

@@ -0,0 +1,802 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_EATHREAD_THREAD_H
+#define EATHREAD_EATHREAD_THREAD_H
+
+#include <eathread/eathread.h>
+#include <eathread/eathread_semaphore.h>
+#include <eathread/eathread_atomic.h>
+EA_DISABLE_ALL_VC_WARNINGS()
+#include <stddef.h>
+#include <stdlib.h>
+#include <type_traits>
+EA_RESTORE_ALL_VC_WARNINGS()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// Suppress warning about class 'AtomicInt32' needs to have a
+	// dll-interface to be used by clients of class which have a templated member.
+	// 
+	// These templates cannot be instantiated outside of the DLL. If you try, a
+	// link error will result. This compiler warning is intended to notify users
+	// of this.
+	#pragma warning(push)
+	#pragma warning(disable: 4251)
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////
+/// ThreadData
+///
+/// This is used internally by class Thread.
+/// To consider: Move this declaration into a platform-specific 
+/// header file.
+/////////////////////////////////////////////////////////////////////////
+
+#if !EA_THREADS_AVAILABLE
+
+	struct EAThreadDynamicData
+	{
+	};
+
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif EA_USE_CPP11_CONCURRENCY
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+
+	EA_DISABLE_VC_WARNING(4062 4265 4365 4836 4571 4625 4626 4628 4193 4127 4548 4350)
+	#if EA_PLATFORM_WINDOWS
+		#include <ctxtcall.h> // workaround for compile errors in winrt.  see http://connect.microsoft.com/VisualStudio/feedback/details/730564/ppl-in-winrt-projects-fail-to-compile
+	#endif
+	#include <future>
+	#include <mutex>
+
+	struct EAThreadDynamicData
+	{
+		typedef void (*ThreadFunc)(EAThreadDynamicData* tdd, void* userFunc, void* userContext, void* userWrapperFunc);
+		EAThreadDynamicData(EA::Thread::ThreadUniqueId uniqueThreadId, const char* pThreadName);
+		EAThreadDynamicData(void* userFunc, void* userContext, void* userWrapperFunc, ThreadFunc threadFunc);
+		~EAThreadDynamicData();
+
+		void AddRef();
+		void Release();
+
+		EA::Thread::AtomicInt32 mnRefCount;
+		EA::Thread::AtomicInt32 mStatus;
+		intptr_t mReturnValue;
+		char mName[EATHREAD_NAME_SIZE];
+		void* mpStackBase; 
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask; 
+		
+		EA::Thread::ThreadUniqueId mUniqueThreadId;
+		struct EAThreadComposite
+		{
+			EAThreadComposite()
+			: mReturnPromise()
+			, mReturnFuture(mReturnPromise.get_future())
+			, mGetStatusFuture(mReturnFuture)
+			{
+			}
+
+			std::promise<intptr_t> mReturnPromise;
+			std::shared_future<intptr_t> mReturnFuture;
+			std::shared_future<intptr_t> mGetStatusFuture;
+			std::thread mThread;
+		} *mpComp;
+
+	private:
+		// Disable copy and assignment
+		EAThreadDynamicData(const EAThreadDynamicData&);
+		EAThreadDynamicData operator=(const EAThreadDynamicData&);
+	};
+
+	struct EAThreadData 
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+	EA_RESTORE_VC_WARNING()
+
+// TODO:  collapse the defines.
+#elif defined(EA_PLATFORM_SONY)
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+	#include <kernel.h>
+	#include <scebase.h>
+
+	// Internal queue wrapper which is used to allow for a higher resolution sleep than what is provided by Sony's sleep functions:
+	// despite their names, sceKernelSleep, sceKernelUSleep and sceKernelNanosleep all have 1 ms resolution, whereas this timer has 100 microsecond resolution.
+	struct EAThreadTimerQueue
+	{
+		EAThreadTimerQueue()
+		{
+			int result = sceKernelCreateEqueue(&mTimerEventQueue, "EAThread Timer Queue");
+			mbEnabled = result == SCE_OK;
+
+			// A timer queue will fail to be created when there are too many kernel objects open.  It is a valid
+			// use-case for the Event Queue to fail being created as the ThreadSleep function implements a fallback.
+			//
+			// EAT_ASSERT_FORMATTED(mbEnabled, "Failed to initialize the EAThread Timer Queue (0x%x)", result);
+		}
+
+		~EAThreadTimerQueue()
+		{
+			if(mbEnabled)  // only destroy the queue if it was created.
+				sceKernelDeleteEqueue(mTimerEventQueue);
+				
+			mbEnabled = false;
+		}
+
+		SceKernelEqueue mTimerEventQueue;
+		EA::Thread::AtomicUint32 mCurrentId = 0;
+		bool mbEnabled = false;
+	};
+
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+
+		void  AddRef();
+		void  Release();
+
+		EA::Thread::ThreadId			mThreadId;
+		EA::Thread::SysThreadId			mSysThreadId;
+		pid_t							mThreadPid;                     // For Linux this is the thread ID from gettid(). Otherwise it's the getpid() value.
+		volatile int					mnStatus;
+		intptr_t						mnReturnValue;
+		void*							mpStartContext[2];
+		void*							mpBeginThreadUserWrapper;       // User-specified BeginThread function wrapper or class wrapper
+		void*							mpStackBase; 
+		EA::Thread::AtomicInt32			mnRefCount;
+		char							mName[EATHREAD_NAME_SIZE];
+		int								mStartupProcessor;              // The thread affinity for the thread to set itself to after it starts. We need to do this because we currently have no way to set the affinity of another thread until after it has started.
+		EA::Thread::Mutex				mRunMutex;                      // Locked while the thread is running. The reason for this mutex is that it allows timeouts to be specified in the WaitForEnd function.
+		EA::Thread::Semaphore			mStartedSemaphore;              // Signaled when the thread starts. This allows us to know in a thread-safe way when the thread has actually started executing.
+		EA::Thread::ThreadAffinityMask  mnThreadAffinityMask; 
+		EAThreadTimerQueue				mThreadTimerQueue;				// This queue allows for high resolution timer events to be submitted per thread allowing for better sleep resolution than Sony's provided sleep functions
+	};
+
+
+	struct EAThreadData{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include <pthread.h>
+	#include <eathread/eathread_mutex.h>
+	#include <eathread/eathread_semaphore.h>
+
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+
+		void  AddRef();
+		void  Release();
+
+		EA::Thread::ThreadId    mThreadId;
+		EA::Thread::SysThreadId mSysThreadId;
+		pid_t                   mThreadPid;                     // For Linux this is the thread ID from gettid(). Otherwise it's the getpid() value.
+		volatile int            mnStatus;
+		intptr_t                mnReturnValue;
+		void*                   mpStartContext[2];
+		void*                   mpBeginThreadUserWrapper;       // User-specified BeginThread function wrapper or class wrapper
+		void*                   mpStackBase; 
+		EA::Thread::AtomicInt32 mnRefCount;
+		char                    mName[EATHREAD_NAME_SIZE];
+		int                     mStartupProcessor;              // DEPRECATED:  The thread affinity for the thread to set itself to after it starts. We need to do this because we currently have no way to set the affinity of another thread until after it has started.
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask; // mStartupProcessor is deprecated in favor of using the mnThreadAffinityMask, which doesn't suffer from the limitation of only specifying the value at thread startup time.
+		EA::Thread::Mutex       mRunMutex;                      // Locked while the thread is running. The reason for this mutex is that it allows timeouts to be specified in the WaitForEnd function.
+		EA::Thread::Semaphore   mStartedSemaphore;              // Signaled when the thread starts. This allows us to know in a thread-safe way when the thread has actually started executing.
+	};
+
+
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+
+	struct EAThreadDynamicData
+	{
+		EAThreadDynamicData();
+	   ~EAThreadDynamicData();
+		void    AddRef();
+		void    Release();
+
+		EA::Thread::ThreadId                mhThread;
+		unsigned int                        mnThreadId;                     // EA::Thread::SysThreadId
+		int                                 mnStatus;
+		EA::Thread::ThreadAffinityMask      mnThreadAffinityMask;
+		intptr_t                            mnReturnValue;
+		void*                               mpStartContext[3];
+		void*                               mpBeginThreadUserWrapper;     // User-specified BeginThread function wrapper or class wrapper
+		void*                               mpStackBase; 
+		EA::Thread::AtomicInt32             mnRefCount;
+		char                                mName[EATHREAD_NAME_SIZE];
+	};
+
+
+	struct EAThreadData
+	{
+		EAThreadDynamicData* mpData;
+	};
+
+#endif
+
+namespace EA
+{
+namespace Thread
+{
+
+struct EATHREADLIB_API ThreadEnumData
+{
+	ThreadEnumData();
+	~ThreadEnumData();
+
+	EAThreadDynamicData* mpThreadDynamicData;
+	void Release();
+};
+
+} 
+}
+/////////////////////////////////////////////////////////////////////////
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// FindThreadDynamicData
+		/// Utility functionality, not needed for most uses.
+		EATHREADLIB_API EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId);
+		EATHREADLIB_API EAThreadDynamicData* FindThreadDynamicData(SysThreadId threadId);
+		
+		/// EnumerateThreads
+		/// Enumerates known threads. For some platforms the returned thread list is limited
+		/// to the main thread and threads created by EAThread.
+		/// Returns the required count to enumerate all threads.
+		/// Fills in thread data up to the supplied capacity.
+		///
+		/// Example usage:
+		///     ThreadEnumData enumData[32];
+		///     size_t count = EA::Thread::EnumerateThreads(enumData, EAArrayCount(enumData));
+		///
+		///     for(size_t i = 0; i < count; i++)
+		///     {
+		///         printf("Thread id: %s\n", EAThreadIdToString(enumData[i].mpThreadDynamicData->mThreadId));
+		///         enumData[i].Release();
+		///     }
+		size_t EATHREADLIB_API EnumerateThreads(ThreadEnumData* pDataArray, size_t dataArrayCapacity);
+
+		/// RunnableFunction
+		/// Defines the prototype of a standalone thread function.
+		/// The return value is of type intptr_t, which is a standard integral 
+		/// data type that is large enough to hold an int or void*.
+		typedef intptr_t (*RunnableFunction)(void* pContext);
+
+		/// IRunnable
+		/// Defines a class whose Run function executes in a separate thread.
+		/// An implementation of this interface can be run using a Thread class instance.
+		struct EATHREADLIB_API IRunnable
+		{
+			 virtual ~IRunnable() { }
+
+			 /// \brief Task run entry point
+			 /// The thread terminates when this method returns. 
+			 /// The return value is of type intptr_t, which is a standard integral 
+			 /// data type that is large enough to hold an int or void*.
+			 virtual intptr_t Run(void* pContext = NULL) = 0;
+		};
+
+		/// RunnableFunctionUserWrapper
+		/// Defines the prototype of a user callback function that is invoked when the thread function is started.
+		/// \param defaultRunnableFunction: the function Thread::Begin() would normally
+		///          call; the user must call this function with the passed-in pContext.
+		/// \param pContext: thread start context void* passed in from Thread::Begin().
+		///
+		/// Here's an example:
+		/// \code
+		/// int ThreadFunction(void*)
+		/// {
+		///      printf("Throw NULL pointer Exception.\n");
+		///      char* pTest = NULL;
+		///      *pTest = 1;
+		///      return 0;
+		/// }
+		/// 
+		/// intptr_t MyThreadBeginWrapper(RunnableFunction defaultRunnableFunction, void* pContext)
+		/// {
+		///      // Do pre-start thread function stuff
+		///      intptr_t retValue = 0;
+		///      try {
+		///            // You must call defaultRunnableFunction to execute the thread function;
+		///            // if you don't, the thread function will never be executed.
+		///            retValue = defaultRunnableFunction(pContext);
+		///      }
+		///      catch(...) {
+		///            printf("Exception detected.\n");
+		///      }
+		///     
+		///      // do post-start thread function stuff
+		///      return retValue;
+		/// }
+		/// \endcode
+		///
+		/// In your thread begin() function:
+		/// \code
+		/// ...
+		/// threadIds = threads.Begin(ThreadFunction, NULL, NULL, MyThreadBeginWrapper);
+		/// ...
+		/// \endcode
+		typedef intptr_t (*RunnableFunctionUserWrapper)(RunnableFunction defaultRunnableFunction, void* pContext);
+
+
+		/// RunnableClassUserWrapper
+		/// Defines the prototype of a user callback function that is invoked when the thread function is started.
+		/// \param defaultRunnableClass: the IRunnable whose Run function Thread::Begin()
+		///          would normally call; the user must call it with the passed-in pContext.
+		/// \param pContext: thread start context void* passed in from Thread::Begin().
+		/// 
+		/// Here's an example:
+		/// \code
+		/// class MyThreadClass : public IRunnable
+		/// {
+		///      virtual intptr_t Run(void* pContext = NULL)
+		///      {
+		///            printf("Throw NULL pointer Exception.\n");
+		///            char* pTest = NULL;
+		///            *pTest = 1;
+		///            return 0;
+		///      }
+		/// };
+		/// 
+		/// intptr_t MyThreadBeginWrapper(IRunnable* defaultRunnableClass, void* pContext)
+		/// {
+		///      // do pre-start thread function stuff
+		///      intptr_t retValue = 0;
+		///
+		///      // a good example is a try/catch block
+		///      try
+		///      {
+		///            // You must call defaultRunnableClass->Run to execute the thread function;
+		///            // if you don't, the thread function will never be executed.
+		///            retValue = defaultRunnableClass->Run(pContext);
+		///      }
+		///      catch(...)
+		///      {
+		///            printf("Exception detected.\n");
+		///      }
+		///     
+		///      // do post-start thread function stuff
+		///      return retValue;
+		/// }
+		/// \endcode
+		///
+		/// In your thread begin() function:
+		///
+		/// \code 
+		/// ...
+		/// MyThreadClass* pMyThreadClass = new MyThreadClass();
+		/// threadIds = threads.Begin(pMyThreadClass, NULL, NULL, MyThreadBeginWrapper);
+		/// ...
+		/// \endcode
+		typedef intptr_t (*RunnableClassUserWrapper)(IRunnable* defaultRunnableClass, void* pContext);
+
+		 
+		/// ThreadParameters
+		/// Used for specifying thread starting parameters. Note that we do not 
+		/// include a 'start paused' parameter. The reason for this is that such 
+		/// a thing is not portable and other mechanisms can achieve the same 
+		/// effect. Thread pause/resume in general is considered bad practice.
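+		///
+		/// Example usage (a sketch; WorkerFunction is a hypothetical RunnableFunction):
+		///     ThreadParameters params;
+		///     params.mnPriority = kThreadPriorityDefault;
+		///     params.mpName     = "Worker";
+		///
+		///     Thread thread;
+		///     thread.Begin(WorkerFunction, NULL, &params);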
+		struct EATHREADLIB_API ThreadParameters
+		{
+			void*       mpStack;                                       /// Pointer to stack memory. This would be the low address of the memory. A NULL value means to create a default stack. Default is NULL. Note that some platforms (such as Windows) don't support a user-supplied stack.
+			size_t      mnStackSize;                                   /// Size of the stack memory. Default is variable, depending on the platform.
+			int         mnPriority;                                    /// Value in the range of [kThreadPriorityMin, kThreadPriorityMax]. Default is kThreadPriorityDefault.
+			int         mnProcessor;                                   /// 0-based index of which processor to run the thread on. A value of -1 means to use default. Default is -1. See SetThreadProcessor for caveats regarding this value.
+			const char* mpName;                                        /// A name to give to the thread. Useful for identifying threads in a descriptive way.
+			EA::Thread::ThreadAffinityMask mnAffinityMask;             /// A bitmask representing the cores that the thread is allowed to run on.  NOTE:  This affinity mask is only applied when mnProcessor is set to kProcessorAny.
+			bool        mbDisablePriorityBoost;                        /// Whether the system should override the default behavior of boosting the thread priority as they come out of a wait state (currently only supported on Windows).
+
+			ThreadParameters();
+		};
+
+
+
+		/// Thread
+		/// 
+		/// Note that we do not provide thread suspend and resume functions.
+		/// The reason for this is that such things are inherently unsafe as 
+		/// you usually cannot know where the thread is executing when the 
+		/// suspension occurs. The safe alternative is to use signal or 
+		/// semaphore primitives to achieve the same thing in a safe way.
+		///
+		/// For performance reasons, the thread creation functions of this 
+		/// class are themselves not thread-safe. Thus if you want to call
+		/// the Begin functions for an instance of this class from multiple
+		/// threads, you will need to synchronize access to the begin 
+		/// functions yourself.
+		class EATHREADLIB_API Thread
+		{
+		public:
+			enum Status
+			{
+				kStatusNone,    /// The thread has neither started nor ended.
+				kStatusRunning, /// The thread has started but not ended.
+				kStatusEnded    /// The thread has both started and ended.
+			};
+
+			/// Thread
+			/// \brief Thread constructor.
+			Thread();
+
+			/// Thread
+			/// \brief Thread copy constructor.
+			Thread(const Thread& t);
+
+			/// Thread
+			/// \brief Thread destructor. The destructor does not take any 
+			/// action on the thread associated with it. Any threads created
+			/// by this class will continue to run and exit normally after 
+			/// this destructor has executed.
+		   ~Thread();
+
+			/// operator=
+			/// \brief Thread assignment operator.
+			Thread& operator=(const Thread& t);
+
+			/// \brief Return global RunnableFunctionUserWrapper set by user.
+			/// \return the RunnableFunctionUserWrapper function pointer set by the 
+			/// user; NULL if none has been set.
+			/// \sa RunnableFunctionUserWrapper
+			static RunnableFunctionUserWrapper GetGlobalRunnableFunctionUserWrapper();
+
+			/// \brief Set global RunnableFunctionUserWrapper.  This can only be
+			/// set once in the application's lifetime.
+			/// \param pUserWrapper user specified wrapper function pointer.
+			/// \sa RunnableFunctionUserWrapper
+			static void SetGlobalRunnableFunctionUserWrapper(RunnableFunctionUserWrapper pUserWrapper);
+
+			/// \brief Return global RunnableClassUserWrapper set by user.
+			/// \return the RunnableClassUserWrapper function pointer set by the 
+			/// user; NULL if none has been set.
+			/// \sa RunnableClassUserWrapper
+			static RunnableClassUserWrapper GetGlobalRunnableClassUserWrapper();
+
+			/// \brief Set global RunnableClassUserWrapper.  This can only be
+			/// set once in the application's lifetime.
+			/// \sa RunnableClassUserWrapper
+			static void SetGlobalRunnableClassUserWrapper(RunnableClassUserWrapper pUserWrapper);
+
+			/// Begin
+			/// \brief Starts a thread via a RunnableFunction.
+			/// Returns the thread id of the newly running thread.
+			/// The pContext argument is passed to the RunnableFunction and serves
+			/// to allow the caller to pass information to the thread. 
+			/// The pThreadParameters argument allows the caller to specify additional
+			/// information about how to start the thread. If this parameter is NULL, 
+			/// then default settings will be chosen.
+			/// The Begin function itself is not thread-safe. While this Thread class
+			/// can be used to Begin multiple threads, the Begin function itself cannot
+			/// safely be executed by multiple threads at a time. This is by design and
+			/// allows for a simpler, more efficient library.
+			/// Users can provide their own RunnableFunction wrapper by specifying one in
+			/// pUserWrapper. When pUserWrapper is used, it will get called first, and it
+			/// can then do whatever is desired before the just-created thread's entry 
+			/// point is called.
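+			///
+			/// Example usage (a sketch; WorkerFunction and its context are illustrative):
+			///     static intptr_t WorkerFunction(void* pContext)
+			///         { return (intptr_t)pContext; }
+			///
+			///     Thread thread;
+			///     ThreadId threadId = thread.Begin(WorkerFunction, (void*)123);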
+			/// \sa RunnableFunctionUserWrapper
+			ThreadId Begin(RunnableFunction pFunction, void* pContext = NULL, const ThreadParameters* pThreadParameters = NULL, RunnableFunctionUserWrapper pUserWrapper = GetGlobalRunnableFunctionUserWrapper());
+
+			/// Begin
+			/// Starts a thread via an object of the IRunnable interface.
+			/// Returns the thread id of the newly running thread.
+			/// The pContext argument is passed to the IRunnable's Run function and serves
+			/// to allow the caller to pass information to the thread. 
+			/// The pThreadParameters argument allows the caller to specify additional
+			/// information about how to start the thread. If this parameter is NULL, 
+			/// then default settings will be chosen.
+			/// The Begin function itself is not thread-safe. While this Thread class
+			/// can be used to Begin multiple threads, the Begin function itself cannot
+			/// safely be executed by multiple threads at a time. This is by design and
+			/// allows for a simpler, more efficient library.
+			/// Users can provide their own RunnableClass wrapper by specifying one in
+			/// pUserWrapper. When pUserWrapper is used, it will get called first, and it
+			/// can then do whatever is desired before the just-created thread's entry
+			/// point is called.
+			/// \sa RunnableClassUserWrapper
+			ThreadId Begin(IRunnable* pRunnable, void* pContext = NULL, const ThreadParameters* pThreadParameters = NULL, RunnableClassUserWrapper pUserWrapper = GetGlobalRunnableClassUserWrapper());
+
+			/// WaitForEnd
+			/// Waits for the thread associated with an object of this class
+			/// to end. Returns one of enum Status to indicate the status upon
+			/// return of this call.
+			/// This function is similar to the Posix pthread_join function and
+			/// the Windows WaitForSingleObject function.
+			/// If input pThreadReturnValue is non-NULL, it will be filled in with
+			/// the return value of the thread.
+			/// This function must be called only by a single thread at a time.
+			/// The resulting behaviour is undefined if multiple threads call this function.
+			///
+			/// Note that the timeout is specified in absolute time and not relative time.
+			///
+			/// Note also that due to the way thread scheduling works -- particularly in a
+			/// time-sliced threading environment -- the timeout value is a hint and 
+			/// the actual amount of time passed before the timeout occurs may be significantly
+			/// more or less than the specified timeout time.
+			///
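+			/// Example usage (a sketch; assumes EA::Thread::GetThreadTime() from eathread.h
+			/// to build the absolute timeout):
+			///     intptr_t result = 0;
+			///     if(thread.WaitForEnd(GetThreadTime() + 1000, &result) == Thread::kStatusEnded)
+			///         printf("Thread returned: %d\n", (int)result);
+			///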
+			Status WaitForEnd(const ThreadTime& timeoutAbsolute = kTimeoutNone, intptr_t* pThreadReturnValue = NULL);
+
+			/// GetStatus
+			/// Returns one of enum Status. Note that in the most general sense
+			/// the running status may change if the thread quit right after 
+			/// this call was made. But this function is useful if you know that
+			/// a function was running and you want to poll for its status while
+			/// waiting for it to exit.
+			/// If input pThreadReturnValue is non-NULL, it will be filled in with
+			/// the return value of the thread if the Status is kStatusEnded.
+			/// If the Status is not kStatusEnded, pThreadReturnValue will be ignored.
+			Status GetStatus(intptr_t* pThreadReturnValue = NULL) const;
+
+			/// GetId
+			/// Gets the Id of the thread associated with an object of this class.
+			/// This Id is unique throughout the system. This function returns a 
+			/// value that under Posix threads would be synonymous with pthread_t
+			/// and under Windows would be synonymous with a thread HANDLE (and not 
+			/// a Windows thread id).
+			ThreadId GetId() const;
+
+			/// GetPriority
+			/// Gets the priority of the thread. Return kThreadPriorityUnknown if 
+			/// the thread associated with this class isn't running. If a thread 
+			/// wants to get its own priority, it can use this class member or it 
+			/// can simply use the global SetThreadPriority function and not need 
+			/// an instance of this class. If you want to manipulate the thread 
+			/// priority via the native platform interface, you can use GetId to 
+			/// get the platform-specific identifier and use that value with native APIs.
+			///
+			/// This function can return any int except for kThreadPriorityUnknown, as the 
+			/// current thread's priority will always be knowable. A return value of kThreadPriorityDefault
+			/// means that this thread is of normal (a.k.a. default) priority.
+			/// See the documentation for thread priority constants (e.g. kThreadPriorityDefault) 
+			/// for more information about thread priority values and behaviour.
+			int GetPriority() const;
+
+			/// SetPriority
+			/// Sets the priority of the thread. Returns false if the thread associated
+			/// with this class isn't running. If a thread wants to set its own priority,
+			/// it can use this class member or it can simply use the global SetThreadPriority
+			/// function and not need an instance of this class. If you want to manipulate  
+			/// the thread priority via the native platform interface, you can use GetId to 
+			/// get the platform-specific identifier and use that value with native APIs.
+			///
+			/// Accepts any integer priority value except kThreadPriorityUnknown.
+			/// On some platforms, this function will automatically convert any invalid 
+			/// priority for that particular platform to a valid one.  A normal (a.k.a. default) thread 
+			/// priority is identified by kThreadPriorityDefault.
+			///
+			/// You can set the priority of a Thread object only if it has already begun.
+			/// You can also set the priority with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design exists in order to simplify the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future.
+			bool SetPriority(int priority);
+
+			/// SetProcessor
+			/// Sets the processor the given thread should run on. Valid values 
+			/// are kThreadProcessorDefault, kThreadProcessorAny, or a processor
+			/// index in the range of [0, processor count). If the input value
+			/// is >= the processor count, it will be reduced modulo the
+			/// processor count. Any other invalid value will cause the processor
+			/// to be set to zero.
+			/// 
+			/// For some platforms you can set the processor of a Thread object only if it 
+			/// has already begun.
+			///
+			/// You can also set the processor with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design exists in order to simplify the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future. This is the most reliable way to set the thread
+			/// processor, as it works on all platforms. 
+			void SetProcessor(int nProcessor);
+
+			/// Wake
+			/// Wakes up a sleeping thread if it is sleeping. This necessarily can only
+			/// be called from a thread other than the sleeping thread. You must be careful
+			/// to not rely on this function as a synchronization primitive. For example,
+			/// in the general case you cannot be sure that after calling Wake that the 
+			/// thread will be awake, as it is possible that right after you called Wake
+			/// the thread immediately went back to sleep before you could do anything.
+			/// Nevertheless, this function is useful in waking up a thread from a 
+			/// (potentially long) sleep so that it can examine data, lock a synchronization
+			/// primitive, or simply exit. 
+			///
+			/// Note that this class has no member Sleep function. The reason is that a 
+			/// thread can only put itself to sleep and cannot put other threads to sleep.
+			/// The thread should use the static Sleep function to put itself to sleep.
+			void Wake();
+
+			/// GetName
+			/// Returns the name of the thread assigned by the SetName function.
+			/// If the thread was not named by the SetName function, then the name is empty ("").
+			const char* GetName() const;
+			
+			/// SetName
+			/// Sets a descriptive name for the thread. On some platforms this name is passed
+			/// on to the debugging tools so they can see this name. The name length, including
+			/// a terminating 0 char, is limited to EATHREAD_NAME_SIZE characters. Any characters
+			/// beyond that are ignored.
+			/// 
+			/// You can set the name of a Thread object only if it has already begun.
+			/// You can also set the name with the Begin function via the ThreadParameters 
+			/// argument to Begin. This design is so in order to simply the implementation, 
+			/// but being able to set ThreadParameters before Begin is something that can
+			/// be considered in the future.
+			///
+			/// Some platforms (e.g. Linux) have the restriction that this function works properly only
+			/// when called by the same thread that you want to name. Given this situation,
+			/// the most portable way to use this SetName function is to either always call
+			/// it from the thread to be named or to use the ThreadParameters to give the 
+			/// thread a name before it is started and let the started thread name itself.
+			void SetName(const char* pName);
+
+			/// SetAffinityMask
+			/// Sets an affinity mask for the thread.  On some platforms, this OS feature is
+			/// not supported.  In this situation, you are at the mercy of the OS thread scheduler.
+			/// 
+			/// Example(s):
+			/// "00000100" -> thread is pinned to processor 2
+			/// "01010100" -> thread is pinned to processors 2, 4, and 6.
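+			/// 
+			/// Example usage (sketch):
+			///     thread.SetAffinityMask(0x54); // binary 01010100: processors 2, 4, and 6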
+			void SetAffinityMask(ThreadAffinityMask mnAffinityMask);
+
+			/// GetAffinityMask
+			/// Returns the affinity mask for this specific thread.
+			ThreadAffinityMask GetAffinityMask();
+
+			/// SetDefaultProcessor
+			/// Sets the default processor to create threads with. To specify the processor
+			/// for a running thread, use SetProcessor() or specify the processor in the
+			/// thread creation ThreadParameters.  
+			/// 
+			/// If nProcessor is set to kProcessorAny, EAThread will automatically determine  
+			/// which processor to launch threads to.
+			///
+			/// Please refer to SetProcessor for valid values for the nProcessor argument.
+			static void SetDefaultProcessor(int nProcessor) 
+			  { sDefaultProcessor = nProcessor; }
+
+
+			/// GetDefaultProcessor
+			/// Gets the default processor to create threads with.
+			static int GetDefaultProcessor()
+				{ return sDefaultProcessor; }
+
+
+			/// SetDefaultProcessorMask
+			/// Sets which processors created threads should be explicitly run on. 
+			/// The default value is 0xffffffffffffffff.
+			/// Each bit refers to the associated processor. A mask of 0xffffffffffffffff
+			/// means to allow running on any processor, and on desktop platforms such
+			/// as Windows it means that the OS decides what processor to use on its own.
+			/// Not all platforms support this functionality, even if multiple processors are present.
+			static void SetDefaultProcessorMask(uint64_t mask)
+				{ sDefaultProcessorMask.SetValue(mask); }
+
+
+			/// GetDefaultProcessorMask
+			/// Returns the mask set by SetDefaultProcessorMask.
+			static uint64_t GetDefaultProcessorMask()
+				{ return sDefaultProcessorMask.GetValue(); }
+
+
+			/// GetPlatformData
+			/// Returns platform-specific data for this thread for debugging uses or 
+			/// other cases whereby special (and non-portable) uses are required.
+			/// The value returned is a struct of type EAThreadData.
+			void* GetPlatformData()
+				{ return &mThreadData; }
+
+		protected:
+			static RunnableFunctionUserWrapper sGlobalRunnableFunctionUserWrapper;
+			static RunnableClassUserWrapper    sGlobalRunnableClassUserWrapper;
+			static EA::Thread::AtomicInt32     sDefaultProcessor;
+			static EA::Thread::AtomicUint64    sDefaultProcessorMask;
+			EAThreadData                       mThreadData;
+		};
+
+
+		/// ThreadFactory
+		/// 
+		/// Implements a factory-based creation and destruction mechanism for class Thread.
+		/// A primary use of this would be to allow the Thread implementation to reside in
+		/// a private library while users of the class interact only with the interface
+		/// header and the factory. The factory provides conventional create/destroy 
+		/// semantics which use global operator new, but also provides manual construction/
+		/// destruction semantics so that the user can provide for memory allocation 
+		/// and deallocation.
+		class EATHREADLIB_API ThreadFactory
+		{
+		public:
+			static Thread* CreateThread();                  // Internally implemented as: return new Thread;
+			static void    DestroyThread(Thread* pThread);  // Internally implemented as: delete pThread;
+
+			static size_t  GetThreadSize();                 // Internally implemented as: return sizeof(Thread);
+			static Thread* ConstructThread(void* pMemory);  // Internally implemented as: return new(pMemory) Thread;
+			static void    DestructThread(Thread* pThread); // Internally implemented as: pThread->~Thread();
+		};
+
+
+		/// MakeThread
+		///
+		/// Simplify creating threads with lambdas
+		///
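+		/// Example usage (a sketch; the lambda body is illustrative):
+		///     auto t = MakeThread([] { printf("hello from a worker thread\n"); });
+		///     t.WaitForEnd();
+		///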
+		template <typename F>
+		auto MakeThread(F&& f, const EA::Thread::ThreadParameters& params = EA::Thread::ThreadParameters())
+		{
+			typedef std::decay_t<F> decayed_f_t;
+
+			auto get_memory = [] 
+			{
+				const auto sz = sizeof(decayed_f_t);
+				auto* pAllocator = EA::Thread::GetAllocator();
+
+				if(pAllocator)
+					return pAllocator->Alloc(sz);
+				else
+					return malloc(sz);
+			};
+
+			auto thread_entry = [](void* pMemory) -> intptr_t
+			{
+				auto free_memory = [](void* p)
+				{
+					auto* pAllocator = EA::Thread::GetAllocator();
+					if(pAllocator)
+						return pAllocator->Free(p);
+					else
+						return free(p);
+				};
+
+				auto* pF = reinterpret_cast<decayed_f_t*>(pMemory);
+				(*pF)();
+				pF->~decayed_f_t();
+				free_memory(pF);
+				return 0;
+			};
+
+			EA::Thread::Thread thread;
+			thread.Begin(thread_entry, new(get_memory()) decayed_f_t(std::forward<F>(f)), &params);  // deleted in the thread entry function
+			return thread;
+		}
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+#if defined(EA_DLL) && defined(_MSC_VER)
+	// re-enable warning 4251 (it's a level-1 warning and should not be suppressed globally)
+	#pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_EATHREAD_THREAD_H
+
+
+
+
+
+

+ 190 - 0
include/eathread/gcc/eathread_atomic_gcc.h

@@ -0,0 +1,190 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+#ifndef EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+#define EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		/// The actual implementation may vary per platform, and may require particular
+		/// alignments, sizes, and declaration specifications.
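+		///
+		/// Example usage (a minimal sketch):
+		///     AtomicInt<int32_t> counter(0);
+		///     counter.Increment();
+		///     bool wasOne = counter.SetValueConditional(2, 1); // set to 2 only if the current value is 1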
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T            ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			AtomicInt(const ThisType& x)
+				: mValue(x.GetValue()) {}
+
+			AtomicInt& operator=(const ThisType& x)
+				{ mValue = x.GetValue(); return *this; }
+
+			ValueType GetValue() const
+				{ return mValue; }
+
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			ValueType SetValue(ValueType n);
+			bool      SetValueConditional(ValueType n, ValueType condition);
+			ValueType Increment();
+			ValueType Decrement();
+			ValueType Add(ValueType n);
+
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+			inline ValueType  operator+=(ValueType n)          { return Add(n);}
+			inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+			inline ValueType  operator++()                     { return Increment();}
+			inline ValueType  operator++(int)                  { return Increment() - 1;}
+			inline ValueType  operator--()                     { return Decrement(); }
+			inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+		protected:
+			volatile ValueType mValue;
+		};
+
+
+		// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+		#if defined(EA_COMPILER_CLANG) || defined(__APPLE__) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) || defined(EA_COMPILER_RVCT) // GCC 4.3 or later. Depends on the GCC implementation.
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+				{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+				{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+			template <> inline
+			bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+				{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+				{ return __sync_add_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+				{ return __sync_sub_and_fetch(&mValue, 1); }
+
+			template <> inline
+			AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+
+			template <> inline
+			AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+				{ return __sync_add_and_fetch(&mValue, n); }
+		#endif
+
+	} // namespace Thread
+
+} // namespace EA
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_ATOMIC_GCC_H
+
+
+
+
+
+
+
+
+

+ 73 - 0
include/eathread/gcc/eathread_sync_gcc.h

@@ -0,0 +1,73 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+#define EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+
+
+#include <EABase/eabase.h>
+
+
+#define EA_THREAD_SYNC_IMPLEMENTED
+
+
+// EAProcessorPause
+// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for 
+// high performance spinning, as otherwise a significant performance penalty is incurred. 
+
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+	#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+#else
+	#define EAProcessorPause()
+#endif
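+
+// Example usage (a minimal spin-wait sketch; gFlag is a hypothetical shared variable):
+//     while(AtomicGetValue(&gFlag) == 0)
+//         EAProcessorPause();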
+
+
+
+// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+// The x86 processor memory architecture ensures read and write consistency on both single and
+// multi processing systems. This makes programming simpler but limits maximum system performance.
+// We define EAReadBarrier here to be the same as EACompilerMemoryBarrier in order to prevent the 
+// compiler from making any assumptions at its level about memory usage. Year 2003+ versions of the 
+// Microsoft SDK define a 'MemoryBarrier' statement which has the same effect as EAReadWriteBarrier.
+
+#if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+	#define EAReadBarrier      __sync_synchronize
+	#define EAWriteBarrier     __sync_synchronize
+	#define EAReadWriteBarrier __sync_synchronize
+#else
+	#define EAReadBarrier      EACompilerMemoryBarrier
+	#define EAWriteBarrier     EACompilerMemoryBarrier
+	#define EAReadWriteBarrier EACompilerMemoryBarrier
+#endif
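+
+// Example usage (a minimal publish/consume sketch; gData and gReady are hypothetical):
+//     gData = 37;        // write the payload...
+//     EAWriteBarrier();  // ...and make it visible before the flag
+//     gReady = 1;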
+
+
+// EACompilerMemoryBarrier
+
+#if defined(EA_PROCESSOR_ARM) || defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+	#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+#else
+	#define EACompilerMemoryBarrier()
+#endif
+
+
+
+#endif // EATHREAD_GCC_EATHREAD_SYNC_GCC_H
+
+
+
+
+
+
+
+

+ 29 - 0
include/eathread/internal/atomic.h

@@ -0,0 +1,29 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_ATOMIC_H
+#define EATHREAD_INTERNAL_ATOMIC_H
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		typedef int64_t(*AtomicAdd64Function)(volatile int64_t *ptr, int64_t value);
+		typedef int64_t(*AtomicGetValue64Function)(volatile int64_t *ptr);
+		typedef int64_t(*AtomicSetValue64Function)(volatile int64_t *ptr, int64_t value);
+		typedef bool(*AtomicSetValueConditional64Function)(volatile int64_t *ptr, int64_t value, int64_t condition);
+
+
+		extern AtomicAdd64Function AtomicAdd64;
+		extern AtomicGetValue64Function AtomicGetValue64;
+		extern AtomicSetValue64Function AtomicSetValue64;
+		extern AtomicSetValueConditional64Function AtomicSetValueConditional64;
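+
+		// Example usage (a minimal sketch; gCounter is a hypothetical variable):
+		//     volatile int64_t gCounter = 0;
+		//     int64_t result = AtomicAdd64(&gCounter, 1); // adds 1 atomically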
+	}
+}
+
+#endif

+ 634 - 0
include/eathread/internal/config.h

@@ -0,0 +1,634 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_INTERNAL_CONFIG_H
+#define EATHREAD_INTERNAL_CONFIG_H
+
+
+#include <EABase/eabase.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <stddef.h>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_VERSION
+//
+// We more or less follow the conventional EA packaging approach to versioning 
+// here. A primary distinction is that minor versions are defined as two
+// digit entities (e.g. ".03") instead of minimal digit entities (e.g. ".3"). The logic
+// here is that the value is a counter and not a floating point fraction.
+// Note that the major version doesn't have leading zeros.
+//
+// Example version strings:
+//      "0.91.00"   // Major version 0, minor version 91, patch version 0. 
+//      "1.00.00"   // Major version 1, minor and patch version 0.
+//      "3.10.02"   // Major version 3, minor version 10, patch version 02.
+//     "12.03.01"   // Major version 12, minor version 03, patch version 
+//
+// Example usage:
+//     printf("EATHREAD_VERSION version: %s", EATHREAD_VERSION);
+//     printf("EATHREAD_VERSION version: %d.%d.%d", EATHREAD_VERSION_N / 10000 % 100, EATHREAD_VERSION_N / 100 % 100, EATHREAD_VERSION_N % 100);
+//
+#ifndef EATHREAD_VERSION
+	#define EATHREAD_VERSION   "1.32.09"
+	#define EATHREAD_VERSION_N  13209
+
+	// Older style version info
+	#define EATHREAD_VERSION_MAJOR (EATHREAD_VERSION_N / 100 / 100 % 100)
+	#define EATHREAD_VERSION_MINOR (EATHREAD_VERSION_N       / 100 % 100)
+	#define EATHREAD_VERSION_PATCH (EATHREAD_VERSION_N             % 100)
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// _GNU_SOURCE
+//
+// Defined or not defined.
+// If this is defined then GlibC extension functionality is enabled during 
+// calls to glibc header files.
+//
+#if !defined(_GNU_SOURCE)
+	#define _GNU_SOURCE
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_TLS_COUNT
+//
+// Defined as compile-time constant integer > 0.
+//
+#if !defined(EATHREAD_TLS_COUNT)
+	#define EATHREAD_TLS_COUNT 16
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_THREADS_AVAILABLE
+//
+// Defined as 0 or 1
+// Defines if threading is supported on the given platform.
+// If 0 then the EAThread implementation is not capable of creating threads,
+// but other facilities (e.g. mutex) work in a non-thread-aware way.
+//
+#ifndef EA_THREADS_AVAILABLE
+	#define EA_THREADS_AVAILABLE 1
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_USE_CPP11_CONCURRENCY
+//
+// Defined as 0 or 1
+//
+#ifndef EA_USE_CPP11_CONCURRENCY
+	#if defined(EA_PLATFORM_WINDOWS) && !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) 
+		#define EA_USE_CPP11_CONCURRENCY 1
+	#else
+		#define EA_USE_CPP11_CONCURRENCY 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+//
+// Use the common EAThread AtomicInt implementation on all platforms.
+//
+// Defined as 0 or 1
+//
+#ifndef EA_USE_COMMON_ATOMICINT_IMPLEMENTATION
+	#define EA_USE_COMMON_ATOMICINT_IMPLEMENTATION 1
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_OPENKODE_THREADS_AVAILABLE
+//
+// Defined as 0 or 1
+//
+#ifndef EA_OPENKODE_THREADS_AVAILABLE
+	#define EA_OPENKODE_THREADS_AVAILABLE 0  // used historically on the Marmalade (Airplay) platform.
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAT_ASSERT_ENABLED
+//
+// Defined as 0 or 1, default is 1 if EA_DEBUG or _DEBUG is defined.
+// If defined as 1, then assertion failures are reported via EA::Thread::AssertionFailure(). 
+// 
+#ifndef EAT_ASSERT_ENABLED
+	#if defined(EA_DEBUG) || defined(_DEBUG)
+		#define EAT_ASSERT_ENABLED 1
+	#else
+		#define EAT_ASSERT_ENABLED 0
+	#endif
+#endif
+
+
+
+#if EAT_ASSERT_ENABLED
+	#define EAT_STRINGIFY_HELPER(x) #x
+	#define EAT_STRINGIFY(x) EAT_STRINGIFY_HELPER(x)
+	#define EAT_ASSERT(expression) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailure(__FILE__ "(" EAT_STRINGIFY(__LINE__) "): " #expression); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+#else
+	#define EAT_ASSERT(expression)
+#endif
+
+#if EAT_ASSERT_ENABLED
+	#define EAT_ASSERT_MSG(expression, msg) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailure(msg); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+#else
+	#define EAT_ASSERT_MSG(expression, msg)
+#endif
+
+#if EAT_ASSERT_ENABLED
+	#define EAT_ASSERT_FORMATTED(expression, pFormat, ...) \
+		EA_DISABLE_VC_WARNING(4127) \
+		do { \
+			EA_ANALYSIS_ASSUME(expression); \
+			if (!(expression) ) \
+				EA::Thread::AssertionFailureV(pFormat, __VA_ARGS__); \
+		} while(0) \
+		EA_RESTORE_VC_WARNING()
+#else
+	#define EAT_ASSERT_FORMATTED(expression, pFormat, ...)
+#endif
+
+#if EAT_ASSERT_ENABLED
+	#define EAT_FAIL_MSG(msg) (EA::Thread::AssertionFailure(msg))
+#else
+	#define EAT_FAIL_MSG(msg)
+#endif
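+
+// Example usage (a minimal sketch):
+//     EAT_ASSERT(pThread != NULL);
+//     EAT_ASSERT_MSG(count >= 0, "count must be non-negative");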
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EAT_COMPILETIME_ASSERT   
+//
+// Compile-time assertion for this module.
+// C-like declaration:
+//    void EAT_COMPILETIME_ASSERT(bool bExpression);
+//
+#if !defined(EAT_COMPILETIME_ASSERT)
+	#define EAT_COMPILETIME_ASSERT(expression) static_assert(expression, EA_STRINGIFY(expression))
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_TLSALLOC_DTOR_ENABLED
+//
+// Defined as 0 or 1. Default is 1.
+// Defines if the TLSAlloc class destructor frees the TLS thread handle.
+// This won't make a difference unless you were using EAThread in a DLL and 
+// you were repeatedly loading and unloading DLLs.
+// See eathread_pc.cpp for usage of this and more info about the situation.
+//
+#ifndef EATHREAD_TLSALLOC_DTOR_ENABLED
+	#define EATHREAD_TLSALLOC_DTOR_ENABLED 1
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_LIKELY / EATHREAD_UNLIKELY
+//
+// Defined as a macro which gives a hint to the compiler for branch
+// prediction. GCC gives you the ability to manually give a hint to 
+// the compiler about the result of a comparison, though it's often
+// best to compile shipping code with profiling feedback under both
+// GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there 
+// are times when you feel very sure that a boolean expression will
+// usually evaluate to either true or false and can help the compiler
+// by using an explicit directive...
+//
+// Example usage:
+//     if(EATHREAD_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0.
+//         { ... }
+//
+// Example usage:
+//     if(EATHREAD_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0.
+//         { ... }
+//
+#ifndef EATHREAD_LIKELY
+	#define EATHREAD_LIKELY(x) EA_LIKELY(x)
+	#define EATHREAD_UNLIKELY(x) EA_UNLIKELY(x)
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NAMING
+//
+// Defined as 0, 1 (enabled), or 2 (enabled only when debugger is present). 
+// 
+#define EATHREAD_NAMING_DISABLED 0
+#define EATHREAD_NAMING_ENABLED  1
+#define EATHREAD_NAMING_OPTIONAL 2
+
+#ifndef EATHREAD_NAMING
+	#if defined(EA_SHIP) || defined(EA_FINAL) // These are two de-facto standard EA defines for identifying a shipping build.
+		#define EATHREAD_NAMING 0
+	#else
+		#define EATHREAD_NAMING EATHREAD_NAMING_ENABLED // or EATHREAD_NAMING_OPTIONAL? 
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NAME_SIZE
+//
+// Specifies the max size to support for naming threads.
+// This value can be changed as desired.
+//
+#ifndef EATHREAD_NAME_SIZE
+	#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_UNIX)
+		#define EATHREAD_NAME_SIZE 64
+	#else
+		#define EATHREAD_NAME_SIZE 32
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EA_XBDM_ENABLED
+//
+// Defined as 0 or 1, with 1 being the default for debug builds.
+// This controls whether xbdm library usage is enabled on XBox 360. This library
+// allows for runtime debug functionality. But shipping applications are not
+// allowed to use xbdm. 
+//
+#if !defined(EA_XBDM_ENABLED)
+	#if defined(EA_DEBUG)
+		#define EA_XBDM_ENABLED 1
+	#else
+		#define EA_XBDM_ENABLED 0
+	#endif
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DLL
+//
+// Defined as 0 or 1. The default is dependent on the definition of EA_DLL.
+// If EA_DLL is defined, then EATHREAD_DLL is 1, else EATHREAD_DLL is 0.
+// EA_DLL is a define that controls DLL builds within the EAConfig build system. 
+// EATHREAD_DLL controls whether EATHREAD_VERSION is built and used as a DLL. 
+// Normally you wouldn't do such a thing, but there are valid use cases,
+// particularly in the case of embedding C++ into C# applications.
+//
+#ifndef EATHREAD_DLL
+	#if defined(EA_DLL)
+		#define EATHREAD_DLL 1
+	#else
+		#define EATHREAD_DLL 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREADLIB_API
+//
+// This is used to label functions as DLL exports under Microsoft platforms.
+// If EA_DLL is defined, then the user is building EAThread as a DLL and EAThread's
+// non-templated functions will be exported. EAThread template functions are not
+// labelled as EATHREADLIB_API (and are thus not exported in a DLL build). This is 
+// because it's not possible (or at least unsafe) to implement inline templated 
+// functions in a DLL.
+//
+// Example usage of EATHREADLIB_API:
+//    EATHREADLIB_API int someVariable = 10;         // Export someVariable in a DLL build.
+//
+//    struct EATHREADLIB_API SomeClass{              // Export SomeClass and its member functions in a DLL build.
+//        EATHREADLIB_LOCAL void PrivateMethod();    // Not exported.
+//    };
+//
+//    EATHREADLIB_API void SomeFunction();           // Export SomeFunction in a DLL build.
+//
+// For GCC, see http://gcc.gnu.org/wiki/Visibility
+//
+#ifndef EATHREADLIB_API // If the build file hasn't already defined this to be dllexport...
+	#if EATHREAD_DLL 
+		#if defined(_MSC_VER)
+			#define EATHREADLIB_API      __declspec(dllimport)
+			#define EATHREADLIB_LOCAL
+		#elif defined(__CYGWIN__)
+			#define EATHREADLIB_API      __attribute__((dllimport))
+			#define EATHREADLIB_LOCAL
+		#elif (defined(__GNUC__) && (__GNUC__ >= 4))
+			#define EATHREADLIB_API      __attribute__ ((visibility("default")))
+			#define EATHREADLIB_LOCAL    __attribute__ ((visibility("hidden")))
+		#else
+			#define EATHREADLIB_API
+			#define EATHREADLIB_LOCAL
+		#endif
+	#else
+		#define EATHREADLIB_API
+		#define EATHREADLIB_LOCAL
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_ALLOC_PREFIX
+//
+// Defined as a string literal. Defaults to this package's name.
+// Can be overridden by the user by predefining it or by editing this file.
+// This define is used as the default name used by this package for naming
+// memory allocations and memory allocators.
+//
+// All allocations names follow the same naming pattern:
+//     <package>/<module>[/<specific usage>]
+// 
+// Example usage:
+//     void* p = pCoreAllocator->Alloc(37, EATHREAD_ALLOC_PREFIX, 0);
+//
+// Example usage:
+//     gMessageServer.GetMessageQueue().get_allocator().set_name(EATHREAD_ALLOC_PREFIX "MessageSystem/Queue");
+//
+#ifndef EATHREAD_ALLOC_PREFIX
+	#define EATHREAD_ALLOC_PREFIX "EAThread/"
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_USE_STANDARD_NEW
+//
+// Defines whether we use the basic standard operator new or the named
+// extended version of operator new, as per the EASTL package.
+//
+#ifndef EATHREAD_USE_STANDARD_NEW
+	#if EATHREAD_DLL  // A DLL must provide its own implementation of new, so we just use built-in new.
+		#define EATHREAD_USE_STANDARD_NEW 1
+	#else
+		#define EATHREAD_USE_STANDARD_NEW 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_NEW
+//
+// This is merely a wrapper for operator new which can be overridden and 
+// which has debug/release forms.
+//
+// Example usage:
+//    SomeClass* pObject = EATHREAD_NEW("SomeClass") SomeClass(1, 2, 3);
+//
+#ifndef EATHREAD_NEW
+	#if EATHREAD_USE_STANDARD_NEW
+			#define EATHREAD_NEW(name)                            new
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new
+			#define EATHREAD_DELETE                               delete
+	#else
+		#if defined(EA_DEBUG)
+			#define EATHREAD_NEW(name)                            new(name, 0, 0, __FILE__, __LINE__)
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new(alignment, offset, name, 0, 0, __FILE__, __LINE__)
+			#define EATHREAD_DELETE                               delete
+		#else
+			#define EATHREAD_NEW(name)                            new(name, 0, 0, 0, 0)
+			#define EATHREAD_NEW_ALIGNED(alignment, offset, name) new(alignment, offset, name, 0, 0, 0, 0)
+			#define EATHREAD_DELETE                               delete
+		#endif
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS
+//
+// This symbol is defined if a platform has both native and emulated atomics.
+// Currently the only platform that requires this is iOS as earlier versions 
+// of the operating system (e.g. iOS 3) do not provide OS support for 64-bit
+// atomics while later versions (e.g. iOS 4/5) do.
+#ifndef EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS
+	#if defined(__APPLE__)
+		#define EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS 1 
+	#else
+		#define EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLIBC_BACKTRACE_AVAILABLE
+//
+// You generally need to be using GCC, GLIBC, and Linux for backtrace to be available.
+// And even then it's available only some of the time.
+//
+#if !defined(EATHREAD_GLIBC_BACKTRACE_AVAILABLE)
+	#if (defined(__clang__) || defined(__GNUC__)) && (defined(EA_PLATFORM_LINUX) || defined(__APPLE__)) && !defined(__CYGWIN__) && !defined(EA_PLATFORM_ANDROID)
+		#define EATHREAD_GLIBC_BACKTRACE_AVAILABLE 1
+	#else
+		#define EATHREAD_GLIBC_BACKTRACE_AVAILABLE 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLIBC_VERSION
+//
+// We provide our own GLIBC numeric version to determine when system library 
+// calls are available.
+//
+#if defined(__GLIBC__)
+	#define EATHREAD_GLIBC_VERSION ((__GLIBC__ * 1000) + __GLIBC_MINOR__) 
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GETCALLSTACK_SUPPORTED
+//
+// Defined as 0 or 1.
+// Identifies whether runtime callstack unwinding (i.e. GetCallstack()) is 
+// supported for the given platform. In some cases it may be that unwinding 
+// support code is present but it hasn't been tested for reliability and may
+// have bugs preventing it from working properly. In some cases (e.g. x86) 
+// it may be that optimized builds make it difficult to read the callstack 
+// reliably, despite that we flag the platform as supported.
+//
+#if !defined(EATHREAD_GETCALLSTACK_SUPPORTED)
+	#if EATHREAD_GLIBC_BACKTRACE_AVAILABLE          // Typically this means Linux on x86.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_IPHONE)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_ANDROID)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_IPHONE_SIMULATOR)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_WINDOWS_PHONE) && defined(EA_PROCESSOR_ARM)       
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_MICROSOFT)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_LINUX)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_OSX)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_CYGWIN)               // Support hasn't been verified.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_MINGW)                // Support hasn't been verified.
+		#define EATHREAD_GETCALLSTACK_SUPPORTED 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEBUG_DETAIL_ENABLED
+//
+// Defined as 0 or 1. 
+// If true then detailed debug info is displayed. Can be enabled in opt builds.
+//
+#ifndef EATHREAD_DEBUG_DETAIL_ENABLED
+	#define EATHREAD_DEBUG_DETAIL_ENABLED 0
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_MIN_ABSOLUTE_TIME
+//
+// Defined as a time in milliseconds. 
+// Locks and waits allow the user to specify an absolute timeout time. In order
+// to detect that the user accidentally specified a relative time, we define a
+// minimum allowed absolute time which we assert on. In practice, any value below
+// this minimum cannot be a legitimate future absolute time.
+//
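+// Example usage (a minimal sketch; EAThread lock functions take absolute timeouts):
+//     EA::Thread::ThreadTime timeoutAbsolute = EA::Thread::GetThreadTime() + 1000; // one second from now
+//     mutex.Lock(timeoutAbsolute);
+//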
+#ifndef EATHREAD_MIN_ABSOLUTE_TIME
+	#define EATHREAD_MIN_ABSOLUTE_TIME  10000
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+//
+// Defined as 0 or 1. 
+// If true then the platform supports a user specified thread affinity mask.
+//
+#ifndef EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+	#if   defined(EA_PLATFORM_CAPILANO)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#elif defined(EA_PLATFORM_SONY)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#elif defined(EA_USE_CPP11_CONCURRENCY) && EA_USE_CPP11_CONCURRENCY
+		// C++11 does not provide a mechanism to set thread affinities.
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 0
+	#elif defined(EA_PLATFORM_ANDROID) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_UNIX)
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 0
+	#else
+		#define EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 1
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+//
+// Defined as 0 or 1. 
+// 
+//
+#ifndef EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+	#define EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY 0
+#endif
+
+
+	
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_SCEDBG_ENABLED 
+//
+// Defined as 0 or 1. 
+// Informs EAThread if Sony Debug libraries are available for use. 
+//
+#ifndef EATHREAD_SCEDBG_ENABLED 
+	#ifndef EA_SCEDBG_ENABLED
+		#define EATHREAD_SCEDBG_ENABLED 0
+	#else
+		#define EATHREAD_SCEDBG_ENABLED  EA_SCEDBG_ENABLED
+	#endif 
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEBUG_BREAK
+//
+#ifndef EATHREAD_DEBUG_BREAK
+	#ifdef _MSC_VER
+		#define EATHREAD_DEBUG_BREAK() __debugbreak()
+	#else
+		#define EATHREAD_DEBUG_BREAK() *(volatile int*)(0) = 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_C11_ATOMICS_AVAILABLE
+//
+#ifndef EATHREAD_C11_ATOMICS_AVAILABLE
+	#if (defined(EA_ANDROID_SDK_LEVEL) && (EA_ANDROID_SDK_LEVEL >= 21))  
+		#define EATHREAD_C11_ATOMICS_AVAILABLE 1
+	#else
+		#define EATHREAD_C11_ATOMICS_AVAILABLE 0
+	#endif
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_ALIGNMENT_CHECK
+//
+
+namespace EA {
+namespace Thread {
+namespace detail {
+	// Used to assert that memory accesses on x86-64 are atomic when "naturally" aligned, i.e. aligned to the size of the type.
+	template <typename T>
+	inline bool IsNaturallyAligned(T* p)
+	{
+		return ((uintptr_t)p & (sizeof(T) - 1)) == 0;
+	}
+}}}
+
+#ifndef EATHREAD_ALIGNMENT_CHECK
+	#define EATHREAD_ALIGNMENT_CHECK(address) EAT_ASSERT_MSG(EA::Thread::detail::IsNaturallyAligned(address), "address is not naturally aligned.")	
+#endif
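+
+// Example usage (a minimal sketch):
+//     int64_t value;
+//     EATHREAD_ALIGNMENT_CHECK(&value); // asserts in debug builds if &value is not naturally aligned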
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_APPLE_GETMODULEINFO_ENABLED 
+//
+// This functionality has been migrated to EACallstack.  We provide a preprocessor switch for backwards compatibility
+// until the code path is removed completely in a future release.
+//
+// Defined as 0 or 1. 
+//
+#ifndef EATHREAD_APPLE_GETMODULEINFO_ENABLED 
+	#define EATHREAD_APPLE_GETMODULEINFO_ENABLED 0
+#endif
+
+
+
+#endif // Header include guard
+
+
+

+ 241 - 0
include/eathread/internal/deprecated.h

@@ -0,0 +1,241 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_DEPRECATED_H
+#define EATHREAD_INTERNAL_DEPRECATED_H
+
+#include <EABase/eabase.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// This header provides facilities for nudging users off of deprecated code.
+// 
+// The goal is to provide a gradual migration where users become aware of the
+// accumulated technical debt considerably before they are required to address
+// the problem. To this end, once a feature has been deprecated, we may escalate
+// from warnings, to assertions, to build warnings before actual removal.
+//
+// EATHREAD_REMOVE_DEPRECATED_API				can be defined in client code to force build time errors
+// EATHREAD_DEPRECATED_MEMBER_WARN_ON_USE		generate runtime warnings on write to deprecated members
+// EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE		generate runtime assertions on write to deprecated members
+// EATHREAD_DEPRECATED_MEMBER_WARN_ON_BUILD		generate runtime assertions and build warnings on access
+//
+// TODO: consider migrating these facilities to a shared location once they're stable (EAStdC)
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_REMOVE_DEPRECATED_API
+//
+// Defining this macro in client code will remove any deprecated API from the
+// EAThread public headers. This can be useful to temporarily define locally
+// in dependent modules to find and eliminate any contained code that depends
+// on any deprecated EAThread features.
+//
+// Another approach is to enable broader deprecate culling by defining
+// EA_REMOVE_DEPRECATED_API within the build for the module you wish to
+// eliminate deprecated code. This should remove deprecates for all libraries
+// that support it.
+//
+// Note: Deprecated API culling macros should not be defined globally for a
+// build. Doing so will flag all use of deprecated API across all modules
+// in a game which is typically more noise than desired and makes a piecewise
+// approach more difficult. Instead, define the flags only when building the
+// module where you wish to eliminate use of deprecated code.
+#if defined(EA_REMOVE_DEPRECATED_API)
+	// respect the master control if it has been provided
+	#define EATHREAD_REMOVE_DEPRECATED_API
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEPRECATED_MEMBER_WARN_ON_USE
+//
+// Simplifies the process of disabling public members of EAThread classes when
+// building with deprecated code removed. This macro renames the member variable
+// when deprecate culling is enabled to avoid changing the size of the structure
+// which can cause binary incompatibility issues.
+#if defined(EATHREAD_REMOVE_DEPRECATED_API)
+	// rename deprecated members to trigger a build error
+	#define EATHREAD_DEPRECATED_MEMBER_WARN_ON_USE(Type, Name) EA_DEPRECATED Type EA_PREPROCESSOR_JOIN2(Name, _deprecated)
+#else
+	// member enabled, but use runtime deprecation warnings only
+	#define EATHREAD_DEPRECATED_MEMBER_WARN_ON_USE(Type, Name) EA::Thread::DeprecatedMemberWarn<Type> Name
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE
+//
+// This is similar to recently deprecated member except that it will generate
+// an assertion failure on assignment.
+#if defined(EATHREAD_REMOVE_DEPRECATED_API)
+	// rename deprecated members to trigger a build error
+	#define EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE(Type, Name) EA_DEPRECATED Type EA_PREPROCESSOR_JOIN2(Name, _deprecated)
+#else
+	// member enabled, but use runtime assertions only
+	#define EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE(Type, Name) EA::Thread::DeprecatedMemberError<Type> Name
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// EATHREAD_DEPRECATED_MEMBER_WARN_ON_BUILD
+//
+// This is similar to deprecated member except that it additionally
+// add deprecation markup which will trigger warnings during the build. Note,
+// this will often get converted into a build error with warnings as error
+// enabled. For this reason, consider using the other macros first.
+#if defined(EATHREAD_REMOVE_DEPRECATED_API)
+	// rename deprecated members to trigger a build error
+	#define EATHREAD_DEPRECATED_MEMBER_WARN_ON_BUILD(Type, Name) EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE(Type, Name)
+#else
+	// member enabled, assert on set but also use build-time deprecation warnings
+	#define EATHREAD_DEPRECATED_MEMBER_WARN_ON_BUILD(Type, Name) EA_DEPRECATED EATHREAD_DEPRECATED_MEMBER_ASSERT_ON_USE(Type, Name)
+#endif
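+
+// Example usage (an illustrative sketch; SomeParams and mOldField are hypothetical):
+//     struct SomeParams {
+//         EATHREAD_DEPRECATED_MEMBER_WARN_ON_USE(int, mOldField); // reads and writes warn once at runtime
+//     };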
+
+namespace EA {
+namespace Thread {
+
+// Issues the given warning if the flag is unset (and sets it), otherwise does nothing.  This can be used to limit the
+// amount of message spam coming from a specific usage.
+EATHREADLIB_API void WarnOnce(bool* pHasTriggered, const char* message);
+EATHREADLIB_API void ErrorOnce(bool* pHasTriggered, const char* message);
+
+// This template allows the creation of classes that implicitly convert to the wrapped type but will warn on assignment.
+// This is useful in removing public member variables from our public API.  The goal here is to provide a softer nudge
+// than a build error.  Deprecation markup on the member is a similar approach but will often trigger a build failure
+// as warnings as errors is commonly enabled.
+// TODO: does not work for types that support dereferencing
+// TODO: also missing other operator forwarding
+template <typename T>
+class DeprecatedMemberWarn
+{
+public:
+#ifdef EA_COMPILER_NO_DEFAULTED_FUNCTIONS
+	DeprecatedMemberWarn(){}
+#else
+	DeprecatedMemberWarn() = default;
+#endif
+	DeprecatedMemberWarn(T rhs): mValue(rhs) {}
+
+	//DeprecatedMemberWarn& operator=(DeprecatedMemberWarn&&) = default; // TODO: Why doesn't this work
+#ifdef EA_COMPILER_NO_DEFAULTED_FUNCTIONS
+	DeprecatedMemberWarn& operator=(const DeprecatedMemberWarn& rhs)
+	{
+		mValue = rhs.mValue;
+		return *this;
+	}
+#else
+	DeprecatedMemberWarn& operator=(const DeprecatedMemberWarn& rhs) = default;
+#endif
+
+	DeprecatedMemberWarn& operator=(const T& rhs)
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		WarnOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		this->mValue = rhs;
+		return *this;
+	}
+
+	DeprecatedMemberWarn& operator=(T&& rhs)
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		WarnOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		this->mValue = rhs;
+		return *this;
+	}
+
+	operator T() const
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		WarnOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		return mValue;
+	}
+
+	// accessor for fetching the value without tripping the error
+	const T& GetValue() const { return mValue; }
+
+	// TODO: use sfinae to enable/disable when the wrapped type supports dereferencing
+	//auto operator->() const
+	//{
+		//return T::operator->(mValue);
+	//}
+
+
+private:
+	T mValue;
+};
+
+// This template allows the creation of classes that implicitly convert to the wrapped type but will assert on assignment.
+// This is useful in removing public member variables from our public API.  The goal here is to provide a softer nudge
+// than a build error.  Deprecation markup on the member is a similar approach but will often trigger a build failure
+// as warnings as errors is commonly enabled.
+// TODO: does not work for types that support dereferencing
+template <typename T>
+class DeprecatedMemberError
+{
+public:
+#ifdef EA_COMPILER_NO_DEFAULTED_FUNCTIONS
+	DeprecatedMemberError(){}
+#else
+	DeprecatedMemberError() = default;
+#endif
+	DeprecatedMemberError(T rhs): mValue(rhs) {}
+
+	//DeprecatedMemberError& operator=(DeprecatedMemberError&&) = default; // TODO: Why doesn't this work
+#ifdef EA_COMPILER_NO_DEFAULTED_FUNCTIONS
+	DeprecatedMemberError& operator=(const DeprecatedMemberError& rhs)
+	{
+		mValue = rhs.mValue;
+		return *this;
+	}
+#else
+	DeprecatedMemberError& operator=(const DeprecatedMemberError& rhs) = default;
+#endif
+
+	DeprecatedMemberError& operator=(const T& rhs)
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		ErrorOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		this->mValue = rhs;
+		return *this;
+	}
+
+	DeprecatedMemberError& operator=(T&& rhs)
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		ErrorOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		this->mValue = rhs;
+		return *this;
+	}
+
+	operator T() const
+	{
+#if EAT_ASSERT_ENABLED
+		static bool hasTriggered = false;
+		ErrorOnce(&hasTriggered, "Client code is accessing a deprecated structure member.");
+#endif
+		return mValue;
+	}
+
+	// accessor for fetching the value without tripping the error
+	const T& GetValue() const { return mValue; }
+
+private:
+	T mValue;
+};
+
+}} // end namespace EA::Thread
+
+#endif
+

+ 15 - 0
include/eathread/internal/dllinfo.h

@@ -0,0 +1,15 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_DLLINFO_H
+#define EATHREAD_DLLINFO_H
+
+
+#include <eathread/internal/config.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#endif

+ 143 - 0
include/eathread/internal/eathread_atomic.h

@@ -0,0 +1,143 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////////////////////////////
+// eathread_atomic.h
+//
+// Defines functionality for thread-safe primitive operations.
+// 
+// EAThread atomics do NOT imply the use of read/write barriers.  This is 
+// partly due to historical reasons and partly due to EAThread's internal 
+// code being optimized for not using barriers.
+//
+// In the future, we are considering migrating to an atomics interface that
+// defaults to full read/write barriers while allowing users to opt out of
+// full barrier usage.  The C++11 interface already provides similar
+// memory-ordering controls.
+//
+// http://en.cppreference.com/w/cpp/atomic/memory_order
+//
+// Created by Rob Parolin
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+#define EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+#include <atomic>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// class AtomicInt
+		///
+		/// Implements thread-safe access to an integer and primary operations on that integer.
+		/// AtomicIntegers are commonly used as lightweight flags and signals between threads
+		/// or as the synchronization object for spinlocks. Those familiar with the Win32 API
+		/// will find that AtomicInt32 is essentially a platform independent interface to 
+		/// the Win32 InterlockedXXX family of functions. Those familiar with Linux may 
+		/// find that AtomicInt32 is essentially a platform independent interface to atomic_t 
+		/// functionality.
+		///
+		/// Note that this implementation is built on std::atomic, which provides
+		/// the required thread-safety guarantees on supported platforms.
+		///
+		/// Example usage
+		///     AtomicInt32 i = 0;
+		///
+		///     ++i;
+		///     i--;
+		///     i += 7;
+		///     i -= 3;
+		///     i = 2;
+		///     
+		///     int x = i.GetValue();
+		///     i.Increment();
+		///     bool oldValueWas6 = i.SetValueConditional(3, 6);
+		///     i.Add(4);
+		///
+		template <class T>
+		class AtomicInt
+		{
+		public:
+			typedef AtomicInt<T> ThisType;
+			typedef T ValueType;
+
+			/// AtomicInt
+			/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+			/// This is done so that an AtomicInt acts like a standard built-in integer.
+			AtomicInt()
+				{}
+
+			AtomicInt(ValueType n) 
+				{ SetValue(n); }
+
+			AtomicInt(const ThisType& x) 
+				{ SetValue(x.GetValue()); }
+
+			AtomicInt& operator=(const ThisType& x)
+				{ SetValue(x.GetValue()); return *this; }
+
+			ValueType GetValue() const 
+				{ return mValue.load(); }
+
+			ValueType GetValueRaw() const
+				{ return mValue; }
+
+			ValueType SetValue(ValueType n)
+				{ return mValue.exchange(n); }
+
+			bool SetValueConditional(ValueType n, ValueType condition)
+				{ return mValue.compare_exchange_strong(condition, n); }
+
+			ValueType Increment()
+				{ return mValue.operator++(); }
+
+			ValueType Decrement()
+				{ return mValue.operator--(); }
+
+			ValueType Add(ValueType n)
+				{ return mValue.fetch_add(n) + n; }
+
+			// operators
+			inline            operator const ValueType() const { return GetValue(); }
+			inline ValueType  operator =(ValueType n)          { return mValue.operator=(n); }
+			inline ValueType  operator+=(ValueType n)          { return mValue.operator+=(n); }
+			inline ValueType  operator-=(ValueType n)          { return mValue.operator-=(n); }
+			inline ValueType  operator++()                     { return mValue.operator++(); }
+			inline ValueType  operator++(int)                  { return mValue.operator++(0); }
+			inline ValueType  operator--()                     { return mValue.operator--(); }
+			inline ValueType  operator--(int)                  { return mValue.operator--(0); }
+
+		protected:
+			std::atomic<ValueType> mValue;
+		};
+
+	} // namespace Thread
+} // namespace EA
+
+
+#endif // EATHREAD_INTERNAL_EATHREAD_ATOMIC_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 36 - 0
include/eathread/internal/eathread_atomic_standalone.h

@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/// Standalone atomic functions
+/// These act the same as the class functions below.
+/// The T return values are the previous value, except for the
+/// AtomicFetchSwap function which returns the swapped out value.
+///
+/// T    AtomicGetValue(volatile T*);
+/// T    AtomicGetValue(const volatile T*);
+/// void AtomicSetValue(volatile T*, T value);
+/// T    AtomicFetchIncrement(volatile T*);
+/// T    AtomicFetchDecrement(volatile T*);
+/// T    AtomicFetchAdd(volatile T*, T value);
+/// T    AtomicFetchSub(volatile T*, T value);
+/// T    AtomicFetchOr(volatile T*, T value);
+/// T    AtomicFetchAnd(volatile T*, T value);
+/// T    AtomicFetchXor(volatile T*, T value);
+/// T    AtomicFetchSwap(volatile T*, T value);
+/// T    AtomicFetchSwapConditional(volatile T*, T value, T condition);
+/// bool AtomicSetValueConditional(volatile T*, T value, T condition);
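+///
+/// Example usage (a minimal sketch):
+///     volatile int gFlag = 0;
+///     if(AtomicSetValueConditional(&gFlag, 1, 0)) // transition 0 -> 1 exactly once
+///         { /* we won the race */ }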
+
+#if defined(EA_COMPILER_MSVC)
+	#include <eathread/internal/eathread_atomic_standalone_msvc.h>
+#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+	#include <eathread/internal/eathread_atomic_standalone_gcc.h>
+#else
+	#error unsupported platform
+#endif
+
+

+ 199 - 0
include/eathread/internal/eathread_atomic_standalone_gcc.h

@@ -0,0 +1,199 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+namespace EA
+{
+namespace Thread
+{
+
+// TODO(rparolin): Consider use of clang builtin __sync_swap.
+// https://clang.llvm.org/docs/LanguageExtensions.html#sync-swap
+
+// TODO(rparolin):  Consider use of C11 atomics
+// https://clang.llvm.org/docs/LanguageExtensions.html#c11-atomic-builtins
+
+namespace detail
+{
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr);
+} // namespace detail
+
+// int
+inline int AtomicGetValue(volatile int* ptr) { return detail::AtomicGetValue(ptr); }
+inline int AtomicGetValue(const volatile int* ptr) { return AtomicGetValue(const_cast<volatile int*>(ptr)); }
+inline int AtomicSetValue(volatile int* dest, int value) { return __sync_lock_test_and_set(dest, value); }
+inline int AtomicFetchIncrement(volatile int* dest) { return __sync_fetch_and_add(dest, int(1)); }
+inline int AtomicFetchDecrement(volatile int* dest) { return __sync_fetch_and_add(dest, int(-1)); }
+inline int AtomicFetchAdd(volatile int* dest, int value) { return __sync_fetch_and_add(dest, value); }
+inline int AtomicFetchSub(volatile int* dest, int value) { return __sync_fetch_and_sub(dest, value); }
+inline int AtomicFetchOr(volatile int* dest, int value) { return __sync_fetch_and_or(dest, value); }
+inline int AtomicFetchAnd(volatile int* dest, int value) { return __sync_fetch_and_and(dest, value); }
+inline int AtomicFetchXor(volatile int* dest, int value) { return __sync_fetch_and_xor(dest, value); }
+inline int AtomicFetchSwap(volatile int* dest, int value) { return __sync_lock_test_and_set(dest, value); }
+inline int AtomicFetchSwapConditional(volatile int* dest, int value, int condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile int* dest, int value, int condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned int
+inline unsigned int AtomicGetValue(volatile unsigned int* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned int AtomicGetValue(const volatile unsigned int* ptr) { return AtomicGetValue(const_cast<volatile unsigned int*>(ptr)); }
+inline unsigned int AtomicSetValue(volatile unsigned int* dest, unsigned int value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned int AtomicFetchIncrement(volatile unsigned int* dest) { return __sync_fetch_and_add(dest, (unsigned int)(1)); }
+inline unsigned int AtomicFetchDecrement(volatile unsigned int* dest) { return __sync_fetch_and_add(dest, (unsigned int)(-1)); }
+inline unsigned int AtomicFetchAdd(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_add(dest, value); } 
+inline unsigned int AtomicFetchSub(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned int AtomicFetchOr(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned int AtomicFetchAnd(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned int AtomicFetchXor(volatile unsigned int* dest, unsigned int value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned int AtomicFetchSwap(volatile unsigned int* dest, unsigned int value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned int AtomicFetchSwapConditional(volatile unsigned int* dest, unsigned int value, unsigned int condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned int* dest, unsigned int value, unsigned int condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// short
+inline short AtomicGetValue(volatile short* ptr) { return detail::AtomicGetValue(ptr); }
+inline short AtomicGetValue(const volatile short* ptr) { return AtomicGetValue(const_cast<volatile short*>(ptr)); }
+inline short AtomicSetValue(volatile short* dest, short value) { return __sync_lock_test_and_set(dest, value); }
+inline short AtomicFetchIncrement(volatile short* dest) { return __sync_fetch_and_add(dest, short(1)); }
+inline short AtomicFetchDecrement(volatile short* dest) { return __sync_fetch_and_add(dest, short(-1)); }
+inline short AtomicFetchAdd(volatile short* dest, short value) { return __sync_fetch_and_add(dest, value); }
+inline short AtomicFetchSub(volatile short* dest, short value) { return __sync_fetch_and_sub(dest, value); }
+inline short AtomicFetchOr(volatile short* dest, short value) { return __sync_fetch_and_or(dest, value); }
+inline short AtomicFetchAnd(volatile short* dest, short value) { return __sync_fetch_and_and(dest, value); }
+inline short AtomicFetchXor(volatile short* dest, short value) { return __sync_fetch_and_xor(dest, value); }
+inline short AtomicFetchSwap(volatile short* dest, short value) { return __sync_lock_test_and_set(dest, value); }
+inline short AtomicFetchSwapConditional(volatile short* dest, short value, short condition) { return __sync_val_compare_and_swap(reinterpret_cast<volatile unsigned short*>(dest), static_cast<unsigned short>(condition), static_cast<unsigned short>(value)); }
+inline bool AtomicSetValueConditional(volatile short* dest, short value, short condition) { return __sync_bool_compare_and_swap(reinterpret_cast<volatile unsigned short*>(dest), static_cast<unsigned short>(condition), static_cast<unsigned short>(value)); }
+
+// unsigned short
+inline unsigned short AtomicGetValue(volatile unsigned short* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned short AtomicGetValue(const volatile unsigned short* ptr) { return AtomicGetValue(const_cast<volatile unsigned short*>(ptr)); }
+inline unsigned short AtomicSetValue(volatile unsigned short* dest, unsigned short value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned short AtomicFetchIncrement(volatile unsigned short* dest) { return __sync_fetch_and_add(dest, (unsigned short)(1)); }
+inline unsigned short AtomicFetchDecrement(volatile unsigned short* dest) { return __sync_fetch_and_add(dest, (unsigned short)(-1)); }
+inline unsigned short AtomicFetchAdd(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned short AtomicFetchSub(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned short AtomicFetchOr(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned short AtomicFetchAnd(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned short AtomicFetchXor(volatile unsigned short* dest, unsigned short value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned short AtomicFetchSwap(volatile unsigned short* dest, unsigned short value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned short AtomicFetchSwapConditional(volatile unsigned short* dest, unsigned short value, unsigned short condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned short* dest, unsigned short value, unsigned short condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// long
+inline long AtomicGetValue(volatile long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long AtomicGetValue(const volatile long* ptr) { return AtomicGetValue(const_cast<volatile long*>(ptr)); }
+inline long AtomicSetValue(volatile long* dest, long value) { return __sync_lock_test_and_set(dest, value); }
+inline long AtomicFetchIncrement(volatile long* dest) { return __sync_fetch_and_add(dest, long(1)); }
+inline long AtomicFetchDecrement(volatile long* dest) { return __sync_fetch_and_add(dest, long(-1)); }
+inline long AtomicFetchAdd(volatile long* dest, long value) { return __sync_fetch_and_add(dest, value); }
+inline long AtomicFetchSub(volatile long* dest, long value) { return __sync_fetch_and_sub(dest, value); }
+inline long AtomicFetchOr(volatile long* dest, long value) { return __sync_fetch_and_or(dest, value); }
+inline long AtomicFetchAnd(volatile long* dest, long value) { return __sync_fetch_and_and(dest, value); }
+inline long AtomicFetchXor(volatile long* dest, long value) { return __sync_fetch_and_xor(dest, value); }
+inline long AtomicFetchSwap(volatile long* dest, long value) { return __sync_lock_test_and_set(dest, value); }
+inline long AtomicFetchSwapConditional(volatile long* dest, long value, long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile long* dest, long value, long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned long
+inline unsigned long AtomicGetValue(volatile unsigned long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long AtomicGetValue(const volatile unsigned long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long*>(ptr)); }
+inline unsigned long AtomicSetValue(volatile unsigned long* dest, unsigned long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long AtomicFetchIncrement(volatile unsigned long* dest) { return __sync_fetch_and_add(dest, (unsigned long)(1)); }
+inline unsigned long AtomicFetchDecrement(volatile unsigned long* dest) { return __sync_fetch_and_add(dest, (unsigned long)(-1)); }
+inline unsigned long AtomicFetchAdd(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned long AtomicFetchSub(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned long AtomicFetchOr(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned long AtomicFetchAnd(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned long AtomicFetchXor(volatile unsigned long* dest, unsigned long value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned long AtomicFetchSwap(volatile unsigned long* dest, unsigned long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long AtomicFetchSwapConditional(volatile unsigned long* dest, unsigned long value, unsigned long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned long* dest, unsigned long value, unsigned long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// char32_t 
+#if EA_CHAR32_NATIVE
+	inline char32_t AtomicGetValue(volatile char32_t* ptr) { return detail::AtomicGetValue(ptr); }
+	inline char32_t AtomicGetValue(const volatile char32_t* ptr) { return AtomicGetValue(const_cast<volatile char32_t*>(ptr)); }
+	inline char32_t AtomicSetValue(volatile char32_t* dest, char32_t value) { return __sync_lock_test_and_set(dest, value); }
+	inline char32_t AtomicFetchIncrement(volatile char32_t* dest) { return __sync_fetch_and_add(dest, char32_t(1)); }
+	inline char32_t AtomicFetchDecrement(volatile char32_t* dest) { return __sync_fetch_and_add(dest, char32_t(-1)); }
+	inline char32_t AtomicFetchAdd(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_add(dest, value); }
+	inline char32_t AtomicFetchSub(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_sub(dest, value); }
+	inline char32_t AtomicFetchOr(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_or(dest, value); }
+	inline char32_t AtomicFetchAnd(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_and(dest, value); }
+	inline char32_t AtomicFetchXor(volatile char32_t* dest, char32_t value) { return __sync_fetch_and_xor(dest, value); }
+	inline char32_t AtomicFetchSwap(volatile char32_t* dest, char32_t value) { return __sync_lock_test_and_set(dest, value); }
+	inline char32_t AtomicFetchSwapConditional(volatile char32_t* dest, char32_t value, char32_t condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+	inline bool AtomicSetValueConditional(volatile char32_t* dest, char32_t value, char32_t condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+#endif
+
+// long long
+inline long long AtomicGetValue(volatile long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long long AtomicGetValue(const volatile long long* ptr) { return AtomicGetValue(const_cast<volatile long long*>(ptr)); }
+inline long long AtomicSetValue(volatile long long* dest, long long value) { return __sync_lock_test_and_set(dest, value); }
+inline long long AtomicFetchIncrement(volatile long long* dest) { return __sync_fetch_and_add(dest, (long long)(1)); }
+inline long long AtomicFetchDecrement(volatile long long* dest) { return __sync_fetch_and_add(dest, (long long)(-1)); }
+inline long long AtomicFetchAdd(volatile long long* dest, long long value) { return __sync_fetch_and_add(dest, value); }
+inline long long AtomicFetchSub(volatile long long* dest, long long value) { return __sync_fetch_and_sub(dest, value); }
+inline long long AtomicFetchOr(volatile long long* dest, long long value) { return __sync_fetch_and_or(dest, value); }
+inline long long AtomicFetchAnd(volatile long long* dest, long long value) { return __sync_fetch_and_and(dest, value); }
+inline long long AtomicFetchXor(volatile long long* dest, long long value) { return __sync_fetch_and_xor(dest, value); }
+inline long long AtomicFetchSwap(volatile long long* dest, long long value) { return __sync_lock_test_and_set(dest, value); }
+inline long long AtomicFetchSwapConditional(volatile long long* dest, long long value, long long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile long long* dest, long long value, long long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+// unsigned long long
+inline unsigned long long AtomicGetValue(volatile unsigned long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long long AtomicGetValue(const volatile unsigned long long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long long*>(ptr)); }
+inline unsigned long long AtomicSetValue(volatile unsigned long long* dest, unsigned long long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long long AtomicFetchIncrement(volatile unsigned long long* dest) { return __sync_fetch_and_add(dest, (unsigned long long)(1)); }
+inline unsigned long long AtomicFetchDecrement(volatile unsigned long long* dest) { return __sync_fetch_and_add(dest, (unsigned long long)(-1)); }
+inline unsigned long long AtomicFetchAdd(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_add(dest, value); }
+inline unsigned long long AtomicFetchSub(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_sub(dest, value); }
+inline unsigned long long AtomicFetchOr(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_or(dest, value); }
+inline unsigned long long AtomicFetchAnd(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_and(dest, value); }
+inline unsigned long long AtomicFetchXor(volatile unsigned long long* dest, unsigned long long value) { return __sync_fetch_and_xor(dest, value); }
+inline unsigned long long AtomicFetchSwap(volatile unsigned long long* dest, unsigned long long value) { return __sync_lock_test_and_set(dest, value); }
+inline unsigned long long AtomicFetchSwapConditional(volatile unsigned long long* dest, unsigned long long value, unsigned long long condition) { return __sync_val_compare_and_swap(dest, condition, value); }
+inline bool AtomicSetValueConditional(volatile unsigned long long* dest, unsigned long long value, unsigned long long condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+
+//
+// You cannot simply define a template for the above atomics due to the explicit 128-bit overloads
+// below. The compiler will prefer those overloads during overload resolution and attempt to convert
+// temporaries, as they are more specialized than a template.
+//
+// template<typename T> inline T AtomicGetValue(volatile T* source) { return __sync_fetch_and_add(source, (T)(0)); }
+// template<typename T> inline void AtomicSetValue(volatile T* dest, T value) { __sync_lock_test_and_set(dest, value); }
+// template<typename T> inline T AtomicFetchIncrement(volatile T* dest) { return __sync_fetch_and_add(dest, (T)(1)); }
+// template<typename T> inline T AtomicFetchDecrement(volatile T* dest) { return __sync_fetch_and_add(dest, (T)(-1)); }
+// template<typename T> inline T AtomicFetchAdd(volatile T* dest, T value) { return __sync_fetch_and_add(dest, value); }
+// template<typename T> inline T AtomicFetchOr(volatile T* dest, T value) { return __sync_fetch_and_or(dest, value); }
+// template<typename T> inline T AtomicFetchAnd(volatile T* dest, T value) { return __sync_fetch_and_and(dest, value); }
+// template<typename T> inline T AtomicFetchXor(volatile T* dest, T value) { return __sync_fetch_and_xor(dest, value); }
+// template<typename T> inline T AtomicFetchSwap(volatile T* dest, T value) { return __sync_lock_test_and_set(dest, value); }
+// template<typename T> inline bool AtomicSetValueConditional(volatile T* dest, T value, T condition) { return __sync_bool_compare_and_swap(dest, condition, value); }
+//
+
+namespace detail
+{
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr)
+	{
+	#if EA_PLATFORM_WORD_SIZE >= 8 && defined(EA_PROCESSOR_X86_64)
+		EATHREAD_ALIGNMENT_CHECK(ptr);
+		EACompilerMemoryBarrier();
+		T value = *ptr;
+		EACompilerMemoryBarrier();
+		return value;
+	#else
+		return AtomicFetchAdd(ptr, T(0));
+	#endif
+	}
+} // namespace detail
+
+} // namespace Thread
+} // namespace EA
+
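The wrappers above compose into lock-free retry loops in the usual way. A minimal sketch, assuming only the EA::Thread declarations from this header (IncrementUpTo itself is hypothetical):

    // Lock-free saturating increment built on the __sync-based wrappers above.
    long IncrementUpTo(volatile long* counter, long maxValue)
    {
        for (;;)
        {
            const long current = EA::Thread::AtomicGetValue(counter);
            if (current >= maxValue)
                return current; // already saturated; leave the value unchanged
            // Publish current + 1 only if no other thread raced us; otherwise retry.
            if (EA::Thread::AtomicSetValueConditional(counter, current + 1, current))
                return current + 1;
        }
    }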

+ 249 - 0
include/eathread/internal/eathread_atomic_standalone_msvc.h

@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////
+// InterlockedXXX intrinsics
+//
+#if defined(EA_PLATFORM_MICROSOFT)
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <xatomic.h>
+	EA_RESTORE_ALL_VC_WARNINGS()
+
+	extern "C" long           _InterlockedIncrement(long volatile* Addend);
+	extern "C" long           _InterlockedDecrement(long volatile* Addend);
+	extern "C" long           _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp);
+	extern "C" long           _InterlockedExchange(long volatile* Target, long Value);
+	extern "C" long           _InterlockedExchangeAdd(long volatile* Addend, long Value);
+	extern "C" int64_t        _InterlockedCompareExchange64(int64_t volatile* Dest, int64_t Exchange, int64_t Comp);
+
+	#pragma intrinsic (_InterlockedCompareExchange)
+	#define InterlockedCompareExchangeImp _InterlockedCompareExchange
+
+	#pragma intrinsic (_InterlockedExchange)
+	#define InterlockedExchangeImp        _InterlockedExchange 
+
+	#pragma intrinsic (_InterlockedExchangeAdd)
+	#define InterlockedExchangeAddImp     _InterlockedExchangeAdd
+
+	#pragma intrinsic (_InterlockedIncrement)
+	#define InterlockedIncrementImp       _InterlockedIncrement
+
+	#pragma intrinsic (_InterlockedDecrement)
+	#define InterlockedDecrementImp       _InterlockedDecrement
+
+	#pragma intrinsic (_InterlockedCompareExchange64)
+	#define InterlockedCompareExchange64Imp _InterlockedCompareExchange64
+
+	inline bool InterlockedSetIfEqual(volatile int64_t* dest, int64_t newValue, int64_t condition)
+	{
+		return (InterlockedCompareExchange64Imp(dest, newValue, condition) == condition);
+	}
+
+	inline bool InterlockedSetIfEqual(volatile uint64_t* dest, uint64_t newValue, uint64_t condition)
+	{
+		return (InterlockedCompareExchange64Imp((int64_t volatile*)dest, (int64_t)newValue, (int64_t)condition) == (int64_t)condition);
+	}
+
+	#ifndef InterlockedCompareExchangeImp // If the above intrinsics aren't used... 
+		extern "C" __declspec(dllimport) long __stdcall InterlockedIncrement(long volatile * pAddend);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedDecrement(long volatile * pAddend);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedExchange(long volatile * pTarget, long value);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedExchangeAdd(long volatile * pAddend, long value);
+		extern "C" __declspec(dllimport) long __stdcall InterlockedCompareExchange(long volatile * pDestination, long value, long compare);
+
+		#define InterlockedCompareExchangeImp InterlockedCompareExchange
+		#define InterlockedExchangeImp        InterlockedExchange
+		#define InterlockedExchangeAddImp     InterlockedExchangeAdd
+		#define InterlockedIncrementImp       InterlockedIncrement
+		#define InterlockedDecrementImp       InterlockedDecrement
+	#endif
+
+	#if defined(EA_PROCESSOR_X86)
+		#define _InterlockedExchange64		_InterlockedExchange64_INLINE
+		#define _InterlockedExchangeAdd64	_InterlockedExchangeAdd64_INLINE
+		#define _InterlockedAnd64			_InterlockedAnd64_INLINE
+		#define _InterlockedOr64			_InterlockedOr64_INLINE
+		#define _InterlockedXor64			_InterlockedXor64_INLINE
+	#endif
+#endif // EA_PLATFORM_MICROSOFT
+
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+namespace detail
+{
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr);
+} // namespace detail
+
+// int
+inline int AtomicGetValue(volatile int* ptr) { return detail::AtomicGetValue(ptr); }
+inline int AtomicGetValue(const volatile int* ptr) { return AtomicGetValue(const_cast<volatile int*>(ptr)); }
+inline int AtomicSetValue(volatile int* ptr, int value) { return _InterlockedExchange((long*)ptr, (long)value); }  
+inline int AtomicFetchIncrement(volatile int* ptr) { return static_cast<int>(_InterlockedIncrement((long*)ptr)) - 1; }
+inline int AtomicFetchDecrement(volatile int* ptr) { return static_cast<int>(_InterlockedDecrement((long*)ptr)) + 1; }
+inline int AtomicFetchAdd(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+inline int AtomicFetchSub(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchangeAdd((long*)ptr, -(long)value)); }
+inline int AtomicFetchOr(volatile int* ptr, int value) { return static_cast<int>(_InterlockedOr((long*)ptr, (long)value)); }
+inline int AtomicFetchAnd(volatile int* ptr, int value) { return static_cast<int>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline int AtomicFetchXor(volatile int* ptr, int value) { return static_cast<int>(_InterlockedXor((long*)ptr, (long)value)); }
+inline int AtomicFetchSwap(volatile int* ptr, int value) { return static_cast<int>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline int AtomicFetchSwapConditional(volatile int* ptr, int value, int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition); }
+inline bool AtomicSetValueConditional(volatile int* ptr, int value, int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+
+// unsigned int
+inline unsigned int AtomicGetValue(volatile unsigned int* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned int AtomicGetValue(const volatile unsigned int* ptr) { return AtomicGetValue(const_cast<volatile unsigned int*>(ptr)); }
+inline unsigned int AtomicSetValue(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchange((long*)ptr, (long)value)); }  
+inline unsigned int AtomicFetchIncrement(volatile unsigned int* ptr) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)1)); }
+inline unsigned int AtomicFetchDecrement(volatile unsigned int* ptr) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)-1)); }
+inline unsigned int AtomicFetchAdd(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchSub(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchangeAdd((long*)ptr, -(long)value)); }
+inline unsigned int AtomicFetchOr(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedOr((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchAnd(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchXor(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedXor((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchSwap(volatile unsigned int* ptr, unsigned int value) { return static_cast<unsigned int>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned int AtomicFetchSwapConditional(volatile unsigned int* ptr, unsigned int value, unsigned int condition) { return (unsigned int)_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition); }
+inline bool AtomicSetValueConditional(volatile unsigned int* ptr, unsigned int value, unsigned int condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+
+// short
+inline short AtomicGetValue(volatile short* ptr) { return detail::AtomicGetValue(ptr); }
+inline short AtomicGetValue(const volatile short* ptr) { return AtomicGetValue(const_cast<volatile short*>(ptr)); }
+inline short AtomicSetValue(volatile short* ptr, short value) { return static_cast<short>(_InterlockedExchange16((short*)ptr, (short)value)); }
+inline short AtomicFetchIncrement(volatile short* ptr) { return static_cast<short>(_InterlockedExchangeAdd16((short*)ptr, (short)1)); }
+inline short AtomicFetchDecrement(volatile short* ptr) { return static_cast<short>(_InterlockedExchangeAdd16((short*)ptr, (short)-1)); }
+inline short AtomicFetchAdd(volatile short* ptr, short value) { return static_cast<short>(_InterlockedExchangeAdd16((short*)ptr, (short)value)); }
+inline short AtomicFetchSub(volatile short* ptr, short value) { return static_cast<short>(_InterlockedExchangeAdd16((short*)ptr, -value)); }
+inline short AtomicFetchOr(volatile short* ptr, short value) { return static_cast<short>(_InterlockedOr16((short*)ptr, (short)value)); }
+inline short AtomicFetchAnd(volatile short* ptr, short value) { return static_cast<short>(_InterlockedAnd16((short*)ptr, (short)value)); }
+inline short AtomicFetchXor(volatile short* ptr, short value) { return static_cast<short>(_InterlockedXor16((short*)ptr, (short)value)); }
+inline short AtomicFetchSwap(volatile short* ptr, short value) { return static_cast<short>(_InterlockedExchange16((short*)ptr, (short)value)); }
+inline short AtomicFetchSwapConditional(volatile short* ptr, short value, short condition) { return _InterlockedCompareExchange16(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile short* ptr, short value, short condition) { return _InterlockedCompareExchange16(ptr, value, condition) == condition; }
+
+// unsigned short
+inline unsigned short AtomicGetValue(volatile unsigned short* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned short AtomicGetValue(const volatile unsigned short* ptr) { return AtomicGetValue(const_cast<volatile unsigned short*>(ptr)); }
+inline unsigned short AtomicSetValue(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchange16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchIncrement(volatile unsigned short* ptr) { return static_cast<unsigned short>(_InterlockedExchangeAdd16((short*)ptr, (short)1)); }
+inline unsigned short AtomicFetchDecrement(volatile unsigned short* ptr) { return static_cast<unsigned short>(_InterlockedExchangeAdd16((short*)ptr, (short)-1)); }
+inline unsigned short AtomicFetchAdd(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchangeAdd16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchSub(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchangeAdd16((short*)ptr, -(short)value)); }
+inline unsigned short AtomicFetchOr(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedOr16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchAnd(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedAnd16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchXor(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedXor16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchSwap(volatile unsigned short* ptr, unsigned short value) { return static_cast<unsigned short>(_InterlockedExchange16((short*)ptr, (short)value)); }
+inline unsigned short AtomicFetchSwapConditional(volatile unsigned short* ptr, unsigned short value, unsigned short condition) { return (unsigned short)_InterlockedCompareExchange16((short*)ptr, (short)value, (short)condition); }
+inline bool AtomicSetValueConditional(volatile unsigned short* ptr, unsigned short value, unsigned short condition) { return _InterlockedCompareExchange16((short*)ptr, (short)value, (short)condition) == (short)condition; }
+
+// long
+inline long AtomicGetValue(volatile long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long AtomicGetValue(const volatile long* ptr) { return AtomicGetValue(const_cast<volatile long*>(ptr)); }
+inline long AtomicSetValue(volatile long* ptr, long value) { return _InterlockedExchange(ptr, value); }
+inline long AtomicFetchIncrement(volatile long* ptr) { return _InterlockedIncrement(ptr) - 1; }
+inline long AtomicFetchDecrement(volatile long* ptr) { return _InterlockedDecrement(ptr) + 1; }
+inline long AtomicFetchAdd(volatile long* ptr, long value)  { return _InterlockedExchangeAdd(ptr, value); }
+inline long AtomicFetchSub(volatile long* ptr, long value) { return _InterlockedExchangeAdd(ptr, -value); }
+inline long AtomicFetchOr(volatile long* ptr, long value)   { return _InterlockedOr(ptr, value); }
+inline long AtomicFetchAnd(volatile long* ptr, long value)  { return _InterlockedAnd(ptr, value); }
+inline long AtomicFetchXor(volatile long* ptr, long value)  { return _InterlockedXor(ptr, value); }
+inline long AtomicFetchSwap(volatile long* ptr, long value) { return _InterlockedExchange(ptr, value); }
+inline long AtomicFetchSwapConditional(volatile long* ptr, long value, long condition) { return _InterlockedCompareExchange(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile long* ptr, long value, long condition) { return _InterlockedCompareExchange(ptr, value, condition) == condition; }
+
+// unsigned long
+inline unsigned long AtomicGetValue(volatile unsigned long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long AtomicGetValue(const volatile unsigned long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long*>(ptr)); }
+inline unsigned long AtomicSetValue(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchIncrement(volatile unsigned long* ptr) { return static_cast<unsigned long>(_InterlockedIncrement((long*)ptr)) - 1; }
+inline unsigned long AtomicFetchDecrement(volatile unsigned long* ptr) { return static_cast<unsigned long>(_InterlockedDecrement((long*)ptr)) + 1; }
+inline unsigned long AtomicFetchAdd(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchSub(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchangeAdd((long*)ptr, -(long)value)); }
+inline unsigned long AtomicFetchOr(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedOr((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchAnd(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedAnd((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchXor(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedXor((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchSwap(volatile unsigned long* ptr, unsigned long value) { return static_cast<unsigned long>(_InterlockedExchange((long*)ptr, (long)value)); }
+inline unsigned long AtomicFetchSwapConditional(volatile unsigned long* ptr, unsigned long value, unsigned long condition) { return static_cast<unsigned long>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)); }
+inline bool AtomicSetValueConditional(volatile unsigned long* ptr, unsigned long value, unsigned long condition) { return static_cast<unsigned long>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)) == condition; }
+
+// char32_t
+#if EA_CHAR32_NATIVE
+	inline char32_t AtomicGetValue(volatile char32_t* ptr) { return detail::AtomicGetValue(ptr); }
+	inline char32_t AtomicGetValue(const volatile char32_t* ptr) { return AtomicGetValue(const_cast<volatile char32_t*>(ptr)); }
+	inline char32_t AtomicSetValue(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchange((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchIncrement(volatile char32_t* ptr) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)1)); }
+	inline char32_t AtomicFetchDecrement(volatile char32_t* ptr) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)-1)); }
+	inline char32_t AtomicFetchAdd(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchSub(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchangeAdd((long*)ptr, -(long)value)); }
+	inline char32_t AtomicFetchOr(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedOr((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchAnd(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedAnd((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchXor(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedXor((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchSwap(volatile char32_t* ptr, char32_t value) { return static_cast<char32_t>(_InterlockedExchange((long*)ptr, (long)value)); }
+	inline char32_t AtomicFetchSwapConditional(volatile char32_t* ptr, char32_t value, char32_t condition) { return static_cast<char32_t>(_InterlockedCompareExchange((long*)ptr, (long)value, (long)condition)); }
+	inline bool AtomicSetValueConditional(volatile char32_t* ptr, char32_t value, char32_t condition) { return _InterlockedCompareExchange((long*)ptr, (long)value, (long)condition) == (long)condition; }
+#endif
+
+
+// long long
+inline long long AtomicGetValue(volatile long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline long long AtomicGetValue(const volatile long long* ptr) { return AtomicGetValue(const_cast<volatile long long*>(ptr)); }
+inline long long AtomicSetValue(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchange64(ptr, value)); }  
+inline long long AtomicFetchIncrement(volatile long long* ptr) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, (long long)1)); }
+inline long long AtomicFetchDecrement(volatile long long* ptr) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, (long long)-1)); }
+inline long long AtomicFetchAdd(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, value)); }
+inline long long AtomicFetchSub(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchangeAdd64(ptr, -(long long)value)); }
+inline long long AtomicFetchOr(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedOr64(ptr, value)); }
+inline long long AtomicFetchAnd(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedAnd64(ptr, value)); }
+inline long long AtomicFetchXor(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedXor64(ptr, value)); }
+inline long long AtomicFetchSwap(volatile long long* ptr, long long value) { return static_cast<long long>(_InterlockedExchange64(ptr, value)); }
+inline long long AtomicFetchSwapConditional(volatile long long* ptr, long long value, long long condition) { return _InterlockedCompareExchange64(ptr, value, condition); }
+inline bool AtomicSetValueConditional(volatile long long* ptr, long long value, long long condition) { return _InterlockedCompareExchange64(ptr, value, condition) == condition; }
+
+// unsigned long long 
+inline unsigned long long AtomicGetValue(volatile unsigned long long* ptr) { return detail::AtomicGetValue(ptr); }
+inline unsigned long long AtomicGetValue(const volatile unsigned long long* ptr) { return AtomicGetValue(const_cast<volatile unsigned long long*>(ptr)); }
+inline unsigned long long AtomicSetValue(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }  
+inline unsigned long long AtomicFetchIncrement(volatile unsigned long long* ptr) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)1)); }
+inline unsigned long long AtomicFetchDecrement(volatile unsigned long long* ptr) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)-1)); }
+inline unsigned long long AtomicFetchAdd(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }
+inline unsigned long long AtomicFetchSub(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(ptr), -(long long)value)); }
+inline unsigned long long AtomicFetchOr(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedOr64(reinterpret_cast<volatile long long*>(ptr), (long long)value)); }
+inline unsigned long long AtomicFetchAnd(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedAnd64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchXor(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedXor64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchSwap(volatile unsigned long long* ptr, unsigned long long value) { return static_cast<unsigned long long>(_InterlockedExchange64(reinterpret_cast<volatile long long*>(ptr),(long long) value)); }
+inline unsigned long long AtomicFetchSwapConditional(volatile unsigned long long* ptr, unsigned long long value, unsigned long long condition) { return static_cast<unsigned long long>(_InterlockedCompareExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value, (long long)condition)); }
+inline bool AtomicSetValueConditional(volatile unsigned long long* ptr, unsigned long long value, unsigned long long condition) { return static_cast<unsigned long long>(_InterlockedCompareExchange64(reinterpret_cast<volatile long long*>(ptr), (long long)value, (long long)condition)) == condition; }
+
+
+namespace detail
+{
+	template<class T>
+	inline T AtomicGetValue(volatile T* ptr)
+	{
+	#if EA_PLATFORM_WORD_SIZE >= 8 && defined(EA_PROCESSOR_X86_64)
+		EATHREAD_ALIGNMENT_CHECK(ptr);
+		EACompilerMemoryBarrier();
+		T value = *ptr;
+		EACompilerMemoryBarrier();
+		return value;
+	#else
+		return AtomicFetchAdd(ptr, T(0));
+	#endif
+	}
+} // namespace detail
+
+
+} // namespace Thread
+} // namespace EA
+
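Note the fetch semantics implemented above: _InterlockedIncrement and _InterlockedDecrement return the new value, so the int and long wrappers adjust by one to return the prior value, matching _InterlockedExchangeAdd, which already returns the prior value. A minimal sketch of the resulting contract (editorial; Demo is hypothetical):

    // Every Fetch* wrapper returns the value held *before* the operation.
    volatile long gCount = 5;

    void Demo()
    {
        long before  = EA::Thread::AtomicFetchIncrement(&gCount);              // before == 5, gCount == 6
        long prior   = EA::Thread::AtomicFetchAdd(&gCount, 10);                // prior == 6, gCount == 16
        bool swapped = EA::Thread::AtomicSetValueConditional(&gCount, 0, 16);  // CAS succeeds; gCount == 0
        (void)before; (void)prior; (void)swapped;
    }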

+ 32 - 0
include/eathread/internal/eathread_global.h

@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// NOTE(rparolin):  Provides a unified method of access to EAThread global
+// variables that (when specified by the user) can become DLL safe by adding a
+// dependency on the EAStdC EAGlobal implementation.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef EATHREAD_INTERNAL_GLOBAL_H
+#define EATHREAD_INTERNAL_GLOBAL_H
+
+#if EATHREAD_GLOBAL_VARIABLE_DLL_SAFETY
+	#include <EAStdC/EAGlobal.h>
+
+	#define EATHREAD_GLOBALVARS (*EA::StdC::AutoStaticOSGlobalPtr<EA::Thread::EAThreadGlobalVars, 0xdabbad00>().get())
+	#define EATHREAD_GLOBALVARS_CREATE_INSTANCE  EA::StdC::AutoStaticOSGlobalPtr<EA::Thread::EAThreadGlobalVars, 0xdabbad00> gGlobalVarsInstance;
+	#define EATHREAD_GLOBALVARS_EXTERN_INSTANCE  
+
+#else 
+	#define EATHREAD_GLOBALVARS gEAThreadGlobalVars
+	#define EATHREAD_GLOBALVARS_CREATE_INSTANCE EA::Thread::EAThreadGlobalVars gEAThreadGlobalVars
+	#define EATHREAD_GLOBALVARS_EXTERN_INSTANCE extern EA::Thread::EAThreadGlobalVars gEAThreadGlobalVars 
+
+#endif
+
+#endif
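The macros above are intended to compose as follows; a hedged sketch, with the member name purely illustrative:

    // In exactly one translation unit of the package:
    EATHREAD_GLOBALVARS_CREATE_INSTANCE;

    // In files that need access to the globals:
    EATHREAD_GLOBALVARS_EXTERN_INSTANCE;

    // At points of use, always go through the macro, so the same code works in
    // both the plain-global build and the DLL-safe EAStdC EAGlobal build:
    //     EATHREAD_GLOBALVARS.someMember = value;  // 'someMember' is hypothetical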

+ 50 - 0
include/eathread/internal/timings.h

@@ -0,0 +1,50 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+#ifndef EATHREAD_INTERNAL_TIMINGS_H
+#define EATHREAD_INTERNAL_TIMINGS_H
+
+namespace EA
+{
+	namespace Thread
+	{
+		
+#if defined(EA_PLATFORM_SONY)
+		// RelativeTimeoutFromAbsoluteTimeout returns a relative timeout in microseconds.
+		inline uint32_t RelativeTimeoutFromAbsoluteTimeout(EA::Thread::ThreadTime timeoutAbsolute)
+		{
+			using namespace EA::Thread;
+
+			EAT_ASSERT((timeoutAbsolute == kTimeoutImmediate) || (timeoutAbsolute > EATHREAD_MIN_ABSOLUTE_TIME)); // Assert that the user didn't make the mistake of treating time as relative instead of absolute.
+
+			uint32_t timeoutRelative = 0;
+
+			if (timeoutAbsolute == kTimeoutNone)
+			{
+				timeoutRelative = 0xffffffff;
+			}
+			else if (timeoutAbsolute == kTimeoutImmediate)
+			{
+				timeoutRelative = 0;
+			}
+			else
+			{
+				ThreadTime timeCurrent(GetThreadTime());
+				timeoutRelative = (timeoutAbsolute > timeCurrent) ? EA_THREADTIME_AS_UINT_MICROSECONDS(timeoutAbsolute - timeCurrent) : 0;
+			}
+
+			EAT_ASSERT((timeoutRelative == 0xffffffff) || (timeoutRelative < 100000000)); // Assert that the timeout is a sane value and didn't wrap around.
+	
+			return timeoutRelative;
+		}
+#endif
+
+	}
+}
+
+#endif
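A minimal usage sketch, assuming the ThreadTime conventions from eathread.h (NativeWaitMicroseconds is a hypothetical platform call):

    using namespace EA::Thread;

    bool WaitUntil(ThreadTime timeoutAbsolute)
    {
        // Convert EAThread's absolute deadline into the relative microsecond
        // timeout the platform primitive expects; per the function above,
        // 0xffffffff encodes "wait forever" and 0 encodes "poll".
        const uint32_t usRelative = RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute);
        return NativeWaitMicroseconds(usRelative); // hypothetical platform call
    }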

+ 559 - 0
include/eathread/powerpc/eathread_atomic_powerpc.h

@@ -0,0 +1,559 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for thread-safe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_POWERPC_EATHREAD_ATOMIC_POWERPC_H
+#define EATHREAD_POWERPC_EATHREAD_ATOMIC_POWERPC_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_SYNC_H
+	#include <eathread/eathread_sync.h>
+#endif
+#include <stddef.h>
+
+
+///////////////////////////////////////////////////////////////////////////////
+// EATHREAD_XTL_H_ENABLED
+//
+// Defined as 0 or 1. Default is 1, for backward compatibility.
+// If enabled then xtl.h is #included below, merely for backward compatibility.
+//
+#ifndef EATHREAD_XTL_H_ENABLED
+	#define EATHREAD_XTL_H_ENABLED 1
+#endif
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+
+#ifdef _MSC_VER
+	 #pragma warning(push)
+	 #pragma warning(disable: 4146)  // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+
+#if defined(EA_PROCESSOR_POWERPC)
+	#define EA_THREAD_ATOMIC_IMPLEMENTED
+	#define EA_THREAD_ATOMIC_LLR_SUPPORTED (0)
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/* To do
+			inline int32_t AtomicGetValue(volatile int32_t*)
+				{ }
+			inline void AtomicSetValue(volatile int32_t*, int32_t value)
+				{ }
+			inline int32_t AtomicIncrement(volatile int32_t*)
+				{ }
+			inline int32_t AtomicDecrement(volatile int32_t*)
+				{ }
+			inline int32_t AtomicAdd(volatile int32_t*, int32_t value)
+				{ }
+			inline int32_t AtomicOr(volatile int32_t*, int32_t value)
+				{ }
+			inline int32_t AtomicAnd(volatile int32_t*, int32_t value)
+				{ }
+			inline int32_t AtomicXor(volatile int32_t*, int32_t value)
+				{ }
+			inline int32_t AtomicSwap(volatile int32_t*, int32_t value)
+				{ }
+			inline bool AtomicSetValueConditional(volatile int32_t*, int32_t value, int32_t condition)
+				{ }
+
+			inline uint32_t AtomicGetValue(volatile uint32_t*)
+				{ }
+			inline void AtomicSetValue(volatile uint32_t*, uint32_t value)
+				{ }
+			inline uint32_t AtomicIncrement(volatile uint32_t*)
+				{ }
+			inline uint32_t AtomicDecrement(volatile uint32_t*)
+				{ }
+			inline uint32_t AtomicAdd(volatile uint32_t*, uint32_t value)
+				{ }
+			inline uint32_t AtomicOr(volatile uint32_t*, uint32_t value)
+				{ }
+			inline uint32_t AtomicAnd(volatile uint32_t*, uint32_t value)
+				{ }
+			inline uint32_t AtomicXor(volatile uint32_t*, uint32_t value)
+				{ }
+			inline uint32_t AtomicSwap(volatile uint32_t*, uint32_t value)
+				{ }
+			inline bool AtomicSetValueConditional(volatile uint32_t*, uint32_t value, uint32_t condition)
+				{ }
+
+			inline int64_t AtomicGetValue(volatile int64_t*)
+				{ }
+			inline void AtomicSetValue(volatile int64_t*, int64_t value)
+				{ }
+			inline int64_t AtomicIncrement(volatile int64_t*)
+				{ }
+			inline int64_t AtomicDecrement(volatile int64_t*)
+				{ }
+			inline int64_t AtomicAdd(volatile int64_t*, int64_t value)
+				{ }
+			inline int64_t AtomicOr(volatile int64_t*, int64_t value)
+				{ }
+			inline int64_t AtomicAnd(volatile int64_t*, int64_t value)
+				{ }
+			inline int64_t AtomicXor(volatile int64_t*, int64_t value)
+				{ }
+			inline int64_t AtomicSwap(volatile int64_t*, int64_t value)
+				{ }
+			inline bool AtomicSetValueConditional(volatile int64_t*, int64_t value, int64_t condition)
+				{ }
+
+			inline uint64_t AtomicGetValue(volatile uint64_t*)
+				{ }
+			inline void AtomicSetValue(volatile uint64_t*, uint64_t value)
+				{ }
+			inline uint64_t AtomicIncrement(volatile uint64_t*)
+				{ }
+			inline uint64_t AtomicDecrement(volatile uint64_t*)
+				{ }
+			inline uint64_t AtomicAdd(volatile uint64_t*, uint64_t value)
+				{ }
+			inline uint64_t AtomicOr(volatile uint64_t*, uint64_t value)
+				{ }
+			inline uint64_t AtomicAnd(volatile uint64_t*, uint64_t value)
+				{ }
+			inline uint64_t AtomicXor(volatile uint64_t*, uint64_t value)
+				{ }
+			inline uint64_t AtomicSwap(volatile uint64_t*, uint64_t value)
+				{ }
+			inline bool AtomicSetValueConditional(volatile uint64_t*, uint64_t value, uint64_t condition)
+				{ }
+			*/
+
+
+			template <class T>
+			class AtomicInt
+			{
+			public:
+				typedef AtomicInt<T> ThisType;
+				typedef T            ValueType;
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				AtomicInt()
+					{}
+
+				AtomicInt(ValueType n) 
+					{ SetValue(n); }
+
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+				ValueType GetValue() const;
+				ValueType SetValue(ValueType n);
+				bool      SetValueConditional(ValueType n, ValueType condition);
+				ValueType Increment();
+				ValueType Decrement();
+				ValueType Add(ValueType n);
+
+			#if EA_THREAD_ATOMIC_LLR_SUPPORTED
+				ValueType Reserve(void* spuScratch = NULL);
+				bool      StoreConditionalReserved(ValueType n, void* spuScratch = NULL);
+			#endif
+
+				// operators
+				inline            operator const ValueType() const { return GetValue(); }  // Should this be provided? Is it safe enough? Returning a const value attempts to make this safe from misuse.
+				inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+				inline ValueType  operator+=(ValueType n)          { return Add(n);}
+				inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+				inline ValueType  operator++()                     { return Increment();}
+				inline ValueType  operator++(int)                  { return Increment() - 1;}
+				inline ValueType  operator--()                     { return Decrement(); }
+				inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+			protected:
+				volatile ValueType mValue;
+			};
+
+			// Template specializations for generic PowerPC: Mac OS X, etc.
+			#if defined(CS_UNDEFINED_STRING) || defined(EA_COMPILER_GNUC) 
+
+				template <> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+				{
+					// The version below uses lwarx directly and not a lwarx/stwcx loop. 
+					// You would want the loop if you are on an SMP system and want the 
+					// returned value to be reflective of the last store to the address
+					// (which would be our store). The downside to the loop is that it 
+					// would be slower due to the extra instruction and due to an extra
+					// memory synchronization event.
+					ValueType nValue;
+					#if (EA_MEMORY_BARRIERS_REQUIRED == 0)
+						__asm__ __volatile__("lwarx  %0,0,%1"
+											: "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+					#else
+						__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+												 stwcx. %0,0,%1\n\
+												  bne 1b"
+											   : "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+					#endif
+					return nValue;
+				}
+
+				template <> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+				{
+					ValueType nValue;
+					#if (EA_MEMORY_BARRIERS_REQUIRED == 0)
+						__asm__ __volatile__("lwarx  %0,0,%1"
+											: "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+					#else
+						__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+												 stwcx. %0,0,%1\n\
+												 bne 1b"
+											   : "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+					#endif
+					return nValue;
+				}
+
+				template <> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+				{
+					ValueType nOriginalValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%2\n\
+											 stwcx. %1,0,%2\n\
+											 bne-    1b"
+										  : "=&b" (nOriginalValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+					return nOriginalValue;
+				}
+
+				template <> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+				{
+					ValueType nOriginalValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%2\n\
+											 stwcx. %1,0,%2\n\
+											 bne-    1b" 
+										   : "=&b" (nOriginalValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+					return nOriginalValue;
+				}
+
+				template <> inline
+				bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					ValueType nOriginalValue;
+					__asm__ __volatile__("\n\
+										  1: lwarx  %0,0,%1 \n\
+											 cmpw    0,%0,%2 \n\
+											 bne     2f \n\
+											 stwcx. %3,0,%1 \n\
+											 bne-    1b\n"
+										  "2:"
+											: "=&b" (nOriginalValue)
+											: "b" (&mValue), "r" (condition), "r" (n)
+											: "cc", "memory");
+					return (condition == nOriginalValue);
+				}
+
+				template <> inline
+				bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					ValueType nOriginalValue;
+					__asm__ __volatile__("\n\
+										 1: lwarx  %0,0,%1 \n\
+											cmpw    0,%0,%2 \n\
+											bne     2f \n\
+											stwcx. %3,0,%1 \n\
+											bne-    1b\n"
+										"2:"
+											: "=&b" (nOriginalValue)
+											: "b" (&mValue), "r" (condition), "r" (n)
+											: "cc", "memory");
+					return (condition == nOriginalValue);
+				}
+
+				template <> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+											 addi    %0,%0,1\n\
+											 stwcx. %0,0,%1\n\
+											 bne-    1b"
+										   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+				template <> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+											 addi    %0,%0,1\n\
+											 stwcx. %0,0,%1\n\
+											 bne-    1b"
+										   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+				template <> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+											 addi    %0,%0,-1\n\
+											 stwcx. %0,0,%1\n\
+											 bne-    1b"
+										   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+				template <> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx  %0,0,%1\n\
+											 addi    %0,%0,-1\n\
+											 stwcx. %0,0,%1\n\
+											 bne-    1b"
+										   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+				template <> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx    %0,0,%2\n\
+											 add      %0,%1,%0\n\
+											 stwcx.  %0,0,%2\n\
+											 bne-     1b"
+										   : "=&b" (nNewValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+				template <> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+				{
+					ValueType nNewValue;
+					__asm__ __volatile__("1: lwarx    %0,0,%2\n\
+											 add      %0,%1,%0\n\
+											 stwcx.  %0,0,%2\n\
+											 bne-     1b"
+										   : "=&b" (nNewValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+					return nNewValue;
+				}
+
+			#endif // EA_COMPILER_GNUC
+
+			#if (defined(EA_PLATFORM_WORD_SIZE) && (EA_PLATFORM_WORD_SIZE >= 8)) // If we have PowerPC64...
+
+				#if defined(EA_COMPILER_GNUC)
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+					{
+						// The version below uses ldarx directly and not a ldarx/stdcx. loop.
+						// You would want the loop if you are on an SMP system and want the 
+						// returned value to be reflective of the last store to the address
+						// (which would be our store). The downside to the loop is that it 
+						// would be slower due to the extra instruction and due to an extra
+						// memory synchronization event.
+						ValueType nValue;
+						#if (EA_MEMORY_BARRIERS_REQUIRED == 0)
+							 __asm__ __volatile__("ldarx  %0,0,%1"
+												: "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+						#else
+							 __asm__ __volatile__("1: ldarx  %0,0,%1\n\
+													  stdcx. %0,0,%1\n\
+													  bne 1b"
+													: "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+						#endif
+						return nValue;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+					{
+						ValueType nValue;
+						#if (EA_MEMORY_BARRIERS_REQUIRED == 0)
+							 __asm__ __volatile__("ldarx  %0,0,%1"
+												   : "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+						#else
+							 __asm__ __volatile__("1: ldarx  %0,0,%1\n\
+													  stdcx. %0,0,%1\n\
+													  bne 1b"
+													: "=&b" (nValue) : "b" (&mValue) : "cc", "memory");
+						#endif
+						return nValue;
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+					{
+						ValueType nOriginalValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%2\n\
+												 stdcx. %1,0,%2\n\
+												 bne-    1b" 
+											   : "=&b" (nOriginalValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+						return nOriginalValue;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+					{
+						ValueType nOriginalValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%2\n\
+												 stdcx. %1,0,%2\n\
+												 bne-    1b" 
+											   : "=&b" (nOriginalValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+						return nOriginalValue;
+					}
+
+					template <> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						ValueType nOriginalValue;
+						__asm__ __volatile__("\n\
+											1: ldarx  %0,0,%1 \n\
+												cmpd    0,%0,%2 \n\
+												bne     2f \n\
+												stdcx. %3,0,%1 \n\
+												bne-    1b\n"
+											"2:"
+												: "=&b" (nOriginalValue)
+												: "b" (&mValue), "r" (condition), "r" (n)
+												: "cc", "memory");
+						return (condition == nOriginalValue);
+					}
+
+					template <> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{
+						ValueType nOriginalValue;
+						__asm__ __volatile__("\n\
+											1: ldarx  %0,0,%1 \n\
+												cmpd    0,%0,%2 \n\
+												bne     2f \n\
+												stdcx. %3,0,%1 \n\
+												bne-    1b\n"
+											"2:"
+												: "=&b" (nOriginalValue)
+												: "b" (&mValue), "r" (condition), "r" (n)
+												: "cc", "memory");
+						return (condition == nOriginalValue);
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%1\n\
+												 addi    %0,%0,1\n\
+												 stdcx. %0,0,%1\n\
+												 bne-    1b"
+											   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%1\n\
+												 addi    %0,%0,1\n\
+												 stdcx. %0,0,%1\n\
+												 bne-    1b"
+											   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%1\n\
+												 addi    %0,%0,-1\n\
+												 stdcx. %0,0,%1\n\
+												 bne-    1b"
+											   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx  %0,0,%1\n\
+												 addi    %0,%0,-1\n\
+												 stdcx. %0,0,%1\n\
+												 bne-    1b"
+											   : "=&b" (nNewValue) : "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx    %0,0,%2\n\
+												 add      %0,%1,%0\n\
+												 stdcx.  %0,0,%2\n\
+												 bne-     1b"
+											   : "=&b" (nNewValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+					{
+						ValueType nNewValue;
+						__asm__ __volatile__("1: ldarx    %0,0,%2\n\
+												 add      %0,%1,%0\n\
+												 stdcx.  %0,0,%2\n\
+												 bne-     1b"
+											   : "=&b" (nNewValue) : "r" (n), "b" (&mValue) : "cc", "memory");
+						return nNewValue;
+					}
+
+				#endif
+
+			#endif
+
+		} // namespace Thread
+
+	} // namespace EA
+
+
+#endif // EA_PROCESSOR_POWERPC
+
+
+#ifdef _MSC_VER
+	 #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_POWERPC_EATHREAD_ATOMIC_POWERPC_H
+
+
+
+
+
+
+
+
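A usage sketch of the specializations above: the lwarx/stwcx. (and ldarx/stdcx. on PowerPC64) loops retry whenever another store invalidates the reservation, so each operation completes atomically without a lock. Assumes only this header; AddRef/Release are illustrative:

    EA::Thread::AtomicInt<int32_t> gRefCount(1);

    void AddRef()  { ++gRefCount; }                        // lwarx/stwcx. increment loop
    bool Release() { return gRefCount.Decrement() == 0; }  // Decrement returns the new value

    void CasExample()
    {
        // Store 5 only if the current value is 0; returns true on success.
        bool ok = gRefCount.SetValueConditional(5, 0);
        (void)ok;
    }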

+ 31 - 0
include/eathread/powerpc/eathread_sync_powerpc.h

@@ -0,0 +1,31 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_POWERPC_EATHREAD_SYNC_POWERPC_H
+#define EATHREAD_POWERPC_EATHREAD_SYNC_POWERPC_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include <EABase/eabase.h>
+#endif
+
+
+#endif // EATHREAD_POWERPC_EATHREAD_SYNC_POWERPC_H
+
+
+
+
+
+
+
+

+ 431 - 0
include/eathread/shared_array_mt.h

@@ -0,0 +1,431 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This is a multithread-safe version of shared_array.
+// For basic documentation, see shared_array.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_SHARED_ARRAY_MT_H
+#define EATHREAD_SHARED_ARRAY_MT_H
+
+#ifndef INCLUDED_eabase_H
+   #include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+   #include <eathread/eathread_futex.h>
+#endif
+#include <stddef.h> // More properly: #include <cstddef> // Definition of std::ptrdiff_t
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+/// namespace EA
+/// The standard Electronic Arts namespace
+namespace EA
+{
+   namespace Thread
+   {
+	  /// class shared_array_mt
+	  /// A shared_array_mt is the same as shared_ptr_mt but for arrays. 
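+	  /// Example usage (an illustrative sketch; the array size is arbitrary):
+	  ///    shared_array_mt<int> ptr(new int[6]);
+	  ///    ptr[2] = 37;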
+	  template<class T>
+	  class shared_array_mt
+	  {
+	  private:
+		 /// this_type
+		 /// This is an alias for shared_array_mt<T>, this class.
+		 typedef shared_array_mt<T> this_type;
+
+		 /// reference_count_type
+		 /// An internal reference count type. Must be convertible to int
+		 /// so that the public use_count function can work.
+		 typedef EA::Thread::AtomicInt32 reference_count_type;
+
+		 T*                    mpArray;      /// The owned pointer. Points to an array of T.
+		 reference_count_type* mpRefCount;   /// Reference count for owned pointer.
+		 mutable Futex         mMutex;       /// Mutex guarding access to this class.
+
+	  public:
+		 typedef T element_type;
+		 typedef T value_type;
+
+		 /// shared_array_mt
+		 /// Takes ownership of the pointer and sets the reference count
+		 /// for the pointer to 1. It is OK if the input pointer is null.
+		 /// The shared reference count is allocated on the heap via operator new.
+		 /// If an exception occurs during the allocation of the shared 
+		 /// reference count, the owned pointer is deleted and the exception
+		 /// is rethrown. A null pointer is given a reference count of 1.
+		 explicit shared_array_mt(T* pArray = 0)
+			: mpArray(pArray), mMutex()
+		 {
+			// We don't lock our mutex in this function, as this is the constructor
+			// and we assume that construction is already done in a thread-safe way
+			// by the owner of this object.
+			#if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+			   mpRefCount = new reference_count_type(1);
+			#else
+				EA_DISABLE_VC_WARNING(4571)
+				try
+				{
+					mpRefCount = new reference_count_type(1);
+				}
+				catch(...)
+				{
+					delete[] mpArray;
+					//mpRefCount = 0; shouldn't be necessary.
+					throw;
+				}
+				EA_RESTORE_VC_WARNING()
+			#endif
+		 }
+
+		 /// shared_array_mt
+		 /// Shares ownership of a pointer with another instance of shared_array_mt.
+		 /// This function increments the shared reference count on the pointer.
+		 shared_array_mt(shared_array_mt const& sharedArray)
+			: mMutex()
+		 {
+			sharedArray.lock();
+			mpArray    = sharedArray.mpArray;
+			mpRefCount = sharedArray.mpRefCount;
+			mpRefCount->Increment(); // Atomic operation
+			sharedArray.unlock();
+		 }
+
+		 /// ~shared_array_mt
+		 /// Decrements the reference count for the owned pointer. If the 
+		 /// reference count goes to zero, the owned pointer is deleted and
+		 /// the shared reference count is deleted.
+		 ~shared_array_mt()
+		 {
+			lock();
+			const reference_count_type newRefCount(mpRefCount->Decrement()); // Atomic operation
+			// EAT_ASSERT(newRefCount >= 0);
+			if(newRefCount == 0)
+			{
+			   delete[] mpArray;
+			   delete mpRefCount;
+			}
+			unlock();
+		 }
+
+		 /// operator=
+		 /// Copies another shared_array_mt to this object. Note that this object
+		 /// may already own a shared pointer with another different pointer
+		 /// (but still of the same type) before this call. In that case,
+		 /// this function releases the old pointer, decrementing its reference
+		 /// count and deleting it if zero, takes shared ownership of the new 
+		 /// pointer and increments its reference count.
+		 shared_array_mt& operator=(shared_array_mt const& sharedArray)
+		 {
+			// We don't lock mutexes here because we let the swap function
+			// below do the locking and assignment. The if statement below
+			// isn't protected within a lock operation because it wouldn't
+			// help by being so, because if mpArray is changing during the 
+			// execution of this function then the user has an external 
+			// race condition that needs to be managed at that level.
+			if(mpArray != sharedArray.mpArray)
+			{
+			   // The easiest thing to do is to create a temporary and 
+			   // copy ourselves into it. This is a standard 
+			   // method for switching pointer ownership in systems like this.
+			   shared_array_mt(sharedArray).swap(*this);
+			}
+			return *this;
+		 }
+
+		 // operator=
+		 // We do not define this function, in order to maintain compatibility 
+		 // with the currently proposed (2003) C++ standard addition.  Use reset instead.
+		 // shared_array_mt& operator=(T* pValue)
+		 // {
+		 //     reset(pValue);
+		 //     return *this;
+		 // }
+
+		 /// lock
+		 /// @brief Locks our mutex for thread-safe access.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class.
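+		 /// Example usage (an illustrative sketch; 'arr' is a hypothetical
+		 /// shared_array_mt<int>). Bracketing a compound operation makes the
+		 /// check and the write atomic with respect to other users of 'arr':
+		 ///    arr.lock();
+		 ///    if(arr.unique())
+		 ///       arr[0] = 1;
+		 ///    arr.unlock();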
+		 void lock() const
+		 {
+			mMutex.Lock(); 
+		 }
+
+		 /// unlock
+		 /// @brief Unlocks our mutex which was previously locked.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class.
+		 void unlock() const
+		 {
+			mMutex.Unlock(); 
+		 }
+
+		 /// reset
+		 /// Releases the owned pointer and takes ownership of the 
+		 /// passed in pointer. If the passed in pointer is the same
+		 /// as the owned pointer, nothing is done. The passed in pointer
+		 /// can be null, in which case the use count is set to 1.
+		 void reset(T* pArray = 0)
+		 {
+			// We don't lock any mutexes here because we let the swap function do that.
+			// We don't lock for the 'if' statement below because that wouldn't really buy anything.
+			if(pArray != mpArray)
+			{
+			   // The easiest thing to do is to create a temporary and 
+			   // copy ourselves into it. This is a standard 
+			   // method for switching pointer ownership in systems like this.
+			   shared_array_mt(pArray).swap(*this);
+			}
+		 }
+
+		 /// swap
+		 /// Exchanges the owned pointer between two shared_array_mt objects.
+		 void swap(shared_array_mt<T>& sharedArray)
+		 {
+			lock();
+			sharedArray.lock();
+
+			// std::swap(mpArray, sharedArray.mpArray); // Not used so that we can reduce a dependency.
+			T* const pArray     = sharedArray.mpArray;
+			sharedArray.mpArray = mpArray;
+			mpArray             = pArray;
+
+			// std::swap(mpRefCount, sharedArray.mpRefCount); // Not used so that we can reduce a dependency.
+			reference_count_type* const pRefCount = sharedArray.mpRefCount;
+			sharedArray.mpRefCount = mpRefCount;
+			mpRefCount             = pRefCount;
+
+			sharedArray.unlock();
+			unlock();
+		 }
+
+		 /// operator[]
+		 /// Returns a reference to the specified item in the owned pointer
+		 /// array. 
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[6]);
+		 ///   int x = ptr[2];
+		 T& operator[](ptrdiff_t i) const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpArray && (i >= 0));
+			return mpArray[i];
+		 }
+
+		 /// operator*
+		 /// Returns the owner pointer dereferenced.
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   int x = *ptr;
+		 T& operator*() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpArray);
+			return *mpArray;
+		 }
+
+		 /// operator->
+		 /// Allows access to the owned pointer via operator->()
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_array_mt<X> ptr(new X[1]);
+		 ///   ptr->DoSomething();
+		 T* operator->() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpArray);
+			return mpArray;
+		 }
+
+		 /// get
+		 /// Returns the owned pointer. Note that this class does 
+		 /// not provide an operator T() function. This is because such
+		 /// a thing (automatic conversion) is deemed unsafe.
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_array_mt<X> ptr(new X[1]);
+		 ///   X* pX = ptr.get();
+		 ///   pX->DoSomething();
+		 T* get() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			return mpArray;
+		 }
+
+		 /// use_count
+		 /// Returns the reference count on the owned pointer.
+		 /// The return value is one if the owned pointer is null.
+		 int use_count() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpRefCount);
+			return (int)*mpRefCount;
+		 }
+
+		 /// unique
+		 /// Returns true if the reference count on the owned pointer is one.
+		 /// The return value is true if the owned pointer is null.
+		 bool unique() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpRefCount);
+			return (*mpRefCount == 1);
+		 }
+
+		 /// add_ref
+		 /// Manually increments the reference count on the owned pointer.
+		 /// This is currently disabled because it isn't part of the 
+		 /// proposed C++ language addition.
+		 /// int add_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    ++*mpRefCount; // Atomic operation
+		 ///    unlock();
+		 /// }
+
+		 /// release_ref
+		 /// Manually decrements the reference count on the owned pointer.
+		 /// If the reference count becomes zero, then the owned pointer 
+		 /// is deleted and reset(0) is called. For any given instance of
+		 /// shared_array_mt, release_ref can only be called as many times as -- 
+		 /// but no more than -- the number of times add_ref was called
+		 /// for that same shared_array_mt. Otherwise, separate instances of 
+		 /// shared_array_mt would be left with dangling owned pointer instances.
+		 /// This is currently disabled because it isn't part of the 
+		 /// proposed C++ language addition.
+		 /// int release_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    if(*mpRefCount > 1){
+		 ///       const int nReturnValue = --*mpRefCount; // Atomic operation
+		 ///       unlock();
+		 ///       return nReturnValue;
+		 ///    }
+		 ///    reset(0);
+		 ///    unlock();
+		 ///    return 0;
+		 /// }
+
+		 /// Implicit operator bool
+		 /// Allows for using a shared_array_mt as a boolean. 
+		 /// Example usage:
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   if(ptr)
+		 ///      ++*ptr;
+		 ///    
+		 /// Note that below we do not use operator bool(). The reason for this
+		 /// is that booleans automatically convert up to short, int, float, etc.
+		 /// The result is that this: if(sharedArray == 1) would yield true (bad).
+		 typedef T* (this_type::*bool_)() const;
+		 operator bool_() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			if(mpArray)
+			   return &this_type::get;
+			return 0;
+		 }
+
+		 /// operator!
+		 /// This returns the opposite of operator bool; it returns true if 
+		 /// the owned pointer is null. Some compilers require this and some don't.
+		 ///   shared_array_mt<int> ptr(new int[3]);
+		 ///   if(!ptr)
+		 ///      EAT_ASSERT(false);
+		 bool operator!() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			return (mpArray == 0);
+		 }
+
+	  }; // class shared_array_mt
+
+
+	  /// get_pointer
+	  /// returns shared_array_mt::get() via the input shared_array_mt. 
+	  template<class T>
+	  inline T* get_pointer(const shared_array_mt<T>& sharedArray)
+	  {
+		 return sharedArray.get();
+	  }
+
+	  /// swap
+	  /// Exchanges the owned pointer between two shared_array_mt objects.
+	  /// This non-member version is useful for compatibility of shared_array_mt
+	  /// objects with the C++ Standard Library and other libraries.
+	  template<class T>
+	  inline void swap(shared_array_mt<T>& sharedArray1, shared_array_mt<T>& sharedArray2)
+	  {
+		 sharedArray1.swap(sharedArray2);
+	  }
+
+
+	  /// operator==
+	  /// Compares two shared_array_mt objects for equality. Equality is defined as 
+	  /// being true when the pointer shared between two shared_array_mt objects is equal.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_array_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator==(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 // EAT_ASSERT((sharedArray1.get() != sharedArray2.get()) || (sharedArray1.use_count() == sharedArray2.use_count()));
+		 return (sharedArray1.get() == sharedArray2.get());
+	  }
+
+
+	  /// operator!=
+	  /// Compares two shared_array_mt objects for inequality. Equality is defined as 
+	  /// being true when the pointer shared between two shared_array_mt objects is equal.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_array_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator!=(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 // EAT_ASSERT((sharedArray1.get() != sharedArray2.get()) || (sharedArray1.use_count() == sharedArray2.use_count()));
+		 return (sharedArray1.get() != sharedArray2.get());
+	  }
+
+
+	  /// operator<
+	  /// Returns which shared_array_mt is 'less' than the other. Useful when storing
+	  /// sorted containers of shared_array_mt objects.
+	  template<class T, class U>
+	  inline bool operator<(const shared_array_mt<T>& sharedArray1, const shared_array_mt<U>& sharedArray2)
+	  {
+		 return (sharedArray1.get() < sharedArray2.get()); // Alternatively use: std::less<T*>(a.get(), b.get());
+	  }
+
+   } // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_SHARED_ARRAY_MT_H
+
+
+
+
+
+
+
+
+
+
+

+ 472 - 0
include/eathread/shared_ptr_mt.h

@@ -0,0 +1,472 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This is a multithread-safe version of shared_ptr.
+// For basic documentation, see shared_ptr.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_SHARED_PTR_MT_H
+#define EATHREAD_SHARED_PTR_MT_H
+
+#ifndef INCLUDED_eabase_H
+   #include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_FUTEX_H
+   #include <eathread/eathread_futex.h>
+#endif
+#ifndef EATHREAD_EATHREAD_ATOMIC_H
+   #include <eathread/eathread_atomic.h>
+#endif
+// #include <memory> Temporarily disabled while we wait for compilers to modernize. // Declaration of std::auto_ptr.
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+
+/// namespace EA
+/// The standard Electronic Arts namespace
+namespace EA
+{
+   namespace Thread
+   {
+	  /// class shared_ptr_mt
+	  /// @brief Implements a thread-safe version of shared_ptr.
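+	  /// Example usage (an illustrative sketch):
+	  ///    shared_ptr_mt<int> ptr(new int(3));
+	  ///    shared_ptr_mt<int> ptr2(ptr); // shares ownership; use_count() == 2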
+	  template<class T>
+	  class shared_ptr_mt
+	  {
+	  private:
+		 /// this_type
+		 /// This is an alias for shared_ptr_mt<T>, this class.
+		 typedef shared_ptr_mt<T> this_type;
+
+		 /// reference_count_type
+		 /// An internal reference count type. Must be convertible to int
+		 /// so that the public use_count function can work.
+		 typedef EA::Thread::AtomicInt32 reference_count_type;
+
+		 T*                    mpValue;      /// The owned pointer.
+		 reference_count_type* mpRefCount;   /// Reference count for owned pointer.
+		 mutable Futex         mMutex;       /// Mutex guarding access to this class.
+
+	  public:
+		 typedef T element_type;
+		 typedef T value_type;
+
+		 /// shared_ptr_mt
+		 /// Takes ownership of the pointer and sets the reference count
+		 /// for the pointer to 1. It is OK if the input pointer is null.
+		 /// The shared reference count is allocated on the heap via operator new.
+		 /// If an exception occurs during the allocation of the shared 
+		 /// reference count, the owned pointer is deleted and the exception
+		 /// is rethrown. A null pointer is given a reference count of 1.
+		 explicit shared_ptr_mt(T* pValue = 0)
+			: mpValue(pValue), mMutex()
+		 {
+			// We don't lock our mutex in this function, as this is the constructor
+			// and we assume that construction is already done in a thread-safe way
+			// by the owner of this object.
+			#if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+			   mpRefCount = new reference_count_type(1);
+			#else
+				EA_DISABLE_VC_WARNING(4571)
+				try
+				{
+					mpRefCount = new reference_count_type(1);
+				}
+				catch(...)
+				{
+					delete pValue;
+					//mpRefCount = 0; shouldn't be necessary.
+					throw;
+				}
+				EA_RESTORE_VC_WARNING()
+			#endif
+		 }
+
+		 /// shared_ptr_mt
+		 /// Shares ownership of a pointer with another instance of shared_ptr_mt.
+		 /// This function increments the shared reference count on the pointer.
+		 shared_ptr_mt(shared_ptr_mt const& sharedPtr)
+			: mMutex()
+		 {
+			// We don't lock our mutex in this function, as this is the constructor
+			// and we assume that construction is already done in a thread-safe way
+			// by the owner of this object.
+			sharedPtr.lock();
+			mpValue    = sharedPtr.mpValue;
+			mpRefCount = sharedPtr.mpRefCount;
+			mpRefCount->Increment(); // Atomic operation
+			sharedPtr.unlock();
+		 }
+
+		 // Temporarily disabled while we wait for compilers to modernize.
+		 // 
+		 // shared_ptr_mt
+		 // Constructs a shared_ptr_mt from a std::auto_ptr. This class  
+		 // transfers ownership of the pointer from the auto_ptr by 
+		 // calling its release function.
+		 // If an exception occurs during the allocation of the shared 
+		 // reference count, the owned pointer is deleted and the exception
+		 // is rethrown.
+		 //explicit shared_ptr_mt(std::auto_ptr<T>& autoPtr)
+		 //   : mMutex()
+		 //{
+		 //   // We don't lock our mutex in this function, as this is the constructor
+		 //   // and we assume that construction is already done in a thread-safe way
+		 //   // by the owner of this object.
+		 //   mpValue = autoPtr.release();
+		 //
+		 //   #if defined(EA_COMPILER_NO_EXCEPTIONS) || defined(EA_COMPILER_NO_UNWIND)
+		 //      mpRefCount = new reference_count_type(1);
+		 //   #else
+		 //      try
+		 //      {
+		 //         mpRefCount = new reference_count_type(1);
+		 //      }
+		 //      catch(...)
+		 //      {
+		 //         delete mpValue;
+		 //         mpValue = 0;
+		 //         //mpRefCount = 0; shouldn't be necessary.
+		 //         throw;
+		 //      }
+		 //   #endif
+		 //} 
+
+		 /// ~shared_ptr_mt
+		 /// Decrements the reference count for the owned pointer. If the 
+		 /// reference count goes to zero, the owned pointer is deleted and
+		 /// the shared reference count is deleted.
+		 ~shared_ptr_mt()
+		 {
+			lock();
+			const reference_count_type newRefCount(mpRefCount->Decrement()); // Atomic operation
+			// EAT_ASSERT(newRefCount >= 0);
+			if(newRefCount == 0)
+			{
+				// We should only delete the pointer if it is not null. It is possible that the 
+				// user has created a shared_ptr_mt without passing in a value.
+				if (mpValue)
+					delete mpValue;
+				delete mpRefCount;
+			}
+			unlock();
+		 }
+
+		 /// operator=
+		 /// Copies another shared_ptr_mt to this object. Note that this object
+		 /// may already own a shared pointer with another different pointer
+		 /// (but still of the same type) before this call. In that case,
+		 /// this function releases the old pointer, decrementing its reference
+		 /// count and deleting it if zero, takes shared ownership of the new 
+		 /// pointer and increments its reference count.
+		 shared_ptr_mt& operator=(shared_ptr_mt const& sharedPtr)
+		 {
+			// We don't lock mutexes here because we let the swap function
+			// below do the locking and assignment. The if statement below
+			// isn't protected within a lock operation because it wouldn't
+			// help by being so, because if mpValue is changing during the 
+			// execution of this function then the user has an external 
+			// race condition that needs to be managed at that level.
+			if(mpValue != sharedPtr.mpValue)
+			{
+			   // The easiest thing to do is to create a temporary and 
+			   // copy ourselves into it. This is a standard 
+			   // method for switching pointer ownership in systems like this.
+			   shared_ptr_mt(sharedPtr).swap(*this);
+			}
+			return *this;
+		 }
+
+		 // Temporarily disabled while we wait for compilers to modernize.
+		 // 
+		 // operator=
+		 // Transfers ownership of a std::auto_ptr to this class.
+		 //shared_ptr_mt& operator=(std::auto_ptr<T>& autoPtr)
+		 //{
+		 //   // We don't lock any mutexes here because we let the swap function do that.
+		 //   // EAT_ASSERT(mpValue != autoPtr.get());
+		 //   shared_ptr_mt(autoPtr).swap(*this);
+		 //   return *this;
+		 //}
+
+		 // operator=
+		 // We do not define this function, in order to maintain compatibility 
+		 // with the currently proposed (2003) C++ standard addition. Use reset instead.
+		 // shared_ptr_mt& operator=(T* pValue);
+		 // {
+		 //     reset(pValue);
+		 //     return *this;
+		 // }
+
+		 /// lock
+		 /// @brief Locks our mutex for thread-safe access.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class.
+		 void lock() const
+		 {
+			mMutex.Lock(); 
+		 }
+
+		 /// unlock
+		 /// @brief Unlocks our mutex which was previously locked.
+		 /// It is a const function because const-ness refers to the underlying pointer being
+		 /// held and not this class.
+		 void unlock() const
+		 {
+			mMutex.Unlock(); 
+		 }
+
+		 /// reset
+		 /// Releases the owned pointer and takes ownership of the 
+		 /// passed in pointer. If the passed in pointer is the same
+		 /// as the owned pointer, nothing is done. The passed in pointer
+		 /// can be null, in which case the use count is set to 1.
+		 void reset(T* pValue = 0)
+		 {
+			// We don't lock any mutexes here because we let the swap function do that.
+			// We don't lock for the 'if' statement below because that wouldn't really buy anything.
+			if(pValue != mpValue)
+			{
+			   // The easiest thing to do is to create a temporary and 
+			   // copy ourselves into it. This is a standard 
+			   // method for switching pointer ownership in systems like this.
+			   shared_ptr_mt(pValue).swap(*this);
+			}
+		 }
+
+		 /// swap
+		 /// Exchanges the owned pointer between two shared_ptr_mt objects.
+		 void swap(shared_ptr_mt<T>& sharedPtr)
+		 {
+			lock();
+			sharedPtr.lock();
+
+			// std::swap(mpValue, sharedPtr.mpValue); // Not used so that we can reduce a dependency.
+			T* const pValue   = sharedPtr.mpValue;
+			sharedPtr.mpValue = mpValue;
+			mpValue           = pValue;
+
+			// std::swap(mpRefCount, sharedPtr.mpRefCount); // Not used so that we can reduce a dependency.
+			reference_count_type* const pRefCount = sharedPtr.mpRefCount;
+			sharedPtr.mpRefCount = mpRefCount;
+			mpRefCount           = pRefCount;
+
+			sharedPtr.unlock();
+			unlock();
+		 }
+
+		 /// operator*
+		 /// Returns the owner pointer dereferenced.
+		 /// Example usage:
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   int x = *ptr;
+		 T& operator*() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpValue);
+			return *mpValue;
+		 }
+
+		 /// operator->
+		 /// Allows access to the owned pointer via operator->()
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_ptr_mt<X> ptr(new X);
+		 ///   ptr->DoSomething();
+		 T* operator->() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpValue);
+			return mpValue;
+		 }
+
+		 /// get
+		 /// Returns the owned pointer. Note that this class does 
+		 /// not provide an operator T() function. This is because such
+		 /// a thing (automatic conversion) is deemed unsafe.
+		 /// Example usage:
+		 ///   struct X{ void DoSomething(); }; 
+		 ///   shared_ptr_mt<X> ptr(new X);
+		 ///   X* pX = ptr.get();
+		 ///   pX->DoSomething();
+		 T* get() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			return mpValue;
+		 }
+
+		 /// use_count
+		 /// Returns the reference count on the owned pointer.
+		 /// The return value is one if the owned pointer is null.
+		 int use_count() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpRefCount);
+			return (int)*mpRefCount;
+		 }
+
+		 /// unique
+		 /// Returns true if the reference count on the owned pointer is one.
+		 /// The return value is true if the owned pointer is null.
+		 bool unique() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			// We don't put a SMP read barrier here because we assume the caller does such things.
+			// EAT_ASSERT(mpRefCount);
+			return (*mpRefCount == 1);
+		 }
+
+		 /// add_ref
+		 /// Manually increments the reference count on the owned pointer.
+		 /// This is currently disabled because it isn't part of the 
+		 /// proposed C++ language addition.
+		 /// int add_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    ++*mpRefCount; // Atomic operation
+		 ///    unlock();
+		 /// }
+
+		 /// release_ref
+		 /// Manually decrements the reference count on the owned pointer.
+		 /// If the reference count becomes zero, then the owned pointer 
+		 /// is deleted and reset(0) is called. For any given instance of
+		 /// shared_ptr_mt, release_ref can only be called as many times as -- 
+		 /// but no more than -- the number of times add_ref was called
+		 /// for that same shared_ptr_mt. Otherwise, separate instances of 
+		 /// shared_ptr_mt would be left with dangling owned pointer instances.
+		 /// This is currently disabled because it isn't part of the 
+		 /// proposed C++ language addition.
+		 /// int release_ref()
+		 /// {
+		 ///    lock();
+		 ///    // EAT_ASSERT(mpRefCount);
+		 ///    if(*mpRefCount > 1){
+		 ///       const int nReturnValue = --*mpRefCount; // Atomic operation
+		 ///       unlock();
+		 ///       return nReturnValue;
+		 ///    }
+		 ///    reset(0);
+		 ///    unlock();
+		 ///    return 0;
+		 /// }
+
+		 /// Implicit operator bool
+		 /// Allows for using a shared_ptr_mt as a boolean. 
+		 /// Example usage:
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   if(ptr)
+		 ///      ++*ptr;
+		 ///    
+		 /// Note that below we do not use operator bool(). The reason for this
+		 /// is that booleans automatically convert up to short, int, float, etc.
+		 /// The result is that this: if(sharedPtr == 1) would yield true (bad).
+		 typedef T* (this_type::*bool_)() const;
+		 operator bool_() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			if(mpValue)
+			   return &this_type::get;
+			return 0;
+		 }
+
+		 /// operator!
+		 /// This returns the opposite of operator bool; it returns true if 
+		 /// the owned pointer is null. Some compilers require this and some don't.
+		 ///   shared_ptr_mt<int> ptr(new int(3));
+		 ///   if(!ptr)
+		 ///      EAT_ASSERT(false);
+		 bool operator!() const
+		 {
+			// We don't lock here because this is essentially a read operation.
+			return (mpValue == 0);
+		 }
+
+	  }; // class shared_ptr_mt
+
+
+	  /// get_pointer
+	  /// returns shared_ptr_mt::get() via the input shared_ptr_mt. 
+	  template<class T>
+	  inline T* get_pointer(const shared_ptr_mt<T>& sharedPtr)
+	  {
+		 return sharedPtr.get();
+	  }
+
+	  /// swap
+	  /// Exchanges the owned pointer between two shared_ptr_mt objects.
+	  /// This non-member version is useful for compatibility of shared_ptr_mt
+	  /// objects with the C++ Standard Library and other libraries.
+	  template<class T>
+	  inline void swap(shared_ptr_mt<T>& sharedPtr1, shared_ptr_mt<T>& sharedPtr2)
+	  {
+		 sharedPtr1.swap(sharedPtr2);
+	  }
+
+
+	  /// operator==
+	  /// Compares two shared_ptr_mt objects for equality. Equality is defined as 
+	  /// being true when the pointer shared between two shared_ptr_mt objects is equal.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_ptr_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator==(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 // EAT_ASSERT((sharedPtr1.get() != sharedPtr2.get()) || (sharedPtr1.use_count() == sharedPtr2.use_count()));
+		 return (sharedPtr1.get() == sharedPtr2.get());
+	  }
+
+
+	  /// operator!=
+	  /// Compares two shared_ptr_mt objects for inequality. Equality is defined as 
+	  /// being true when the pointer shared between two shared_ptr_mt objects is equal.
+	  /// It is debatable what the appropriate definition of equality is between two
+	  /// shared_ptr_mt objects, but we follow the current 2nd generation C++ standard proposal.
+	  template<class T, class U>
+	  inline bool operator!=(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 // EAT_ASSERT((sharedPtr1.get() != sharedPtr2.get()) || (sharedPtr1.use_count() == sharedPtr2.use_count()));
+		 return (sharedPtr1.get() != sharedPtr2.get());
+	  }
+
+
+	  /// operator<
+	  /// Returns which shared_ptr_mt is 'less' than the other. Useful when storing
+	  /// sorted containers of shared_ptr_mt objects.
+	  template<class T, class U>
+	  inline bool operator<(const shared_ptr_mt<T>& sharedPtr1, const shared_ptr_mt<U>& sharedPtr2)
+	  {
+		 return (sharedPtr1.get() < sharedPtr2.get()); // Alternatively use: std::less<T*>(a.get(), b.get());
+	  }
+
+   } // namespace Thread
+
+} // namespace EA
+
+
+
+
+#endif // EATHREAD_SHARED_PTR_MT_H
+
+
+
+
+
+
+
+
+
+
+

+ 47 - 0
include/eathread/version.h

@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_VERSION_H
+#define EATHREAD_VERSION_H
+
+
+#include <eathread/internal/config.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		/// Version contains the version of the library when it was built.
+		/// This can be used to verify the correct version has been linked
+		/// into the executable or loaded by the O/S (in the case of a DLL).
+		struct Version
+		{
+			int mMajor;
+			int mMinor;
+			int mPatch;
+		};
+
+		/// Get the library version information.
+		EATHREADLIB_API const Version *GetVersion();
+
+		/// Check that the linked/loaded library is the same as the headers 
+		/// are expecting.
+		///
+		/// If the version numbers passed to CheckVersion match those
+		/// built into the library when it was compiled, true is returned. 
+		/// If not, false is returned.
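+		///
+		/// Example usage (an illustrative sketch; the version numbers shown are
+		/// hypothetical and should match the headers the caller compiled against):
+		///    bool versionOk = EA::Thread::CheckVersion(1, 0, 0);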
+		EATHREADLIB_API bool CheckVersion(int majorVersion, int minorVersion, int patchVersion);
+
+	}
+
+}
+
+#endif

+ 462 - 0
include/eathread/x86-64/eathread_atomic_x86-64.h

@@ -0,0 +1,462 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+#define EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+
+#include "EABase/eabase.h"
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+
+#ifdef _MSC_VER
+	#pragma warning(push, 0)
+	#include <math.h>   // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
+	#include <intrin.h>
+	#pragma warning(pop)
+
+	#pragma warning(push)
+	#pragma warning(disable: 4146)  // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+
+#if defined(EA_PROCESSOR_X86_64)
+
+	#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			///
+			/// Non-member 128-bit Atomics implementation 
+			///
+			#if (_MSC_VER >= 1500) // VS2008+
+
+				#define EATHREAD_ATOMIC_128_SUPPORTED 1
+
+				// Algorithm for implementing an arbitrary atomic modification via AtomicCompareAndSwap:
+				//     int128_t oldValue;
+				//
+				//     do {
+				//         oldValue = AtomicGetValue(dest);
+				//         newValue = <modification of oldValue>
+				//     } while(!AtomicCompareAndSwap(dest, oldValue, newValue));
+ 
+				// The following function is a wrapper for the Microsoft _InterlockedCompareExchange128 function.
+				// Early versions of AMD 64-bit hardware do not support 128 bit atomics. To check for hardware support 
+				// for the cmpxchg16b instruction, call the __cpuid intrinsic with InfoType=0x00000001 (standard function 1). 
+				// Bit 13 of CPUInfo[2] (ECX) is 1 if the instruction is supported.
+
+				inline bool AtomicSetValueConditional128(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
+				{
+					__int64 conditionCopy[2] = { condition128[0], condition128[1] };                              // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
+					return _InterlockedCompareExchange128(dest128, value128[1], value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
+				}
+
+				inline bool AtomicSetValueConditional128(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
+				{ 
+					__int64 conditionCopy[2] = { (int64_t) condition128[0],  (int64_t)condition128[1] };                                               // We make a copy because Microsoft modifies the output, which is inconsistent with the rest of our atomic API.
+					return _InterlockedCompareExchange128((volatile int64_t*)dest128, (int64_t)value128[1], (int64_t)value128[0], conditionCopy) == 1; // Question: Do we need to reverse the order of value128 if running on big-endian? Microsoft's documentation currently doesn't address this.
+				}
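+
+				// An illustrative sketch (not part of this header) of the cpuid check
+				// described above; __cpuid is the MSVC intrinsic from <intrin.h>, which
+				// this file already includes:
+				//
+				//     inline bool HasCmpXchg16b()
+				//     {
+				//         int cpuInfo[4];
+				//         __cpuid(cpuInfo, 0x00000001);          // standard function 1
+				//         return (cpuInfo[2] & (1 << 13)) != 0;  // ECX bit 13 => cmpxchg16b
+				//     }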
+
+			#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+				#if defined(EA_COMPILER_CLANG) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) // GCC 4.3 or later for 128 bit atomics
+
+					#define EATHREAD_ATOMIC_128_SUPPORTED 1
+
+					// GCC on x64 implements all of its __sync functions below via the cmpxchg16b instruction,
+					// usually in the form of a loop.
+					// Use of 128 bit atomics on GCC requires compiling with the -mcx16 compiler argument. 
+					// See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+
+					inline __int128_t AtomicGetValue(volatile __int128_t* source)
+					{
+						return __sync_add_and_fetch(source, __int128_t(0)); // Is there a better way to do an atomic read?
+					}
+
+					inline void AtomicSetValue(volatile __int128_t* dest, __int128_t value)
+					{
+						__sync_lock_test_and_set(dest, value);
+					}
+
+					inline __int128_t AtomicIncrement(volatile __int128_t* dest)
+					{
+						return __sync_add_and_fetch(dest, __int128_t(1));
+					}
+
+					inline __int128_t AtomicDecrement(volatile __int128_t* dest)
+					{
+						return __sync_add_and_fetch(dest, __int128_t(-1));
+					}
+
+					inline __int128_t AtomicAdd(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_add_and_fetch(dest, value);
+					}
+
+					inline __int128_t AtomicOr(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_or_and_fetch(dest, value);
+					}
+
+					inline __int128_t AtomicAnd(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_and_and_fetch(dest, value);
+					}
+
+					inline __int128_t AtomicXor(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_xor_and_fetch(dest, value);
+					}
+
+					inline __int128_t AtomicSwap(volatile __int128_t* dest, __int128_t value)
+					{
+						return __sync_lock_test_and_set(dest, value);
+					}
+
+					inline bool AtomicSetValueConditional(volatile __int128_t* dest, __int128_t value, __int128_t condition)
+					{
+						return __sync_bool_compare_and_swap(dest, condition, value);
+					}
+
+					inline bool AtomicSetValueConditional(volatile __uint128_t* dest, __uint128_t value, __uint128_t condition)
+					{
+						return __sync_bool_compare_and_swap(dest, condition, value);
+					}
+
+					// The following 64-bit-based 128 bit atomic is provided for compatibility with the Microsoft version.
+					// GCC supports the native __int128_t data type and thus can support a 128-bit-based 128 bit atomic.
+
+					inline bool AtomicSetValueConditional128(volatile int64_t* dest128, const int64_t* value128, const int64_t* condition128)
+					{
+						// Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+						return __sync_bool_compare_and_swap((volatile __int128_t*)dest128, *(volatile __int128_t*)condition128, *(volatile __int128_t*)value128);
+					}
+
+					inline bool AtomicSetValueConditional128(volatile uint64_t* dest128, const uint64_t* value128, const uint64_t* condition128)
+					{
+						// Use of this requires compiling with the -mcx16 compiler argument. See http://gcc.gnu.org/onlinedocs/gcc/i386-and-x86_002d64-Options.html.
+						return __sync_bool_compare_and_swap((volatile __uint128_t*)dest128, *(volatile __uint128_t*)condition128, *(volatile __uint128_t*)value128);
+					}
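+
+					// Illustrative usage sketch (names here are hypothetical): swapping a
+					// 128-bit value held as a pair of 64-bit words. Real code must also
+					// guarantee 16-byte alignment of the destination for cmpxchg16b.
+					//
+					//     int64_t dest[2]     = { 0, 0 }; // must be 16-byte aligned
+					//     int64_t expected[2] = { 0, 0 };
+					//     int64_t desired[2]  = { 1, 2 };
+					//     bool bSwapped = AtomicSetValueConditional128(dest, desired, expected);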
+
+				#endif
+
+			#endif
+
+
+
+			/// class AtomicInt
+			/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+			/// and declaration specifications per platform.
+
+			template <class T>
+			class  AtomicInt
+			{
+			public:
+				typedef AtomicInt<T> ThisType;
+				typedef T            ValueType;
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				AtomicInt()
+					{}
+
+				AtomicInt(ValueType n) 
+					{ SetValue(n); }
+
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+				ValueType GetValue() const;
+				ValueType SetValue(ValueType n);
+				bool      SetValueConditional(ValueType n, ValueType condition);
+				ValueType Increment();
+				ValueType Decrement();
+				ValueType Add(ValueType n);
+
+				// operators
+				inline            operator const ValueType() const { return GetValue(); }  // Should this be provided? Is it safe enough? Return value of 'const' attempts to make this safe from misuse.
+				inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+				inline ValueType  operator+=(ValueType n)          { return Add(n);}
+				inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+				inline ValueType  operator++()                     { return Increment();}
+				inline ValueType  operator++(int)                  { return Increment() - 1;}
+				inline ValueType  operator--()                     { return Decrement(); }
+				inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+			protected:
+				volatile ValueType mValue;
+			};
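+
+			// Illustrative usage sketch of the AtomicInt interface declared above:
+			//
+			//     AtomicInt<int32_t> counter(0);
+			//     counter.Increment();                  // atomically: counter == 1
+			//     counter += 5;                         // atomically: counter == 6
+			//     if(counter.SetValueConditional(0, 6)) // CAS: set to 0 iff currently 6
+			//         { /* this thread won the exchange */ }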
+
+
+			#if defined(EA_COMPILER_MSVC)
+				#pragma intrinsic(_InterlockedExchange)
+				#pragma intrinsic(_InterlockedExchangeAdd)
+				#pragma intrinsic(_InterlockedCompareExchange)
+				#pragma intrinsic(_InterlockedIncrement)
+				#pragma intrinsic(_InterlockedDecrement)
+				#pragma intrinsic(_InterlockedExchange64)
+				#pragma intrinsic(_InterlockedExchangeAdd64)
+				#pragma intrinsic(_InterlockedCompareExchange64)
+				#pragma intrinsic(_InterlockedIncrement64)
+				#pragma intrinsic(_InterlockedDecrement64)
+
+				// The following should work under any compiler, including such compilers as GCC under
+				// WINE or some other Win32 emulation. Win32 InterlockedXXX functions must exist on
+				// any system that supports the Windows API, be it 32 or 64 bit Windows.
+
+				// 32 bit versions
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd((long*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange((long*)&mValue, (long)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				template<> inline
+				bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd((long*)&mValue, (long)n) + n); }
+
+
+
+				// 64 bit versions
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+					{ return (ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, 0); } // We shouldn't need to do this, as far as I know, given the x86 architecture.
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+					{ return (ValueType)_InterlockedExchange64((__int64*)&mValue, (__int64)n); } // Even though we shouldn't need to use _InterlockedExchange on x86, the intrinsic x86 _InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }
+
+				template<> inline
+				bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)_InterlockedCompareExchange64((__int64*)&mValue, (__int64)n, (__int64)condition) == condition); }
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+					{ return (ValueType)_InterlockedIncrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+					{ return (ValueType)_InterlockedDecrement64((__int64*)&mValue); }
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+					{ return ((ValueType)_InterlockedExchangeAdd64((__int64*)&mValue, (__int64)n) + n); }
+
+
+			#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+				// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+				#if defined(EA_COMPILER_CLANG) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401)) // GCC 4.1 or later
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+				#endif // GCC 4.1 or later
+
+			#endif // GCC
+
+		} // namespace Thread
+
+
+	} // namespace EA
+
+
+#endif // EA_PROCESSOR_X86_64
+
+
+#ifdef _MSC_VER
+	 #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_X86_64_EATHREAD_ATOMIC_X86_64_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 108 - 0
include/eathread/x86-64/eathread_sync_x86-64.h

@@ -0,0 +1,108 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+#define EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include "EABase/eabase.h"
+#endif
+
+
+#if defined(EA_PROCESSOR_X86_64)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	#ifdef _MSC_VER
+		#pragma warning(push, 0)
+		#include <math.h>   // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
+		#include <intrin.h>
+		#pragma warning(pop)
+	#endif
+
+	// By default, we define EA_TARGET_SMP to be true. The reason for this is that most 
+	// applications that users of this code are likely to write are going to be executables
+	// which run properly on any system, be it multiprocessing or not.
+	#ifndef EA_TARGET_SMP
+		#define EA_TARGET_SMP 1
+	#endif
+
+	// EAProcessorPause
+	// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+	// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for 
+	// high performance spinning, as otherwise a significant performance penalty is incurred. 
+
+	#if defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_BORLAND)
+		// Year 2003+ versions of the Microsoft SDK define 'rep nop' as YieldProcessor and/or __yield or _mm_pause. 
+		#pragma intrinsic(_mm_pause)
+		#define EAProcessorPause() _mm_pause() // The __yield() intrinsic currently doesn't work on x86-64.
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+	#else
+		// In this case we use an Intel-style asm statement. If this doesn't work for your compiler then 
+		// there is most likely some way to express `rep nop` as an inline asm statement for it. 
+		#define EAProcessorPause() __asm { rep nop } // Alternatively: { __asm { _emit 0xf3 }; __asm { _emit 0x90 } }
+	#endif
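+
+	// Illustrative spin-wait sketch (not part of this header; gReady is a
+	// hypothetical flag set by another thread):
+	//
+	//     extern volatile int gReady;
+	//     while(gReady == 0)
+	//         EAProcessorPause(); // avoids the spin penalty described above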
+
+
+	// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+	// The x86 processor memory architecture ensures read and write consistency on both single and
+	// multi processing systems. This makes programming simpler but limits maximum system performance.
+	// We define EAReadBarrier here to be the same as EACompilerMemoryBarrier in order to prevent the 
+	// compiler from making any assumptions at its level about memory usage. Year 2003+ versions of the 
+	// Microsoft SDK define a 'MemoryBarrier' statement which has the same effect as EAReadWriteBarrier.
+	#if defined(EA_COMPILER_MSVC)
+		#pragma intrinsic(_ReadBarrier)
+		#pragma intrinsic(_WriteBarrier)
+		#pragma intrinsic(_ReadWriteBarrier)
+
+		#define EAReadBarrier()      _ReadBarrier()
+		#define EAWriteBarrier()     _WriteBarrier()
+		#define EAReadWriteBarrier() _ReadWriteBarrier()
+	#elif defined(EA_PLATFORM_KETTLE)
+		#define EAReadBarrier()      __asm__ __volatile__ ("lfence" ::: "memory");
+		#define EAWriteBarrier()     __asm__ __volatile__ ("sfence" ::: "memory");
+		#define EAReadWriteBarrier() __asm__ __volatile__ ("mfence" ::: "memory");
+	#elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		#define EAReadBarrier      EACompilerMemoryBarrier // Need to implement this for non-VC++
+		#define EAWriteBarrier     EACompilerMemoryBarrier // Need to implement this for non-VC++
+		#define EAReadWriteBarrier EACompilerMemoryBarrier // Need to implement this for non-VC++
+	#endif
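+
+	// Illustrative publish/consume sketch (not part of this header; gData and
+	// gDataReady are hypothetical globals). The write barrier keeps the payload
+	// store ahead of the flag store, and the read barrier keeps the flag load
+	// ahead of the payload load, at least at the compiler level:
+	//
+	//     gData = 123;            // producer: write payload
+	//     EAWriteBarrier();
+	//     gDataReady = 1;         // producer: publish
+	//
+	//     while(gDataReady == 0)  // consumer: wait for publish
+	//         EAProcessorPause();
+	//     EAReadBarrier();
+	//     int n = gData;          // consumer: read published payload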
+
+
+	// EACompilerMemoryBarrier
+	#if defined(EA_COMPILER_MSVC)
+		#define EACompilerMemoryBarrier() _ReadWriteBarrier()
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+	#else
+		#define EACompilerMemoryBarrier() // Possibly `EAT_ASSERT(false)` here?
+	#endif
+
+
+#endif // EA_PROCESSOR_X86_64
+
+
+#endif // EATHREAD_X86_64_EATHREAD_SYNC_X86_64_H
+
+
+
+
+
+
+
+

+ 742 - 0
include/eathread/x86/eathread_atomic_x86.h

@@ -0,0 +1,742 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Defines functionality for threadsafe primitive operations.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+#define EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+
+
+#include <EABase/eabase.h>
+#include <stddef.h>
+#include <eathread/internal/eathread_atomic_standalone.h>
+
+
+#ifdef _MSC_VER
+	 #pragma warning(push)
+	 #pragma warning(disable: 4146)  // unary minus operator applied to unsigned type, result still unsigned
+	 #pragma warning(disable: 4339)  // use of undefined type detected in CLR meta-data
+#endif
+
+
+// This is required for Windows Phone (ARM) because we are temporarily not using
+// C++11-style atomics and are instead depending on the MSVC intrinsics.
+#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_ARM)
+	#define EA_THREAD_ATOMIC_IMPLEMENTED
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			/// class AtomicInt
+			/// Actual implementation may vary per platform. May require certain alignments, sizes, 
+			/// and declaration specifications per platform.
+			template <class T>
+			class AtomicInt
+			{
+			public:
+				typedef AtomicInt<T> ThisType;
+				typedef T            ValueType;
+
+				/// AtomicInt
+				/// Empty constructor. Intentionally leaves mValue in an unspecified state.
+				/// This is done so that an AtomicInt acts like a standard built-in integer.
+				/// Problem: C++ has two ways to initialize a built-in type x: "T x;" and "T x = T();",
+				///          and they have different semantics, as the first leaves x uninitialized 
+				///          while the second initializes x to zero. C++ does not provide a means 
+				///          for a class to tell which of these two ways was used to initialize 
+				///          an instance. Thus we probably can't easily argue that this constructor 
+				///          should do nothing vs. initialize the variable to 0. It's probably
+				///          safer for us to make it initialize to 0, and it wouldn't break 
+				///          users to do so, though it would add a tiny runtime cost.
+				AtomicInt()
+					{}
+
+				AtomicInt(ValueType n) : mValue(0) // Initialize mValue because otherwise SetValue may read it before it's initialized. 
+					{ SetValue(n); }
+
+				AtomicInt(const ThisType& x)
+					: mValue(x.GetValue()) {}
+
+				AtomicInt& operator=(const ThisType& x)
+					{ mValue = x.GetValue(); return *this; }
+
+				ValueType GetValue() const
+					{ return mValue; }
+
+				ValueType GetValueRaw() const
+					{ return mValue; }
+
+				ValueType SetValue(ValueType n);
+				bool      SetValueConditional(ValueType n, ValueType condition);
+				ValueType Increment();
+				ValueType Decrement();
+				ValueType Add(ValueType n);
+
+				// operators
+				inline            operator const ValueType() const { return GetValue(); }
+				inline ValueType  operator =(ValueType n)          {        SetValue(n); return n; }
+				inline ValueType  operator+=(ValueType n)          { return Add(n);}
+				inline ValueType  operator-=(ValueType n)          { return Add(-n);}
+				inline ValueType  operator++()                     { return Increment();}
+				inline ValueType  operator++(int)                  { return Increment() - 1;}
+				inline ValueType  operator--()                     { return Decrement(); }
+				inline ValueType  operator--(int)                  { return Decrement() + 1;}
+
+			protected:
+				volatile ValueType mValue;
+			};
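+
+			// Illustrative usage sketch (hedged; not part of the original header,
+			// and the names below are hypothetical): AtomicInt is intended to act
+			// like a built-in integer whose individual operations are atomic.
+			//
+			//     EA::Thread::AtomicInt<int32_t> gRefCount(1);
+			//
+			//     void AddRef()  { ++gRefCount; }             // Atomic increment.
+			//     bool Release() { return --gRefCount == 0; } // True for the last release.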
+
+			#if defined(EA_PLATFORM_MICROSOFT) && defined(_MSC_VER)
+
+				// 32 bit versions
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+					{ return (ValueType)InterlockedExchangeImp((long*)&mValue, (long)n); } // Even though we shouldn't need to use InterlockedExchange on x86, the intrinsic x86 InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+					{ return (ValueType)InterlockedExchangeImp((long*)&mValue, (long)n); } // Even though we shouldn't need to use InterlockedExchange on x86, the intrinsic x86 InterlockedExchange is at least as fast as C code we would otherwise put here.
+
+				template<> inline
+				bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)InterlockedCompareExchangeImp((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				template<> inline
+				bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+					{ return ((ValueType)InterlockedCompareExchangeImp((long*)&mValue, (long)n, (long)condition) == condition); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{ return (ValueType)InterlockedIncrementImp((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{ return (ValueType)InterlockedIncrementImp((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{ return (ValueType)InterlockedDecrementImp((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{ return (ValueType)InterlockedDecrementImp((long*)&mValue); }
+
+				template<> inline
+				AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{ return ((ValueType)InterlockedExchangeAddImp((long*)&mValue, (long)n) + n); }
+
+				template<> inline
+				AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{ return ((ValueType)InterlockedExchangeAddImp((long*)&mValue, (long)n) + n); }
+
+
+
+				// 64 bit versions
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const{
+					int64_t condition, nNewValue;
+					do{
+						nNewValue = condition = mValue; // Todo: This function has a problem unless the assignment of mValue to condition is atomic.
+					} while(!InterlockedSetIfEqual(const_cast<int64_t*>(&mValue), nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const{
+					uint64_t condition, nNewValue;
+					do{
+						nNewValue = condition = mValue; // Todo: This function has a problem unless the assignment of mValue to condition is atomic.
+					} while(!InterlockedSetIfEqual(const_cast<uint64_t*>(&mValue), nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n){
+					int64_t condition;
+					do{
+						condition = mValue;
+					} while(!InterlockedSetIfEqual(&mValue, n, condition));
+					return condition;
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n){
+					uint64_t condition;
+					do{
+						condition = mValue;
+					} while(!InterlockedSetIfEqual(&mValue, n, condition));
+					return condition;
+				}
+
+				template<> inline
+				bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition){
+					return InterlockedSetIfEqual(&mValue, n, condition);
+				}
+
+				template<> inline
+				bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition){
+					return InterlockedSetIfEqual(&mValue, n, condition);
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment(){
+					int64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition + 1;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment(){
+					uint64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition + 1;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement(){
+					int64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition - 1;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement(){
+					uint64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition - 1;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n){
+					int64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition + n;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
+
+				template<> inline
+				AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n){
+					uint64_t condition, nNewValue;
+					do{
+						condition = mValue;
+						nNewValue = condition + n;
+					} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+					return nNewValue;
+				}
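+
+				// All of the 64 bit specializations above follow the same
+				// compare-and-swap retry pattern. Restated generically (a hedged
+				// sketch; AtomicApply64 is illustrative, not a real API):
+				//
+				//     int64_t AtomicApply64(volatile int64_t* p, int64_t (*op)(int64_t))
+				//     {
+				//         int64_t oldValue, newValue;
+				//         do {
+				//             oldValue = *p;           // Subject to the read-atomicity caveat noted above.
+				//             newValue = op(oldValue); // Compute the desired new value.
+				//         } while(!InterlockedSetIfEqual(p, newValue, oldValue)); // Retry if another thread intervened.
+				//         return newValue;
+				//     }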
+
+
+			#elif defined(EA_COMPILER_GNUC) || defined (EA_COMPILER_CLANG)
+
+				// Recent versions of GCC have atomic primitives built into the compiler and standard library.
+				#if defined (EA_COMPILER_CLANG) || defined(__APPLE__) || (defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 403)) // GCC 4.3 or later
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const
+						{ return __sync_add_and_fetch(const_cast<ValueType*>(&mValue), 0); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n)
+						{ __sync_synchronize(); return __sync_lock_test_and_set(&mValue, n); }
+
+					template <> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return (__sync_val_compare_and_swap(&mValue, condition, n) == condition); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment()
+						{ return __sync_add_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement()
+						{ return __sync_sub_and_fetch(&mValue, 1); }
+
+					template <> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
+
+					template <> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n)
+						{ return __sync_add_and_fetch(&mValue, n); }
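+
+					// Illustrative sketch (hedged; AtomicStoreMax is not a real API):
+					// SetValueConditional is a compare-and-swap, from which callers can
+					// build further atomic operations, e.g. an atomic "store maximum":
+					//
+					//     void AtomicStoreMax(EA::Thread::AtomicInt<int32_t>& a, int32_t n)
+					//     {
+					//         int32_t current = a.GetValue();
+					//         while((n > current) && !a.SetValueConditional(n, current))
+					//             current = a.GetValue(); // Another thread changed it; re-read and retry.
+					//     }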
+
+				#else
+
+					// If the above intrinsics aren't used...
+					#ifndef InterlockedCompareExchangeImp
+					namespace
+					{
+						int32_t InterlockedExchange(volatile int32_t* m, int32_t n)
+						{
+							int32_t result;
+
+							__asm__ __volatile__ (
+								"xchgl %%eax, (%2)" // The xchg instruction implies a lock prefix.
+								: "=a" (result)     // outputs
+								: "a" (n), "q" (m)  // inputs
+								: "memory"          // clobbered
+								);
+
+							return result;
+						}
+
+						int32_t InterlockedCompareExchange(volatile int32_t* m, int32_t n, int32_t condition)
+						{
+							int32_t result;
+
+							__asm__ __volatile__(
+								"lock; cmpxchgl %3, (%1) \n"        // Test *m against EAX, if same, then *m = n
+								: "=a" (result), "=q" (m)           // outputs
+								: "a" (condition), "q" (n), "1" (m) // inputs
+								: "memory"                          // clobbered
+								);
+
+							return result;
+						}
+
+						#define InterlockedExchangeImp        InterlockedExchange
+						#define InterlockedCompareExchangeImp InterlockedCompareExchange
+					}
+					#endif
+
+					// 32 bit versions
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::SetValue(ValueType n)
+						{ return (ValueType)InterlockedExchangeImp(&mValue, n); }
+
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::SetValue(ValueType n)
+						{ return (ValueType)InterlockedExchangeImp((int32_t*)&mValue, n); }
+
+					template<> inline
+					bool AtomicInt<int32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return ((ValueType)InterlockedCompareExchangeImp(&mValue, n, condition) == condition); }
+
+					template<> inline
+					bool AtomicInt<uint32_t>::SetValueConditional(ValueType n, ValueType condition)
+						{ return ((ValueType)InterlockedCompareExchangeImp((int32_t*)&mValue, n, condition) == condition); }
+
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Increment()
+					{
+						int32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (1), "m" (mValue)
+											: "memory"
+											);
+						return result + 1;
+					}
+
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Increment()
+					{
+						int32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (1), "m" (mValue)
+											: "memory"
+											);
+						return result + 1;
+					}
+
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Decrement()
+					{
+						int32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (-1), "m" (mValue)
+											: "memory"
+											);
+						return result - 1;
+					}
+
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Decrement()
+					{
+						uint32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (-1), "m" (mValue)
+											: "memory"
+											);
+						return result - 1;
+					}
+
+					template<> inline
+					AtomicInt<int32_t>::ValueType AtomicInt<int32_t>::Add(ValueType n)
+					{
+						int32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (n), "m" (mValue)
+											: "memory"
+											);
+						return result + n;
+					}
+
+					template<> inline
+					AtomicInt<uint32_t>::ValueType AtomicInt<uint32_t>::Add(ValueType n)
+					{
+						uint32_t result;
+
+						__asm__ __volatile__ ("lock; xaddl %0, %1"
+											: "=r" (result), "=m" (mValue)
+											: "0" (n), "m" (mValue)
+											: "memory"
+											);
+						return result + n;
+					}
+
+
+
+					// 64 bit versions
+
+					inline bool
+					InterlockedSetIfEqual(volatile int64_t* dest, int64_t newValue, int64_t condition)
+					{
+						int64_t oldValue;
+
+						__asm __volatile ("lock; cmpxchg8b %1"
+										 : "=A" (oldValue), "=m" (*dest)
+										 : "b" (((int32_t) newValue) & 0xffffffff),
+										   "c" ((int32_t)(newValue >> 32)),
+										   "m" (*dest), "a" (((int32_t) condition) & 0xffffffff),
+										   "d" ((int32_t)(condition >> 32)));
+
+						return oldValue == condition;
+
+						// Reference non-thread-safe implementation:
+						// if(*dest == condition)
+						// {
+						//     *dest = newValue;
+						//     return true;
+						// }
+						// return false;
+					}
+
+					inline bool
+					InterlockedSetIfEqual(volatile uint64_t* dest, uint64_t newValue, uint64_t condition)
+					{
+						uint64_t oldValue;
+
+						__asm __volatile ("lock; cmpxchg8b %1"
+										 : "=A" (oldValue), "=m" (*dest)
+										 : "b" (((uint32_t) newValue) & 0xffffffff),
+										   "c" ((uint32_t)(newValue >> 32)),
+										   "m" (*dest), "a" (((uint32_t) condition) & 0xffffffff),
+										   "d" ((uint32_t)(condition >> 32)));
+
+						return oldValue == condition;
+
+						// Reference non-thread-safe implementation:
+						// if(*dest == condition)
+						// {
+						//     *dest = newValue;
+						//     return true;
+						// }
+						// return false;
+					}
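+
+					// Illustrative usage sketch (hedged; ExchangeAdd64 is not a real API):
+					// InterlockedSetIfEqual is the cmpxchg8b-based building block used by
+					// the 64 bit specializations below, e.g.:
+					//
+					//     int64_t ExchangeAdd64(volatile int64_t* p, int64_t n)
+					//     {
+					//         int64_t oldValue;
+					//         do {
+					//             oldValue = *p;
+					//         } while(!InterlockedSetIfEqual(p, oldValue + n, oldValue));
+					//         return oldValue; // The value prior to the add.
+					//     }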
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::GetValue() const{
+						int64_t condition, nNewValue;
+						do{
+							nNewValue = condition = mValue; // Todo: This function has a problem unless the assignment of mValue to condition is atomic.
+						} while(!InterlockedSetIfEqual(const_cast<int64_t*>(&mValue), nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::GetValue() const{
+						uint64_t condition, nNewValue;
+						do{
+							nNewValue = condition = mValue; // Todo: This function has a problem unless the assignment of mValue to condition is atomic.
+						} while(!InterlockedSetIfEqual(const_cast<uint64_t*>(&mValue), nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::SetValue(ValueType n){
+						int64_t condition;
+						do{
+							condition = mValue;
+						} while(!InterlockedSetIfEqual(&mValue, n, condition));
+						return condition;
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::SetValue(ValueType n){
+						uint64_t condition;
+						do{
+							condition = mValue;
+						} while(!InterlockedSetIfEqual(&mValue, n, condition));
+						return condition;
+					}
+
+					template<> inline
+					bool AtomicInt<int64_t>::SetValueConditional(ValueType n, ValueType condition){
+						return InterlockedSetIfEqual(&mValue, n, condition);
+					}
+
+					template<> inline
+					bool AtomicInt<uint64_t>::SetValueConditional(ValueType n, ValueType condition){
+						return InterlockedSetIfEqual(&mValue, n, condition);
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Increment(){
+						int64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition + 1;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Increment(){
+						uint64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition + 1;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Decrement(){
+						int64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition - 1;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Decrement(){
+						uint64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition - 1;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<int64_t>::ValueType AtomicInt<int64_t>::Add(ValueType n){
+						int64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition + n;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+					template<> inline
+					AtomicInt<uint64_t>::ValueType AtomicInt<uint64_t>::Add(ValueType n){
+						uint64_t condition, nNewValue;
+						do{
+							condition = mValue;
+							nNewValue = condition + n;
+						} while(!InterlockedSetIfEqual(&mValue, nNewValue, condition));
+						return nNewValue;
+					}
+
+				#endif
+
+			#elif defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_BORLAND)
+
+				// Note that this won't compile when ValueType is 64 bits.
+
+				template<class T> inline 
+				typename AtomicInt<T>::ValueType AtomicInt<T>::SetValue(ValueType n)
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, n 
+						xchg eax, dword ptr [ecx]      // The xchg instruction implies a lock prefix.
+					}
+				}
+
+				template<class T> inline 
+				bool AtomicInt<T>::SetValueConditional(ValueType n, ValueType condition)
+				{
+					__asm{
+						mov  ecx, this                       // mValue is expected to be at offset zero of this.
+						mov  edx, n 
+						mov  eax, condition
+						lock cmpxchg dword ptr [ecx], edx    // Compares mValue to condition. If equal, z flag is set and n is copied into mValue.
+						jz    condition_met
+						xor  eax, eax
+						jmp  end
+						condition_met:
+						mov  eax, 1
+						end:
+					}
+				}
+
+				template<class T> inline 
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Increment()
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, 1 
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						inc  eax                       // Increment eax because the return value is the new value.
+					}
+				}
+
+				template<class T> inline 
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Decrement()
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, 0xffffffff
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						dec  eax                       // Decrement eax because the return value is the new value.
+					}
+				}
+
+				template<class T> inline 
+				typename AtomicInt<T>::ValueType AtomicInt<T>::Add(ValueType n)
+				{
+					__asm{
+						mov  ecx, this                 // mValue is expected to be at offset zero of this.
+						mov  eax, n 
+						lock xadd dword ptr [ecx], eax // Sum goes into [ecx], old mValue goes into eax.
+						add  eax, n
+					}
+				}
+
+
+			#else
+				// Compiler not currently supported.
+
+			#endif
+
+		} // namespace Thread
+
+	} // namespace EA
+
+
+#endif // EA_PROCESSOR_X86
+
+
+#ifdef _MSC_VER
+	 #pragma warning(pop)
+#endif
+
+
+#endif // EATHREAD_X86_EATHREAD_ATOMIC_X86_H
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 89 - 0
include/eathread/x86/eathread_sync_x86.h

@@ -0,0 +1,89 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+// Functionality related to memory and code generation synchronization.
+/////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EATHREAD_X86_EATHREAD_SYNC_X86_H
+#define EATHREAD_X86_EATHREAD_SYNC_X86_H
+
+
+#ifndef INCLUDED_eabase_H
+	#include <EABase/eabase.h>
+#endif
+
+
+#if defined(EA_PROCESSOR_X86)
+	#define EA_THREAD_SYNC_IMPLEMENTED
+
+	// By default, we define EA_TARGET_SMP to be true. The reason for this is that most 
+	// applications that users of this code are likely to write are going to be executables
+	// which run properly on any system, be it multiprocessing or not.
+	#ifndef EA_TARGET_SMP
+		#define EA_TARGET_SMP 1
+	#endif
+
+	// EAProcessorPause
+	// Intel has defined a 'pause' instruction for x86 processors starting with the P4, though this simply
+	// maps to the otherwise undocumented 'rep nop' instruction. This pause instruction is important for 
+	// high performance spinning, as otherwise a significant performance penalty is incurred. 
+
+	#if defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_BORLAND)
+		// Year 2003+ versions of the Microsoft SDK define 'rep nop' as YieldProcessor and/or __yield or _mm_pause. 
+		#define EAProcessorPause() __asm { rep nop } 
+	#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+		#define EAProcessorPause() __asm__ __volatile__ ("rep ; nop")
+	#else
+		// In this case we use an Intel-style asm statement. If this doesn't work for your compiler, 
+		// there is most likely some way to express `rep nop` as an inline asm statement for it. 
+		#define EAProcessorPause() __asm { rep nop } // Alternatively: { __asm { _emit 0xf3 }; __asm { _emit 0x90 } }
+	#endif
+
+
+	// EAReadBarrier / EAWriteBarrier / EAReadWriteBarrier
+	// The x86 processor memory architecture ensures read and write consistency on both single and
+	// multiprocessing systems. This makes programming simpler but limits maximum system performance.
+	// We define EAReadBarrier here to be the same as EACompilerMemoryBarrier in order to prevent the 
+	// compiler from making any assumptions at its level about memory usage. Year 2003+ versions of the 
+	// Microsoft SDK define a 'MemoryBarrier' statement which has the same effect as EAReadWriteBarrier.
+	#if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 401) // GCC 4.1 or later
+		#define EAReadBarrier      __sync_synchronize
+		#define EAWriteBarrier     __sync_synchronize
+		#define EAReadWriteBarrier __sync_synchronize
+	#else
+		#define EAReadBarrier      EACompilerMemoryBarrier
+		#define EAWriteBarrier     EACompilerMemoryBarrier
+		#define EAReadWriteBarrier EACompilerMemoryBarrier
+	#endif
+
+	// EACompilerMemoryBarrier
+	#if (defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) && defined(EA_PLATFORM_MICROSOFT)) || (defined(EA_COMPILER_INTEL) && (EA_COMPILER_VERSION >= 9999999)) // VC7+ or Intel (unknown version at this time)
+		extern "C" void _ReadWriteBarrier();
+		#pragma intrinsic(_ReadWriteBarrier)
+		#define EACompilerMemoryBarrier() _ReadWriteBarrier()
+	#elif defined(EA_COMPILER_GNUC)
+		#define EACompilerMemoryBarrier() __asm__ __volatile__ ("":::"memory")
+	#else
+		#define EACompilerMemoryBarrier() // Possibly `EAT_ASSERT(false)` here?
+	#endif
+
+
+#endif // EA_PROCESSOR_X86
+
+
+#endif // EATHREAD_X86_EATHREAD_SYNC_X86_H
+
+
+
+
+
+
+
+

+ 21 - 0
source/android/com_ea_EAThread_EAThread.h

@@ -0,0 +1,21 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class com_ea_EAThread_EAThread */
+
+#ifndef _Included_com_ea_EAThread_EAThread
+#define _Included_com_ea_EAThread_EAThread
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     com_ea_EAThread_EAThread
+ * Method:    Init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_com_ea_EAThread_EAThread_Init
+  (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
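+
+/* Hedged companion sketch (not part of this machine-generated header): a native
+ * implementation of the declared entry point would look roughly like this; the
+ * body shown is illustrative only.
+ *
+ *     JNIEXPORT void JNICALL Java_com_ea_EAThread_EAThread_Init(JNIEnv* env, jclass)
+ *     {
+ *         JavaVM* pVM = NULL;
+ *         env->GetJavaVM(&pVM); // Cache the VM so native threads can attach later.
+ *         // ... pass pVM to EAThread's Android initialization here ...
+ *     }
+ */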

+ 10 - 0
source/android/eathread_android.cpp

@@ -0,0 +1,10 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+
+
+

+ 84 - 0
source/android/eathread_fake_atomic_64.cpp

@@ -0,0 +1,84 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+/// Pseudo implementation of 64 bit primitives modelled after Android's internals.
+/// Return values and semantics are intended to be the same as 32 bit versions.
+///
+/// Basically just does a mutex lock around the operation. Rather than just
+/// one global lock, uses a fixed set of mutexes to lock based on incoming
+/// address to reduce contention.
+///
+/// Abuses the fact that on Android the initializer for a pthread_mutex_t is
+/// simply "{0}" on a volatile int, which avoids requiring global initialization
+/// of these mutexes.
+
+
+#include <EABase/eabase.h>
+
+#if defined(EA_PLATFORM_ANDROID)
+
+#include <pthread.h>
+
+namespace EA
+{
+namespace Thread
+{
+
+#define EAT_FAKE_ATOMIC_SWAP_LOCK_COUNT 32U
+static pthread_mutex_t sFakeAtomic64SwapLocks[EAT_FAKE_ATOMIC_SWAP_LOCK_COUNT];
+
+#define EAT_SWAP_LOCK(addr) &sFakeAtomic64SwapLocks[((unsigned)(void*)(addr) >> 3U) % EAT_FAKE_ATOMIC_SWAP_LOCK_COUNT]
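+
+// Illustrative note (hedged): EAT_SWAP_LOCK stripes addresses across the lock
+// table. Shifting right by 3 discards the low bits of an 8-byte-aligned address,
+// so adjacent int64_t values map to different mutexes. For example:
+//
+//     volatile int64_t v;                        // Suppose &v == 0x1008.
+//     pthread_mutex_t* lock = EAT_SWAP_LOCK(&v); // Index = (0x1008 >> 3) % 32 = 513 % 32 = 1.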
+
+
+int64_t android_fake_atomic_swap_64(int64_t value, volatile int64_t* addr)
+{
+	int64_t oldValue;
+	pthread_mutex_t* lock = EAT_SWAP_LOCK(addr);
+
+	pthread_mutex_lock(lock);
+
+	oldValue = *addr;
+	*addr = value;
+
+	pthread_mutex_unlock(lock);
+	return oldValue;
+}
+
+
+int android_fake_atomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue, volatile int64_t* addr)
+{
+	int ret;
+	pthread_mutex_t* lock = EAT_SWAP_LOCK(addr);
+
+	pthread_mutex_lock(lock);
+
+	if (*addr == oldvalue)
+	{
+		*addr = newvalue;
+		ret = 0;
+	}
+	else
+	{
+		ret = 1;
+	}
+	pthread_mutex_unlock(lock);
+	return ret;
+}
+
+
+int64_t android_fake_atomic_read_64(volatile int64_t* addr)
+{
+	int64_t ret;
+	pthread_mutex_t* lock = EAT_SWAP_LOCK(addr);
+
+	pthread_mutex_lock(lock);
+	ret = *addr;
+	pthread_mutex_unlock(lock);
+	return ret;
+}
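+
+// Illustrative usage sketch (hedged; FakeAtomicAdd64 is not part of this file):
+// a 64 bit atomic add built from the primitives above.
+//
+//     int64_t FakeAtomicAdd64(int64_t n, volatile int64_t* addr)
+//     {
+//         int64_t oldValue;
+//         do {
+//             oldValue = android_fake_atomic_read_64(addr);
+//         } while(android_fake_atomic_cmpxchg_64(oldValue, oldValue + n, addr) != 0); // 0 means the swap succeeded.
+//         return oldValue + n;
+//     }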
+
+} // namespace Thread
+} // namespace EA
+
+#endif // #if defined(EA_PLATFORM_ANDROID)

+ 226 - 0
source/android/eathread_semaphore_android.cpp

@@ -0,0 +1,226 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef INCLUDED_eabase_H
+   #include <EABase/eabase.h>
+#endif
+#ifndef EATHREAD_EATHREAD_SEMAPHORE_H
+   #include <eathread/eathread_semaphore.h>
+#endif
+
+
+#if defined(EA_PLATFORM_ANDROID)
+	#include <time.h>
+	#include <string.h>
+	#include <limits.h>
+	#include <stdio.h>
+	#include <sys/errno.h>
+
+	EASemaphoreData::EASemaphoreData()
+		: mnCount(0), mnMaxCount(INT_MAX)
+	{
+		memset(&mSemaphore, 0, sizeof(mSemaphore)); 
+	}
+
+
+	EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* /*pName*/)
+		: mInitialCount(initialCount), mMaxCount(INT_MAX), mbIntraProcess(bIntraProcess)
+	{
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+	{
+		if(!pSemaphoreParameters && bDefaultParameters)
+		{
+			SemaphoreParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pSemaphoreParameters);
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(int initialCount)
+	{
+		SemaphoreParameters parameters(initialCount);
+		Init(&parameters);
+	}
+
+
+	EA::Thread::Semaphore::~Semaphore()
+	{
+		sem_destroy(&mSemaphoreData.mSemaphore);   
+
+		// Can't use the following because Android's sem_destroy is broken. http://code.google.com/p/android/issues/detail?id=3106
+		//
+		// int result = -1;
+		// 
+		// for(;;)
+		// {
+		//     result = sem_destroy(&mSemaphoreData.mSemaphore);
+		// 
+		//     if((result == -1) && (errno == EBUSY)) // If another thread or process is blocked on this semaphore...
+		//         ThreadSleep(kTimeoutYield);        // Yield. If we don't yield, it's possible we could block other threads or processes from running, on some systems.
+		//     else
+		//         break;
+		// }
+		// 
+		// EAT_ASSERT(result != -1);
+	}
+
+
+	bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+	{
+		if(pSemaphoreParameters)
+		{
+			mSemaphoreData.mnCount    = pSemaphoreParameters->mInitialCount;
+			mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
+
+			if(mSemaphoreData.mnCount < 0)
+				mSemaphoreData.mnCount = 0;
+
+			// TODO intraprocess not supported on Android. Assert? Fail?
+
+			mSemaphoreData.mbIntraProcess = false;
+			int result = sem_init(&mSemaphoreData.mSemaphore, 0, (unsigned int)mSemaphoreData.mnCount);
+			if(result != 0)
+			{
+				EAT_ASSERT(false);
+				memset(&mSemaphoreData.mSemaphore, 0, sizeof(mSemaphoreData.mSemaphore));
+			}
+
+			return (result != -1);
+		}
+
+		return false;
+	}
+
+
+	int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		int result;
+
+		if(timeoutAbsolute == kTimeoutNone)
+		{
+			// We retry waits that were interrupted by signals. Should we instead require
+			// the user to deal with this and return an error value? Or should we require
+			// the user to disable the appropriate signal interruptions?
+			while(((result = sem_wait(&mSemaphoreData.mSemaphore)) != 0) && (errno == EINTR))
+			{
+				continue;
+			}
+			int val;
+			sem_getvalue(&mSemaphoreData.mSemaphore, &val);
+
+			if(result != 0)
+			{
+				EAT_ASSERT(false); // This is an error condition.
+				return kResultError;
+			}
+		}
+		else if(timeoutAbsolute == kTimeoutImmediate)
+		{
+			// The sem_trywait() and sem_wait() functions shall return zero if the calling process successfully 
+			// performed the semaphore lock operation on the semaphore designated by sem. If the call was 
+			// unsuccessful, the state of the semaphore shall be unchanged, and the function shall return a 
+			// value of -1 and set errno to indicate the error.
+			int trywaitResult = sem_trywait(&mSemaphoreData.mSemaphore);
+
+			if(trywaitResult == -1)
+			{
+				if(errno == EAGAIN) // The sem_* family of functions are different from pthreads because they set errno instead of returning an error value.
+					return kResultTimeout;
+
+				return kResultError;
+			}
+
+			// Android sem_trywait is broken and in earlier versions returns EAGAIN instead of setting 
+			// errno to EAGAIN. http://source-android.frandroid.com/bionic/libc/docs/CHANGES.TXT
+			#if defined(EA_PLATFORM_ANDROID) 
+				if(trywaitResult == EAGAIN)
+					return kResultTimeout;
+			#endif
+		}
+		else
+		{
+			// Some systems don't have sem_timedwait; on those we would have to 
+			// fall back to a polling mechanism. However, polling really
+			// isn't proper because the polling thread might be at a greater 
+			// priority level than the lock-owning thread and thus this code
+			// might not work as well as desired. Android provides sem_timedwait,
+			// so we use it directly here.
+
+			// We retry waits that were interrupted by signals. Should we instead require
+			// the user to deal with this and return an error value? Or should we require
+			// the user to disable the appropriate signal interruptions?
+			while(((result = sem_timedwait(&mSemaphoreData.mSemaphore, &timeoutAbsolute)) != 0) && (errno == EINTR))
+			{
+				continue;
+			}
+
+			if(result != 0)
+			{
+				if(errno == ETIMEDOUT)
+					return kResultTimeout;
+
+				return kResultError;
+			}
+		}
+
+		EAT_ASSERT(mSemaphoreData.mnCount > 0);
+		return (int)mSemaphoreData.mnCount.Decrement(); // AtomicInt32 operation. Note that the value of the semaphore count could change from the returned value by the time the caller reads it. This is fine but the user should understand this.
+	}
+
+
+	int EA::Thread::Semaphore::Post(int count)
+	{
+		// Some systems have a sem_post_multiple which we could take advantage 
+		// of here to atomically post multiple times.
+		EAT_ASSERT(mSemaphoreData.mnCount >= 0);
+
+		// It's hard to correctly implement mnMaxCount here, given that it 
+		// may be modified by multiple threads during this execution. So if you want
+		// to use max-count with an IntraProcess semaphore safely then you need to 
+		// post only from a single thread, or at least a single thread at a time.
+		
+		int currentCount = mSemaphoreData.mnCount;
+
+		// If count would cause an overflow exit early
+		// If count would cause an overflow, exit early.
+			return kResultError;
+
+		currentCount += count;
+
+		while(count-- > 0)
+		{
+			++mSemaphoreData.mnCount;     // AtomicInt32 operation.
+
+			if(sem_post(&mSemaphoreData.mSemaphore) != 0)
+			{
+				--mSemaphoreData.mnCount; // AtomicInt32 operation.
+				EAT_ASSERT(false);
+				return kResultError;        
+			}
+		}
+
+		// If all count posts occurred...
+		return currentCount; // It's possible that another thread may have modified this value since we changed it, but that's not important.
+	}
+
+
+	int EA::Thread::Semaphore::GetCount() const
+	{
+		return (int)mSemaphoreData.mnCount;
+	}
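+
+	// Illustrative usage sketch (hedged; the queue details are hypothetical, and
+	// Wait() is assumed to default to an infinite timeout): a minimal
+	// producer/consumer handshake built on this Semaphore.
+	//
+	//     EA::Thread::Semaphore gItemsAvailable(0);
+	//
+	//     void Producer() { /* enqueue an item */ gItemsAvailable.Post(1); }
+	//     void Consumer() { gItemsAvailable.Wait(); /* dequeue an item */ }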
+
+
+#endif // EA_PLATFORM_ANDROID
+
+
+
+
+
+
+
+

+ 829 - 0
source/apple/eathread_callstack_apple.cpp

@@ -0,0 +1,829 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <eathread/eathread.h>
+#include <eathread/eathread_futex.h>
+#include <eathread/eathread_storage.h>
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/apple/eathread_callstack_apple.h>
+#include <mach/thread_act.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <dlfcn.h>
+#include <new>
+
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+#include <mach-o/dyld_images.h> //dyld_all_image_infos
+#include <mach-o/dyld.h> //segment_command(_64)
+#include <mach/task.h> //task_info
+
+#if defined(EA_PLATFORM_IPHONE)
+    //On iPhone, this gets pulled in dynamically through libproc.dylib
+    extern "C" int proc_regionfilename(int pid, uint64_t address, void * buffer, uint32_t buffersize);
+#else
+    #include <libproc.h> //proc_regionfilename
+#endif
+#endif
+
+
+#if defined(__LP64__)
+typedef struct mach_header_64     MachHeader;
+typedef struct segment_command_64 SegmentCommand;
+typedef struct section_64         Section;
+#define kLCSegment                LC_SEGMENT_64
+#else
+typedef struct mach_header        MachHeader;
+typedef struct segment_command    SegmentCommand;
+typedef struct section            Section;
+#define kLCSegment                LC_SEGMENT
+#endif
+
+
+#if EACALLSTACK_GLIBC_BACKTRACE_AVAILABLE
+	#include <signal.h>
+	#include <execinfo.h>
+#endif
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+///////////////////////////////////////////////////////////////////////////////
+// gModuleInfoApple
+// 
+// We keep a cached array of the module info. It's possible that the module
+// info could change at runtime, though for our purposes the changes don't 
+// usually matter. Nevertheless that's a limitation of this scheme and we
+// may need to do something about it in the future.
+// This global array is freed in ShutdownCallstack.
+// Currently this array is stored per-DLL when EAThread is built as a DLL.
+//
+static ModuleInfoApple* gModuleInfoAppleArray      = NULL;
+static size_t           gModuleInfoAppleArrayCount = 0;
+static Futex*           gCallstackFutex            = NULL;
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ReallocModuleInfoApple
+//
+// This is not a fully generic Realloc function. It currently reallocs only
+// to a greater size or to zero, which is fine for our purposes. The caller
+// of this function needs to be aware that the Realloc may fail and should
+// use gModuleInfoAppleArrayCount as the array count and not the value passed
+// to this function.
+//
+static ModuleInfoApple* ReallocModuleInfoApple(size_t newCount)
+{
+	if(gCallstackFutex)
+		gCallstackFutex->Lock();
+
+	EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+	EAT_ASSERT_MSG(pAllocator != NULL, "EA::Thread::SetAllocator needs to be called on app startup.");
+	if(pAllocator)
+	{
+		if(newCount > gModuleInfoAppleArrayCount) // If increasing in size...
+		{
+			size_t allocSize   = sizeof(ModuleInfoApple) * newCount;
+			void*  allocMemory = pAllocator->Alloc(allocSize);
+
+			if(allocMemory)
+			{
+				ModuleInfoApple* pNew = new(allocMemory) ModuleInfoApple[newCount]; // Placement new always succeeds.
+				
+				if(gModuleInfoAppleArray)
+				{
+					// gModuleInfoAppleArrayCount is guaranteed to be < newCount for this memcpy.
+					memcpy(pNew, gModuleInfoAppleArray, sizeof(ModuleInfoApple) * gModuleInfoAppleArrayCount);
+					pAllocator->Free(gModuleInfoAppleArray);
+				}
+
+				gModuleInfoAppleArray      = pNew;
+				gModuleInfoAppleArrayCount = newCount;
+			}
+			// Else fall through and use the existing gModuleInfoAppleArray.
+		}
+		else if(newCount == 0) // If freeing...
+		{
+			if(gModuleInfoAppleArray)
+			{
+				pAllocator->Free(gModuleInfoAppleArray);
+				gModuleInfoAppleArray      = NULL;
+				gModuleInfoAppleArrayCount = 0;
+			}
+		}
+		// Else we do nothing for the case of requesting a newCount < gModuleInfoAppleArrayCount.
+	}
+
+	if(gCallstackFutex)
+		gCallstackFutex->Unlock();
+
+	return gModuleInfoAppleArray; // gModuleInfoAppleArrayCount indicates the capacity of this array.
+}
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+// This fills a ModuleInfoApple object with the information from each of the segments listed in the given mach_header, starting at the given currentSegmentPos. It also puts the pModulePath into the ModuleInfoApple object, which is then appended to the given array.
+//
+// The results are appended to pModuleInfoAppleArray up to its capacity
+// pTypeFilter is used to filter out segment types
+// pModulePath is the path corresponding to the given pMachHeader. It is assumed it is NullTerminated
+// currentSegmentPos is the starting segment we are iterating over
+// pMachHeader is the mach_header with all the segment information
+void CreateModuleInfoApple(ModuleInfoApple* pModuleInfoAppleArray, size_t arrayCapacity, size_t& requiredArraySize, size_t& arraySize,
+                           const char* pTypeFilter, const char* pModulePath, uintptr_t currentSegmentPos, const MachHeader* pMachHeader, intptr_t offset)
+{
+    for(uint32_t i = 0; i < pMachHeader->ncmds; i++) // Look at each command, paying attention to LC_SEGMENT/LC_SEGMENT_64 (segment_command) commands.
+    {
+        const SegmentCommand* pSegmentCommand = reinterpret_cast<const SegmentCommand*>(currentSegmentPos); // This won't actually be a segment_command unless the type is kLCSegment
+        
+        if(pSegmentCommand != NULL && pSegmentCommand->cmd == kLCSegment) // If this really is a segment_command... (otherwise it is some other kind of command)
+        {
+            const size_t segnameBufferLen = sizeof(pSegmentCommand->segname) + 1;
+            
+            char segnameBuffer[segnameBufferLen];
+            memcpy(segnameBuffer, pSegmentCommand->segname, sizeof(pSegmentCommand->segname));
+            segnameBuffer[segnameBufferLen-1] = '\0'; // In case segname was not 0-terminated.
+            
+            if(!pTypeFilter || strncmp(segnameBuffer, pTypeFilter, sizeof(segnameBuffer)))
+            {
+                requiredArraySize++;
+                
+                if (arraySize < arrayCapacity)
+                {
+                    ModuleInfoApple& info = pModuleInfoAppleArray[arraySize++];
+                
+                    uint64_t uOffset = (uint64_t)offset;
+                    info.mBaseAddress = (uint64_t)(pSegmentCommand->vmaddr + uOffset);
+                    // info.mModuleHandle = reinterpret_cast<ModuleHandle>((uintptr_t)info.mBaseAddress);
+                    info.mSize = (uint64_t)pSegmentCommand->vmsize;
+                    
+                    // Copy modulePath to info.mPath.
+                    strlcpy(info.mPath, pModulePath, EAArrayCount(info.mPath));
+
+                    // Get the beginning of the file name within modulePath and copy the file name to info.mName.
+                    const char* pDirSeparator = strrchr(pModulePath, '/');
+                    if(pDirSeparator)
+                        pDirSeparator++;
+                    else
+                        pDirSeparator = pModulePath;
+                    strlcpy(info.mName, pDirSeparator, EAArrayCount(info.mName));
+                    
+                    info.mPermissions[0] = (pSegmentCommand->initprot & VM_PROT_READ)    ? 'r' : '-';
+                    info.mPermissions[1] = (pSegmentCommand->initprot & VM_PROT_WRITE)   ? 'w' : '-';
+                    info.mPermissions[2] = (pSegmentCommand->initprot & VM_PROT_EXECUTE) ? 'x' : '-';
+                    info.mPermissions[3] = '/';
+                    info.mPermissions[4] = (pSegmentCommand->maxprot & VM_PROT_READ)    ? 'r' : '-';
+                    info.mPermissions[5] = (pSegmentCommand->maxprot & VM_PROT_WRITE)   ? 'w' : '-';
+                    info.mPermissions[6] = (pSegmentCommand->maxprot & VM_PROT_EXECUTE) ? 'x' : '-';
+                    info.mPermissions[7] = '\0';
+                    
+                    strlcpy(info.mType,pSegmentCommand->segname,EAArrayCount(info.mType));
+                    //**********************************************************************************
+                    //For Debugging Purposes
+                    //__TEXT                 0000000100000000-0000000100001000 [    4K] r-x/rwx SM=COW  /Build/Products/Debug/TestProject.app/Contents/MacOS/TestProject
+                    //printf("%20s %llx-%llx %s %s\n", segnameBuffer, (unsigned long long)info.mBaseAddress, (unsigned long long)(info.mBaseAddress + pSegmentCommand->vmsize), info.mPermissions, pModulePath);
+                    //**********************************************************************************/
+                }
+            }
+            
+        }
+        currentSegmentPos += pSegmentCommand->cmdsize;
+    }
+}
+#endif // EATHREAD_APPLE_GETMODULEINFO_ENABLED
+
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+// GetModuleInfoApple
+//
+// This function exists for the purpose of being a central module/VM map info collecting function,
+// used by a couple functions within EACallstack.
+//
+// We used to use vmmap and parse the output
+// https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man1/vmmap.1.html
+// But starting around OS X 10.9, vmmap cannot be called due to new security restrictions.
+//
+// I tried using ::mach_vm_region, but I was unable to find the type of the segments (__TEXT, etc.),
+// and while system library addresses were given, their name/modulePath was not.
+// ::mach_vm_region_recurse did not solve this problem either.
+//
+// Replaced _dyld_get_all_image_infos() call with task_info() call as the old call is no longer available
+// with osx 10.13 (High Sierra).
+size_t GetModuleInfoApple(ModuleInfoApple* pModuleInfoAppleArray, size_t arrayCapacity,
+                            const char* pTypeFilter, bool bEnableCache)
+{
+    // The following is present to handle the case that the user forgot to call EA::Thread::InitCallstack().
+    // We don't match this with a ShutdownCallstack, and so the user might see memory leaks in that case.
+    // The user should call EA::Thread::InitCallstack on app startup and EA::Thread::ShutdownCallstack on 
+    // app shutdown, at least if the user wants to use this function.
+    if(!gCallstackFutex)
+        InitCallstack();
+        
+    size_t requiredArraySize = 0;
+    size_t arraySize = 0;
+
+    if(bEnableCache)
+    {
+        if(gCallstackFutex)
+            gCallstackFutex->Lock();
+
+        if(gModuleInfoAppleArrayCount == 0) // If nothing is cached...
+        {
+            // Call ourselves recursively, for the sole purpose of getting the required size and filling the cache.
+            // We call GetModuleInfoApple with a NULL filter (get all results). This may result in a required size that's
+            // greater than the size needed for the user's possibly supplied filter. Thus we have a variable here 
+            // called maxRequiredArraySize.
+            
+            const size_t maxRequiredArraySize = GetModuleInfoApple(NULL, 0, NULL, false);
+            ReallocModuleInfoApple(maxRequiredArraySize); // If the realloc fails, the code below deals with it safely.
+
+            // Call ourselves recursively, for the purpose of filling in the cache.
+            GetModuleInfoApple(gModuleInfoAppleArray, gModuleInfoAppleArrayCount, NULL, false); 
+        }
+        
+        // Copy our cache to the user's supplied array, while applying the filter and updating requiredArraySize.
+        for(size_t i = 0, iEnd = gModuleInfoAppleArrayCount; i != iEnd; i++)
+        {
+            const ModuleInfoApple& mia = gModuleInfoAppleArray[i];
+
+            if(!pTypeFilter || strstr(mia.mType, pTypeFilter)) // If the filter matches...
+            {
+                requiredArraySize++;
+
+                if(arraySize < arrayCapacity) // If there is room in the user-supplied array...
+                {
+                    ModuleInfoApple& miaUser = pModuleInfoAppleArray[arraySize++];
+                    memcpy(&miaUser, &mia, sizeof(ModuleInfoApple));
+                }
+            }
+        }
+
+        if(gCallstackFutex)
+            gCallstackFutex->Unlock();
+    }
+    else
+    {
+        struct task_dyld_info t_info;
+        uint32_t t_info_count = TASK_DYLD_INFO_COUNT;
+        kern_return_t kr = task_info(mach_task_self(), TASK_DYLD_INFO, (task_info_t)&t_info, &t_info_count);
+        if (kr != KERN_SUCCESS)
+        {
+            EAT_ASSERT_FORMATTED(false, "GetModuleInfoApple: task_info() returned %d", kr);
+            return 0;
+        }
+        const struct dyld_all_image_infos* pAllImageInfos = (const struct dyld_all_image_infos *)t_info.all_image_info_addr;
+        
+        for(uint32_t i = 0; i < pAllImageInfos->infoArrayCount; i++)
+        {
+            const char* pModulePath = pAllImageInfos->infoArray[i].imageFilePath;
+            if(pModulePath != NULL && strncmp(pModulePath, "", PATH_MAX) != 0)
+            {
+                uintptr_t         currentSegmentPos = (uintptr_t)pAllImageInfos->infoArray[i].imageLoadAddress;
+                const MachHeader* pMachHeader       = reinterpret_cast<const MachHeader*>(currentSegmentPos);
+                EAT_ASSERT(pMachHeader != NULL);
+                currentSegmentPos += sizeof(*pMachHeader);
+                
+                // The system library addresses we obtain are the linker address.
+                // The system library addresses we obtain are the link-time addresses,
+                // so we need to get the dynamic loading offset.
+                // to know whether or not it should get used on each image. (dyld and our executable images do not slide)
+                // http://lists.apple.com/archives/darwin-kernel/2012/Apr/msg00012.html
+                intptr_t offset = _dyld_get_image_vmaddr_slide(i);
+                CreateModuleInfoApple(pModuleInfoAppleArray, arrayCapacity, requiredArraySize, arraySize,
+                                      pTypeFilter, pModulePath, currentSegmentPos, pMachHeader, offset);
+            }
+        }
+        
+        // Iterating on dyld_all_image_infos->infoArray[] does not give us entries for /usr/lib/dyld.
+        // We use the mach_header to get /usr/lib/dyld
+        const MachHeader* pMachHeader = (const MachHeader*)pAllImageInfos->dyldImageLoadAddress;
+        uintptr_t         currentSegmentPos = (uintptr_t)pMachHeader + sizeof(*pMachHeader);
+        char modulePath[PATH_MAX] = "";
+        pid_t  pid = getpid();
+        int filenameLen = proc_regionfilename((int)pid,currentSegmentPos,modulePath,(uint32_t)sizeof(modulePath));
+        EAT_ASSERT(filenameLen > 0 && modulePath != NULL && strncmp(modulePath,"",sizeof(modulePath)) != 0);
+        if(filenameLen > 0)
+        {
+            CreateModuleInfoApple(pModuleInfoAppleArray, arrayCapacity, requiredArraySize, arraySize,
+                                  pTypeFilter, modulePath, currentSegmentPos, pMachHeader, 0); // offset is 0 because dyld is already loaded
+        }
+        
+        // Use this to compare results
+        // printf("vmmap -w %lld", (int64_t)pid);
+    }
+
+	return requiredArraySize;
+}
+#endif // EATHREAD_APPLE_GETMODULEINFO_ENABLED
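+
+// Illustrative usage sketch (hedged; allocation is elided): because
+// GetModuleInfoApple returns the required element count, callers typically
+// size the destination array in two passes:
+//
+//     size_t required = GetModuleInfoApple(NULL, 0, "__TEXT", true);   // Query pass: count only.
+//     ModuleInfoApple* pArray = /* allocate 'required' elements */;
+//     GetModuleInfoApple(pArray, required, "__TEXT", true);            // Fill pass.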
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetInstructionPointer
+//
+EATHREADLIB_API void GetInstructionPointer(void*& pInstruction)
+{
+	pInstruction = __builtin_return_address(0); // Works for all Apple platforms and compilers (gcc and clang).
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+	EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+	EAT_ASSERT_MSG(pAllocator != NULL, "EA::Thread::SetAllocator needs to be called on app startup.");
+	if(pAllocator)
+		gCallstackFutex = new(pAllocator->Alloc(sizeof(Futex))) Futex;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+	EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+	EAT_ASSERT_MSG(pAllocator != NULL, "EAThread requires an allocator to be available between InitCallstack and ShutdownCallstack.");
+	if(pAllocator)
+	{
+		if(gModuleInfoAppleArray)
+			ReallocModuleInfoApple(0);
+
+		if(gCallstackFutex)
+		{
+			pAllocator->Free(gCallstackFutex);
+			gCallstackFutex = NULL;
+		}
+	}
+}
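+
+// Illustrative call pattern (hedged), restating the comments above; the
+// allocator setup call is an assumption about the surrounding app code:
+//
+//     EA::Thread::SetAllocator(pAllocator);  // App startup, before InitCallstack.
+//     EA::Thread::InitCallstack();
+//     // ... use the callstack/module-info functions ...
+//     EA::Thread::ShutdownCallstack();       // App shutdown; frees the cached module info.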
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+// Capture up to nReturnAddressArrayCapacity elements of the call stack, 
+// or the whole callstack, whichever is smaller. 
+//
+// ARM
+//      Apple defines a different ABI than the ARM eabi used by Linux and the ABI used
+//      by Microsoft. It implements a predictable stack frame system using r7 as the 
+//      frame pointer. Documentation:
+//          http://developer.apple.com/library/ios/#documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
+//
+//      Basically, Apple uses r7 as a frame pointer. So for any function you are
+//      executing, r7 + 4 is the LR passed to us by the caller and is the PC of 
+//      the parent. And r7 + 0 is a pointer to the parent's r7. 
+// x86/x64
+//      The ABI is similar, except that it uses the corresponding registers of each CPU.
+//
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	#if defined(EA_DEBUG)
+		memset(pReturnAddressArray, 0, nReturnAddressArrayCapacity * sizeof(void*));
+	#endif
+	
+	#if defined(EA_PROCESSOR_ARM) || defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)
+		
+		struct StackFrame {
+			StackFrame* mpParentStackFrame;
+			void*       mpReturnPC;
+		};
+		
+		StackFrame* pStackFrame;
+		void*       pInstruction;
+		size_t      index = 0;
+
+		if(pContext)
+		{
+			#if defined(EA_PROCESSOR_ARM32)
+				pStackFrame  = (StackFrame*)pContext->mFP;
+				pInstruction = (void*)      pContext->mPC;
+				#define FrameIsAligned(pStackFrame) ((((uintptr_t)pStackFrame) & 0x1) == 0)
+
+			#elif defined(EA_PROCESSOR_ARM64)
+				pStackFrame  = (StackFrame*)pContext->mFP;
+				pInstruction = (void*)      pContext->mPC;
+				#define FrameIsAligned(pStackFrame) ((((uintptr_t)pStackFrame) & 0xf) == 0)
+								
+			#elif defined(EA_PROCESSOR_X86_64)
+				pStackFrame  = (StackFrame*)pContext->mRBP;
+				pInstruction = (void*)      pContext->mRIP;
+				#define FrameIsAligned(pStackFrame) ((((uintptr_t)pStackFrame) & 0xf) == 0)
+
+			#elif defined(EA_PROCESSOR_X86)
+				pStackFrame  = (StackFrame*)pContext->mEBP;
+				pInstruction = (void*)      pContext->mEIP;
+				#define FrameIsAligned(pStackFrame) ((((uintptr_t)pStackFrame) & 0xf) == 8)
+				
+			#endif
+
+			// Write the instruction to pReturnAddressArray. In this case we have this thread 
+			// reading the callstack from another thread.
+			if(index < nReturnAddressArrayCapacity)
+				pReturnAddressArray[index++] = pInstruction;
+		}
+		else // Else get the current values...
+		{
+			pStackFrame = (StackFrame*)__builtin_frame_address(0);
+			GetInstructionPointer(pInstruction); // Intentionally don't call EAGetInstructionPointer, because it won't set the Thumb bit if this is Thumb code.
+
+			// Don't write pInstruction to pReturnAddressArray, as pInstruction refers to the code in *this* function, whereas we want to start with caller's call frame.
+		}
+
+		// We can do some range validation if we have a pthread id.
+		StackFrame* pStackBase;
+		StackFrame* pStackLimit;
+		const bool  bThreadIsCurrent = (pContext == NULL); // To do: allow this to also tell if the thread is current for the case that pContext is non-NULL. We can do that by reading the current frame address and walking it backwards a few times and seeing if any value matches pStackFrame. 
+		
+		if(bThreadIsCurrent)
+		{
+			pthread_t pthread = pthread_self(); // This makes the assumption that the current thread is a pthread and not just a kernel thread.
+			pStackBase  = reinterpret_cast<StackFrame*>(pthread_get_stackaddr_np(pthread));
+			pStackLimit = pStackBase - (pthread_get_stacksize_np(pthread) / sizeof(StackFrame));
+		}
+		else
+		{   // Make a conservative guess.
+			pStackBase  = pStackFrame + ((1024 * 1024) / sizeof(StackFrame));
+			pStackLimit = pStackFrame - ((1024 * 1024) / sizeof(StackFrame));
+		}
+
+		// To consider: Do some validation of the PC. We can validate it by making sure it's within 20 MB 
+		// of our PC and also verify that the instruction before it (be it Thumb or ARM) is a BL or BLX 
+		// function call instruction (in the case of EA_PROCESSOR_ARM).
+		// To consider: Verify that each successive pStackFrame is at a higher address than the last,
+		// as otherwise the data must be corrupt.
+
+		if((index < nReturnAddressArrayCapacity) && pStackFrame && FrameIsAligned(pStackFrame))
+		{
+			pReturnAddressArray[index++] = pStackFrame->mpReturnPC;  // Should happen to be equal to pContext->mLR.
+
+			while(pStackFrame && pStackFrame->mpReturnPC && (index < nReturnAddressArrayCapacity)) 
+			{
+				pStackFrame = pStackFrame->mpParentStackFrame;
+
+				if(pStackFrame && FrameIsAligned(pStackFrame) && pStackFrame->mpReturnPC && (pStackFrame < pStackBase) && (pStackFrame > pStackLimit))
+					pReturnAddressArray[index++] = pStackFrame->mpReturnPC;
+				else
+					break;
+			}
+		}
+
+		return index;
+
+	
+	#elif EACALLSTACK_GLIBC_BACKTRACE_AVAILABLE // Mac OS X with GlibC
+
+		// One way to get the callstack of another thread, via signal handling:
+		//     https://github.com/albertz/openlierox/blob/0.59/src/common/Debug_GetCallstack.cpp
+		
+		size_t count = 0;
+
+		// The pContext option is not currently supported.
+		if(pContext == NULL) // To do: || pContext refers to this thread.
+		{
+			count = (size_t)backtrace(pReturnAddressArray, (int)nReturnAddressArrayCapacity);
+			if(count > 0)
+			{
+				--count; // Remove the first entry, because it refers to this function and by design we don't include this function.
+				memmove(pReturnAddressArray, pReturnAddressArray + 1, count * sizeof(void*));
+			}
+		}
+		// else fall through to code that manually reads stack frames?
+		
+		return count;
+
+	#else
+		EA_UNUSED(pReturnAddressArray);
+		EA_UNUSED(nReturnAddressArrayCapacity);
+		EA_UNUSED(pContext);
+
+		return 0;
+	#endif
+}
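+
+// A minimal usage sketch: capture and print the current thread's callstack.
+// Passing NULL for pContext walks this thread's own frames:
+//
+//     void*  addresses[32];
+//     size_t count = EA::Thread::GetCallstack(addresses, 32, NULL);
+//
+//     for(size_t i = 0; i < count; i++)
+//         printf("%2u: %p\n", (unsigned)i, addresses[i]);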
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+// Convert a full Context to a CallstackContext (subset of context).
+//
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	#if defined(EA_PROCESSOR_X86_64)
+		context.mRIP = pContext->Rip;
+		context.mRSP = pContext->Rsp;
+		context.mRBP = pContext->Rbp;
+		
+	#elif defined(EA_PROCESSOR_X86)
+		context.mEIP = pContext->Eip;
+		context.mESP = pContext->Esp;
+		context.mEBP = pContext->Ebp;
+		
+	#elif defined(EA_PROCESSOR_ARM32)
+		context.mFP  = pContext->mGpr[7];   // Apple uses R7 for the frame pointer in both ARM and Thumb CPU modes.
+		context.mSP  = pContext->mGpr[13];
+		context.mLR  = pContext->mGpr[14];
+		context.mPC  = pContext->mGpr[15];
+
+	#elif defined(EA_PROCESSOR_ARM64)
+		context.mFP  = pContext->mGpr[29];   
+		context.mSP  = pContext->mGpr[31]; 
+		context.mLR  = pContext->mGpr[30];
+		context.mPC  = pContext->mPC;
+		
+	#else
+		EAT_FAIL_MSG("Platform unsupported");
+	#endif
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+// Returns the required strlen of pModuleName.
+//
+EATHREADLIB_API size_t GetModuleFromAddress(const void* pCodeAddress, char* pModuleName, size_t moduleNameCapacity)
+{
+	if(moduleNameCapacity > 0)
+		pModuleName[0] = 0;
+
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+	Dl_info dlInfo; memset(&dlInfo, 0, sizeof(dlInfo)); // Just memset because dladdr sometimes leaves dli_fname untouched.
+	int     result = dladdr(pCodeAddress, &dlInfo);
+
+	if((result != 0) && dlInfo.dli_fname) // It seems that this usually fails.
+		return strlcpy(pModuleName, dlInfo.dli_fname, moduleNameCapacity);
+
+	// To do: Make this be dynamically resized as needed.
+	const size_t         kCapacity = 64;
+	ModuleInfoApple      moduleInfoAppleArray[kCapacity];
+	size_t               requiredCapacity = GetModuleInfoApple(moduleInfoAppleArray, kCapacity, "__TEXT", true); // To consider: Make this true (use cache) configurable.
+	uint64_t             codeAddress = (uint64_t)(uintptr_t)pCodeAddress;
+
+	if(requiredCapacity > kCapacity)
+		requiredCapacity = kCapacity;
+
+	for(size_t i = 0; i < requiredCapacity; i++)
+	{
+		const ModuleInfoApple& miaUser = moduleInfoAppleArray[i];
+		
+		if((miaUser.mBaseAddress < codeAddress) && (codeAddress < (miaUser.mBaseAddress + miaUser.mSize)))
+			return strlcpy(pModuleName, miaUser.mPath, moduleNameCapacity);
+	}
+#endif
+
+	return 0;
+}
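+
+// A minimal usage sketch. The return value is the required strlen of the
+// module name, so 0 means the lookup failed and a value >= the capacity means
+// the copied name was truncated (SomeFunction is a hypothetical code address):
+//
+//     char   moduleName[256];
+//     size_t requiredLen = EA::Thread::GetModuleFromAddress((void*)&SomeFunction, moduleName, sizeof(moduleName));
+//
+//     if(requiredLen >= sizeof(moduleName))
+//         { } // Truncated; retry with a buffer of (requiredLen + 1) chars.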
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pCodeAddress)
+{
+#if EATHREAD_APPLE_GETMODULEINFO_ENABLED
+	Dl_info dlInfo; memset(&dlInfo, 0, sizeof(dlInfo)); // Just memset because dladdr sometimes leaves fields untouched.
+	int     result = dladdr(pCodeAddress, &dlInfo);
+
+	if(result != 0)
+		return dlInfo.dli_fbase; // Is the object load base the same as the module handle? 
+
+	// Try using GetModuleInfoApple to get the information.
+	// To do: Make this be dynamically resized as needed.
+	const size_t         kCapacity = 256;
+	ModuleInfoApple      moduleInfoAppleArray[kCapacity];
+	size_t               requiredCapacity = GetModuleInfoApple(moduleInfoAppleArray, kCapacity, "__TEXT", true); // To consider: Make this true (use cache) configurable.
+	uint64_t             codeAddress = (uint64_t)(uintptr_t)pCodeAddress;
+
+	if(requiredCapacity > kCapacity)
+		requiredCapacity = kCapacity;
+
+	for(size_t i = 0; i < requiredCapacity; i++)
+	{
+		ModuleInfoApple& miaUser = moduleInfoAppleArray[i];
+		
+		if((miaUser.mBaseAddress < codeAddress) && (codeAddress < (miaUser.mBaseAddress + miaUser.mSize)))
+			return (ModuleHandle)miaUser.mBaseAddress;
+	}
+#endif
+
+	return 0;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	// On Apple platforms, pthread_t is typedef'd to an internally defined _opaque_pthread_t*.
+	
+	bool threadIsSelf = (threadId == (intptr_t)EA::Thread::kThreadIdInvalid) || // Due to a specification mistake, this function 
+						(threadId == (intptr_t)EA::Thread::kThreadIdCurrent) || // accepts kThreadIdInvalid to mean the current thread.
+						(threadId == (intptr_t)pthread_self());
+	
+	if(threadIsSelf)
+	{
+		bool result = true;
+		context.mStackBase  = (uintptr_t)GetStackBase();
+		context.mStackLimit = (uintptr_t)GetStackLimit();
+
+		#if defined(EA_PROCESSOR_ARM32)
+			void* p;
+			EAGetInstructionPointer(p);
+			context.mPC = (uint32_t)p;
+			context.mFP = (uint32_t)__builtin_frame_address(0);  // This data isn't exactly right. We want to return the registers as they 
+			context.mSP = (uint32_t)__builtin_frame_address(0);  // are for the caller, not for us. Without doing that we end up reporting 
+			context.mLR = (uint32_t)__builtin_return_address(0); // an extra frame (this one) on the top of callstacks.
+
+		#elif defined(EA_PROCESSOR_ARM64)
+			void* p;
+			EAGetInstructionPointer(p);
+			context.mPC = (uint64_t)p;
+			context.mFP = (uint64_t)__builtin_frame_address(0);
+			context.mSP = (uint64_t)__builtin_frame_address(0);  
+			context.mLR = (uint64_t)__builtin_return_address(0); 
+
+		#elif defined(EA_PROCESSOR_X86_64)
+			context.mRIP = (uint64_t)__builtin_return_address(0);
+			context.mRSP = 0;
+			context.mRBP = (uint64_t)__builtin_frame_address(1);
+
+		#elif defined(EA_PROCESSOR_X86)
+			context.mEIP = (uint32_t)__builtin_return_address(0);
+			context.mESP = 0;
+			context.mEBP = (uint32_t)__builtin_frame_address(1);
+
+		#else
+			// platform not supported 
+			result = false;
+
+		#endif
+	   
+		return result;
+	}
+	else
+	{
+		// Pause the thread, get its state, unpause it. 
+		//
+		// Question: Is it truly necessary to suspend a thread in Apple platforms in order to read
+		// their state? It is usually so for other platforms doing the same kind of thing.
+		//
+		// Question: Is it dangerous to suspend an arbitrary thread? Often such a thing is dangerous
+		// because that other thread might for example have some kernel mutex locked that we need.
+		// We'll have to see, as it's a great benefit for us to be able to read callstack contexts.
+		// Another solution would be to inject a signal handler into the thread and signal it and 
+		// have the handler read context information, if that can be useful. There's example code
+		// on the Internet for that.
+		// Some documentation:
+		//     http://www.linuxselfhelp.com/gnu/machinfo/html_chapter/mach_7.html
+		
+		mach_port_t   thread = pthread_mach_thread_np((pthread_t)threadId); // Convert pthread_t to kernel thread id.
+		kern_return_t result = thread_suspend(thread);
+		
+		if(result == KERN_SUCCESS)
+		{
+			#if defined(EA_PROCESSOR_ARM32)                            
+				arm_thread_state_t threadState; memset(&threadState, 0, sizeof(threadState));
+				mach_msg_type_number_t stateCount = MACHINE_THREAD_STATE_COUNT;
+				result = thread_get_state(thread, MACHINE_THREAD_STATE, (natural_t*)(uintptr_t)&threadState, &stateCount);
+
+				context.mFP = threadState.__r[7]; // Apple uses R7 for the frame pointer in both ARM and Thumb CPU modes.
+				context.mPC = threadState.__pc;
+				context.mSP = threadState.__sp;
+				context.mLR = threadState.__lr;        
+
+			#elif defined(EA_PROCESSOR_ARM64)                            
+				__darwin_arm_thread_state64 threadState; memset(&threadState, 0, sizeof(threadState));
+				mach_msg_type_number_t stateCount = MACHINE_THREAD_STATE_COUNT;
+				result = thread_get_state(thread, MACHINE_THREAD_STATE, (natural_t*)(uintptr_t)&threadState, &stateCount);
+
+				context.mFP = threadState.__fp;
+				context.mPC = threadState.__pc;
+				context.mSP = threadState.__sp;
+				context.mLR = threadState.__lr;
+
+			#elif defined(EA_PROCESSOR_X86_64)
+				// Note: This is yielding gibberish data for me, despite everything seemingly being done correctly.
+							
+				x86_thread_state_t     threadState; memset(&threadState, 0, sizeof(threadState));
+				mach_msg_type_number_t stateCount  = MACHINE_THREAD_STATE_COUNT;
+				result = thread_get_state(thread, MACHINE_THREAD_STATE, (natural_t*)(uintptr_t)&threadState, &stateCount);
+
+				context.mRIP = threadState.uts.ts64.__rip;
+				context.mRSP = threadState.uts.ts64.__rsp;
+				context.mRBP = threadState.uts.ts64.__rbp;
+
+			#elif defined(EA_PROCESSOR_X86)
+				// Note: This is yielding gibberish data for me, despite everything seemingly being done correctly.
+							
+				x86_thread_state_t     threadState; memset(&threadState, 0, sizeof(threadState));
+				mach_msg_type_number_t stateCount  = MACHINE_THREAD_STATE_COUNT;
+				result = thread_get_state(thread, MACHINE_THREAD_STATE, (natural_t*)(uintptr_t)&threadState, &stateCount);
+
+				context.mEIP = threadState.uts.ts32.__eip;
+				context.mESP = threadState.uts.ts32.__esp;
+				context.mEBP = threadState.uts.ts32.__ebp;
+
+			#endif
+
+			thread_resume(thread); 
+			return (result == KERN_SUCCESS);
+		}
+	}
+	
+	// Not currently implemented for the given platform.
+	memset(&context, 0, sizeof(context));
+	return false;
+}
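+
+// A minimal sketch of combining the functions above to read another thread's
+// callstack (otherThreadId is assumed to be a pthread_t from this process).
+// GetCallstackContext suspends and resumes the target thread internally:
+//
+//     CallstackContext context;
+//     void*            addresses[32];
+//     size_t           count = 0;
+//
+//     if(EA::Thread::GetCallstackContext(context, (intptr_t)otherThreadId))
+//         count = EA::Thread::GetCallstack(addresses, 32, &context);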
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	pthread_t pthread = pthread_from_mach_thread_np((mach_port_t)sysThreadId);
+	
+	return GetCallstackContext(context, (intptr_t)pthread);
+}
+
+
+// To do: Remove the usage of sStackBase for the platforms that it's not needed,
+// as can be seen from the logic below. For example Mac OS X probably doesn't need it.
+static EA::Thread::ThreadLocalStorage sStackBase;
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* pStackBase)
+{
+	if(pStackBase)
+		sStackBase.SetValue(pStackBase);
+	else
+	{
+		pStackBase = __builtin_frame_address(0);
+
+		if(pStackBase)
+			SetStackBase(pStackBase);
+		// Else failure; do nothing.
+	}
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	#if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE)
+		void* pBase;
+		if(GetPthreadStackInfo(&pBase, NULL))
+			return pBase;
+	#endif
+
+	// Else we require the user to have set this previously, usually via a call 
+	// to SetStackBase() in the start function of this currently executing
+	// thread (or main for the main thread).
+	return sStackBase.GetValue();
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	#if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE)
+		void* pLimit;
+		if(GetPthreadStackInfo(NULL, &pLimit))
+			return pLimit;
+	#endif
+
+	// If this fails then we might have an issue where you are using GCC but not 
+	// using the GCC standard library glibc. Or maybe glibc doesn't support 
+	// __builtin_frame_address on this platform. Or maybe you aren't using GCC but
+	// rather a compiler that masquerades as GCC (common situation).
+	void* pStack = __builtin_frame_address(0);
+	return (void*)((uintptr_t)pStack & ~4095); // Round down to nearest page.
+}
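+
+// A minimal usage sketch: on configurations where GetPthreadStackInfo isn't
+// available, record the stack base near the top of each thread's entry function.
+// Passing NULL tells SetStackBase to use the current frame address, which is
+// close enough to the true base at that point (MyThreadEntry is hypothetical):
+//
+//     static intptr_t MyThreadEntry(void* pContext)
+//     {
+//         EA::Thread::SetStackBase(NULL);
+//         // ... thread work ...
+//         return 0;
+//     }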
+
+
+} // namespace Thread
+} // namespace EA
+
+

+ 163 - 0
source/apple/eathread_semaphore_apple.cpp

@@ -0,0 +1,163 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_semaphore.h>
+
+#if defined(EA_PLATFORM_APPLE)
+
+#include <mach/task.h>
+#include <mach/mach_init.h>
+#include <mach/kern_return.h>
+#include <semaphore.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <limits.h>
+
+
+EASemaphoreData::EASemaphoreData()
+: mSemaphore(), mnCount(0), mnMaxCount(INT_MAX)
+{
+}
+
+
+EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* /*pName*/)
+: mInitialCount(initialCount), mMaxCount(INT_MAX), mbIntraProcess(bIntraProcess)
+{
+}
+
+
+EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+{
+	if(!pSemaphoreParameters && bDefaultParameters)
+	{
+		SemaphoreParameters parameters;
+		Init(&parameters);
+	}
+	else
+		Init(pSemaphoreParameters);
+}
+
+
+EA::Thread::Semaphore::Semaphore(int initialCount)
+{
+	SemaphoreParameters parameters(initialCount);
+	Init(&parameters);
+}
+
+
+EA::Thread::Semaphore::~Semaphore()
+{
+	const kern_return_t result = semaphore_destroy(mach_task_self(), mSemaphoreData.mSemaphore); (void)result;
+	EAT_ASSERT(KERN_SUCCESS == result);
+}
+
+
+bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+{
+	if(pSemaphoreParameters)
+	{
+		mSemaphoreData.mnCount    = pSemaphoreParameters->mInitialCount;
+		mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
+
+		if(mSemaphoreData.mnCount < 0)
+			mSemaphoreData.mnCount = 0;
+
+		// To do (Jaap Suter, December 2009): do we care about actually supporting this?
+		mSemaphoreData.mbIntraProcess = pSemaphoreParameters->mbIntraProcess;
+
+		const kern_return_t result = semaphore_create(mach_task_self(), &mSemaphoreData.mSemaphore, SYNC_POLICY_FIFO, static_cast<int>(mSemaphoreData.mnCount)); (void)result;
+		EAT_ASSERT(KERN_SUCCESS == result);
+
+		return true;
+	}
+
+	return false;
+}
+
+
+int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+{
+	kern_return_t result = KERN_SUCCESS;
+
+	if(timeoutAbsolute == kTimeoutNone)
+	{
+		result = semaphore_wait(mSemaphoreData.mSemaphore);
+
+		if(result != KERN_SUCCESS)
+		{
+			EAT_ASSERT(false); // This is an error condition.
+			return kResultError;
+		}
+	}
+	else
+	{
+		for (;;)
+		{               
+			ThreadTime timeoutRelative = kTimeoutImmediate;
+			if (timeoutAbsolute != kTimeoutImmediate)
+			{
+				ThreadTime timeCurrent = GetThreadTime();
+				timeoutRelative = (timeoutAbsolute > timeCurrent) ? (timeoutAbsolute - timeCurrent) : kTimeoutImmediate;
+			}
+
+			mach_timespec_t machTimeoutRelative = { (unsigned int)timeoutRelative.tv_sec, (clock_res_t)timeoutRelative.tv_nsec };
+			result = semaphore_timedwait(mSemaphoreData.mSemaphore, machTimeoutRelative);
+
+			if (result == KERN_SUCCESS)
+				break;
+
+			if (result == KERN_OPERATION_TIMED_OUT)
+				return kResultTimeout;
+
+			// printf("semaphore_timedwait other error: %d\n", result);
+		}
+	}
+
+	EAT_ASSERT(mSemaphoreData.mnCount > 0);
+	return (int)mSemaphoreData.mnCount.Decrement(); // AtomicInt32 operation. Note that the value of the semaphore count could change from the returned value by the time the caller reads it. This is fine but the user should understand this.
+}
+
+
+int EA::Thread::Semaphore::Post(int count)
+{
+	// Some systems have a sem_post_multiple which we could take advantage 
+	// of here to atomically post multiple times.
+	EAT_ASSERT(mSemaphoreData.mnCount >= 0);
+
+	int currentCount = mSemaphoreData.mnCount;
+
+	// If count would cause an overflow exit early
+	if ((mSemaphoreData.mnMaxCount - count) < currentCount)
+		return kResultError;
+
+	currentCount += count;
+
+	while(count-- > 0)
+	{
+		++mSemaphoreData.mnCount;     // AtomicInt32 operation.
+
+		if(semaphore_signal(mSemaphoreData.mSemaphore) != KERN_SUCCESS)
+		{
+			--mSemaphoreData.mnCount; // AtomicInt32 operation.
+			EAT_ASSERT(false);
+			return kResultError;        
+		}
+	}
+
+	// If all count posts occurred...
+	return currentCount; // It's possible that another thread may have modified this value since we changed it, but that's not important.
+}
+
+
+int EA::Thread::Semaphore::GetCount() const
+{
+	return (int)mSemaphoreData.mnCount;
+}
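+
+// A minimal usage sketch of the class above: one thread produces and another
+// consumes, with kTimeoutNone meaning "wait indefinitely". A non-negative
+// return from Wait indicates success (it is the decremented count):
+//
+//     EA::Thread::Semaphore gSemaphore(0);
+//
+//     // Producer thread:
+//     gSemaphore.Post(1);
+//
+//     // Consumer thread:
+//     if(gSemaphore.Wait(EA::Thread::kTimeoutNone) >= 0)
+//         { } // An item is available.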
+
+
+#endif // #if defined(EA_PLATFORM_APPLE) 

+ 412 - 0
source/arm/eathread_callstack_arm.cpp

@@ -0,0 +1,412 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/eathread_storage.h>
+#include <string.h>
+
+#if EATHREAD_DEBUG_DETAIL_ENABLED
+	#include <EAStdC/EASprintf.h>
+#endif
+
+#if defined(EA_PLATFORM_WINDOWS) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	#pragma warning(push, 0)
+	#include <Windows.h>
+	#include <winternl.h>
+	#pragma warning(pop)
+#endif
+
+#if defined(EA_PLATFORM_UNIX)
+	#include <pthread.h>
+	#include <eathread/eathread.h>
+#endif
+
+#if defined(EA_COMPILER_CLANG)
+	#include <unwind.h>
+#endif
+
+namespace EA
+{
+namespace Thread
+{
+
+
+#if defined(EA_PLATFORM_WINDOWS) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	EATHREADLIB_API void GetInstructionPointer(void*& pInstruction)
+	{
+		CONTEXT context;
+
+		// Apparently there is no need to memset the context struct.
+		context.ContextFlags = CONTEXT_ALL;
+		RtlCaptureContext(&context);
+
+		// Possibly use the __emit intrinsic. http://msdn.microsoft.com/en-us/library/ms933778.aspx
+		pInstruction = (void*)(uintptr_t)context.___; // To do.
+	}
+#elif defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+	EATHREADLIB_API void GetInstructionPointer(void*& pInstruction)
+	{
+		// __builtin_return_address returns the address with the Thumb bit set
+		// if it's a return to Thumb code. We intentionally preserve this and 
+		// don't try to mask it away.
+		pInstruction = (void*)(uintptr_t)__builtin_return_address(0);
+	}
+#else
+	EATHREADLIB_API void GetInstructionPointer(void*& /*pInstruction*/)
+	{
+		// Unimplemented on this platform.
+	}
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+}
+
+
+#if defined(EA_PLATFORM_APPLE)
+
+	// Apple defines a different ABI than the ARM eabi used by Linux and the ABI used
+	// by Microsoft. It implements a predictable stack frame system using r7 as the 
+	// frame pointer. Documentation:
+	//     http://developer.apple.com/library/ios/#documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html
+	//
+	// Apple ARM stack frame:
+	//     struct StackFrame {
+	//         StackFrame* mpParentStackFrame;
+	//         void* mpReturnPC;
+	//     }
+	//
+	// Basically, Apple uses r7 as a frame pointer. So for any function you are
+	// executing, r7 + 4 is the LR passed to us by the caller and is the PC of 
+	// the parent. And r7 + 0 is a pointer to the parent's r7. 
+	//
+	static size_t GetCallstackARMApple(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+	{
+		struct StackFrame {
+			StackFrame* mpParentStackFrame;
+			void*       mpReturnPC;
+		};
+		
+		size_t index = 0;
+
+		if(nReturnAddressArrayCapacity && pContext->mFP) // To consider: Do some basic validation of mFP if it refers to this same thread.
+		{
+			StackFrame* pStackFrame = static_cast<StackFrame*>((void*)pContext->mFP); // Points to the GetCallstack frame pointer.
+
+			pReturnAddressArray[index++] = pStackFrame->mpReturnPC;  // Should happen to be equal to pContext->mLR.
+
+			while(pStackFrame && pStackFrame->mpReturnPC && (index < nReturnAddressArrayCapacity)) // To consider: do some validation of the PC. We can validate it by making sure it's with 20 MB of our PC and also verify that the instruction before it (be it Thumb or ARM) is a BL or BLX function call instruction.
+			{
+				pStackFrame = pStackFrame->mpParentStackFrame;
+
+				if(pStackFrame && pStackFrame->mpReturnPC)
+					pReturnAddressArray[index++] = pStackFrame->mpReturnPC;
+			}
+		}
+
+		return index;
+	}
+
+#endif
+
+#if defined(EA_COMPILER_CLANG)
+	struct CallstackState
+	{
+		void** current;
+		void** end;
+	};
+
+	static _Unwind_Reason_Code UnwindCallback(struct _Unwind_Context* context, void* arg)
+	{
+		CallstackState* state = static_cast<CallstackState*>(arg);
+		uintptr_t pc = _Unwind_GetIP(context);
+		if (pc)
+		{
+			if (state->current == state->end)
+			{
+				return _URC_END_OF_STACK;
+			}
+			else
+			{
+				*state->current++ = reinterpret_cast<void*>(pc);
+			}
+		}
+		return _URC_NO_REASON;
+	}
+
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+// Capture up to nReturnAddressArrayCapacity elements of the call stack, 
+// or the whole callstack, whichever is smaller. 
+///////////////////////////////////////////////////////////////////////////////
+
+	EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+	{
+		void* p;
+		CallstackContext context;
+		size_t entryCount = 0;
+
+		if(pContext)
+			context = *pContext;
+		else
+		{
+			#if defined(__ARMCC_VERSION)
+				context.mFP = 0; // We don't currently have a simple way to read fp (which is r7 (Thumb) or r11 (ARM)).
+				context.mSP = (uintptr_t)__current_sp();
+				context.mLR = (uintptr_t)__return_address();
+				GetInstructionPointer(p); // Intentionally don't call __current_pc() or EAGetInstructionPointer, because these won't set the Thumb bit if this is Thumb code.
+				context.mPC = (uintptr_t)p;
+
+			#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG) // Including Apple iOS.
+				void* spAddress = &context.mSP;
+				void* sp;
+				asm volatile(
+					"add %0, sp, #0\n"
+					"str %0, [%1, #0]\n"
+						 : "=r"(sp), "+r"(spAddress) :: "memory");
+
+				context.mFP = (uintptr_t)__builtin_frame_address(0);
+				context.mLR = (uintptr_t)__builtin_return_address(0);
+				GetInstructionPointer(p); // Intentionally don't call EAGetInstructionPointer, because it won't set the Thumb bit if this is Thumb code.
+				context.mPC = (uintptr_t)p;
+
+			#elif defined(EA_PLATFORM_WINDOWS) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+				// Possibly use the __emit intrinsic. Do this by making a __declspec(naked) function that 
+				// does nothing but return r14 (move r14 to r0). Need to know the opcode for that.
+				// http://msdn.microsoft.com/en-us/library/ms933778.aspx
+				#error Need to complete this somehow.
+				context.mFP = 0; 
+				context.mLR = 0;
+				context.mSP = 0;
+				GetInstructionPointer(p); // Intentionally don't call EAGetInstructionPointer, because it won't set the Thumb bit if this is Thumb code.
+				context.mPC = (uintptr_t)p;
+			#endif
+		}
+
+		#if defined(__APPLE__)
+			// We have reason to believe that the following should be reliable. But if it's not then we should
+			// just call the code below.
+			entryCount = GetCallstackARMApple(pReturnAddressArray, nReturnAddressArrayCapacity, &context);
+
+			if(entryCount >= 3) // If GetCallstackARMApple seems to have been successful, use it. Else fall through to the more complicated code below. 
+				return entryCount;
+		#elif defined(EA_COMPILER_CLANG)
+			CallstackState state = { pReturnAddressArray, pReturnAddressArray + nReturnAddressArrayCapacity };
+			_Unwind_Backtrace(UnwindCallback, &state);
+
+			entryCount = state.current - pReturnAddressArray;
+		#else
+			EA_UNUSED(pReturnAddressArray);
+			EA_UNUSED(nReturnAddressArrayCapacity);
+			EA_UNUSED(context);
+		#endif
+
+		EA_UNUSED(p);
+		return entryCount;
+	}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	context.mSP = pContext->mGpr[13];
+	context.mLR = pContext->mGpr[14];
+	context.mPC = pContext->mGpr[15];
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+EATHREADLIB_API size_t GetModuleFromAddress(const void* /*address*/, char* pModuleName, size_t /*moduleNameCapacity*/)
+{
+	pModuleName[0] = 0;
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* /*pAddress*/)
+{
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+// Under Windows, the threadId parameter is expected to be a thread HANDLE, 
+// which is different from a windows integer thread id.
+// On Unix the threadId parameter is expected to be a pthread id.
+//
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	memset(&context, 0, sizeof(context));
+
+	// True Linux-based ARM platforms (usually tablets and phones) can use pthread_attr_getstack.
+	#if defined(EA_PLATFORM_ANDROID) || defined(EA_PLATFORM_IPHONE)
+		if((threadId == (intptr_t)kThreadIdInvalid) || 
+		   (threadId == (intptr_t)kThreadIdCurrent) || 
+		   (threadId == (intptr_t)EA::Thread::GetThreadId()))
+		{
+			void* p;
+
+			// TODO: factor this into defines so that this implementation and GetCallstack's stay in sync.
+			#if defined(__ARMCC_VERSION)
+				context.mSP = (uint32_t)__current_sp();
+				context.mLR = (uint32_t)__return_address();
+				context.mPC = (uint32_t)__current_pc();
+
+			#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
+				// register uintptr_t current_sp asm ("sp");
+				p = __builtin_frame_address(0);
+				context.mSP = (uintptr_t)p;
+
+				p = __builtin_return_address(0);
+				context.mLR = (uint32_t)p;
+
+				EAGetInstructionPointer(p);
+				context.mPC = reinterpret_cast<uintptr_t>(p);
+
+			#elif defined(_MSC_VER)
+				context.mSP = 0;
+
+				#error EACallstack::GetCallstack: Need a way to get the return address (register 14)
+				// Possibly use the __emit intrinsic. Do this by making a __declspec(naked) function that 
+				// does nothing but return r14 (move r14 to r0). Need to know the opcode for that.
+				// http://msdn.microsoft.com/en-us/library/ms933778.aspx
+				context.mLR = 0;
+
+				EAGetInstructionPointer(p);
+				context.mPC = reinterpret_cast<uintptr_t>(p);
+			#endif
+
+			context.mStackBase    = (uintptr_t)GetStackBase();
+			context.mStackLimit   = (uintptr_t)GetStackLimit();
+			context.mStackPointer = context.mSP;
+
+			return true;
+		}
+		// Else haven't implemented getting the stack info for other threads
+
+	#else
+		// Not currently implemented for the given platform.
+		EA_UNUSED(threadId);
+
+	#endif
+
+	return false;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	return GetCallstackContext(context, sysThreadId);
+}
+
+
+// To do: Remove the usage of sStackBase for the platforms that it's not needed,
+// as can be seen from the logic below. For example iPhone probably doesn't need it.
+EA::Thread::ThreadLocalStorage sStackBase;
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* pStackBase)
+{
+	if(pStackBase)
+		sStackBase.SetValue(pStackBase);
+	else
+	{
+		// Can't call GetStackLimit() because doing so would disturb the stack. 
+		// As of this writing, we don't have an EAGetStackTop macro which could do this.
+		// So we implement it inline here.
+		#if   defined(__ARMCC_VERSION)
+			pStackBase = (void*)__current_sp();
+		#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
+			pStackBase = __builtin_frame_address(0);
+		#endif
+
+		if(pStackBase)
+			SetStackBase(pStackBase);
+		// Else failure; do nothing.
+	}
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	#if defined(EA_PLATFORM_UNIX)
+		void* pBase;
+		if(GetPthreadStackInfo(&pBase, NULL))
+			return pBase;
+	#endif
+
+	// Else we require the user to have set this previously, usually via a call 
+	// to SetStackBase() in the start function of this currently executing 
+	// thread (or main for the main thread).
+	return sStackBase.GetValue();
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	#if defined(EA_PLATFORM_UNIX)
+		void* pLimit;
+		if(GetPthreadStackInfo(NULL, &pLimit))
+			return pLimit;
+	#endif
+
+	#if   defined(__ARMCC_VERSION)
+		void* pStack = (void*)__current_sp();
+	#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
+		void* pStack = __builtin_frame_address(0);
+	#else
+		void* pStack = NULL;  // TODO:  determine fix.
+		pStack = &pStack;
+	#endif
+
+	return (void*)((uintptr_t)pStack & ~4095); // Round down to nearest page, as the stack grows downward.
+}
+
+
+
+} // namespace Thread
+} // namespace EA
+
+
+
+
+

+ 217 - 0
source/cpp11/eathread_cpp11.cpp

@@ -0,0 +1,217 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+
+#include "eathread/eathread.h"
+#include "eathread/eathread_thread.h"
+
+#include <chrono>
+#include <cstring>
+#include <sstream>
+#include <thread>
+#include <type_traits>
+
+namespace EA
+{
+	namespace Thread
+	{
+		EA::Thread::AssertionFailureFunction gpAssertionFailureFunction = NULL;
+		void*                                gpAssertionFailureContext  = NULL;
+
+		EATHREADLIB_API ThreadId EA::Thread::GetThreadId()
+		{
+			return std::this_thread::get_id();
+		}
+
+		EATHREADLIB_API ThreadId EA::Thread::GetThreadId(EA::Thread::SysThreadId id)
+		{
+			EAThreadDynamicData* const pTDD = EA::Thread::FindThreadDynamicData(id);
+			if(pTDD)
+			{   
+				return pTDD->mpComp->mThread.get_id();
+			}
+
+			return EA::Thread::kThreadIdInvalid;
+		}
+
+		EATHREADLIB_API SysThreadId EA::Thread::GetSysThreadId(ThreadId threadId)
+		{
+			EAThreadDynamicData* tdd = EA::Thread::FindThreadDynamicData(threadId);
+			if (tdd && tdd->mpComp)
+				return tdd->mpComp->mThread.native_handle();
+
+			ThreadId threadIdCurrent = GetThreadId();
+			if(threadId == threadIdCurrent)
+			{
+				#if defined(EA_PLATFORM_MICROSOFT)
+					std::thread::id stdId = std::this_thread::get_id();
+					EAT_COMPILETIME_ASSERT(sizeof(_Thrd_t) == sizeof(std::thread::id));
+					return ((_Thrd_t&)stdId)._Hnd;
+				#elif EA_POSIX_THREADS_AVAILABLE && defined(_YVALS)
+					std::thread::id stdId = std::this_thread::get_id();
+					EAT_COMPILETIME_ASSERT(sizeof(_Thrd_t) == sizeof(std::thread::id));
+					return reinterpret_cast<_Thrd_t>(stdId);
+				#else
+					#error Platform not supported yet.
+				#endif
+			}
+
+			EAT_ASSERT_MSG(false, "Failed to find associated EAThreadDynamicData for this thread.\n");
+			return SysThreadId();
+		}
+
+		EATHREADLIB_API SysThreadId EA::Thread::GetSysThreadId()
+		{
+			// There currently isn't a means to directly get the current SysThreadId, so we do it indirectly:
+			return GetSysThreadId(std::this_thread::get_id());
+		}
+
+		EATHREADLIB_API ThreadTime EA::Thread::GetThreadTime()
+		{
+			using namespace std::chrono;
+			auto nowMs = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
+			return nowMs.count();
+		}
+
+		EATHREADLIB_API int GetThreadPriority()
+		{
+			// No way to query or set thread priority through standard C++11 thread library.
+			// On some platforms this could be implemented through platform specific APIs
+			return kThreadPriorityDefault;
+		}
+
+		EATHREADLIB_API bool SetThreadPriority(int nPriority)
+		{
+			// No way to query or set thread priority through standard C++11 thread library.
+			// On some platforms this could be implemented through platform specific APIs
+			EA_UNUSED(nPriority);
+			return false;
+		}
+
+		EATHREADLIB_API void SetThreadProcessor(int nProcessor)
+		{
+			// No way to query or set thread processor through standard C++11 thread library.
+			// On some platforms this could be implemented through platform specific APIs
+			EA_UNUSED(nProcessor);
+		}
+
+		EATHREADLIB_API int GetThreadProcessor()
+		{
+			// No way to query or set thread processor through standard C++11 thread library.
+			// On some platforms this could be implemented through platform specific APIs
+			return 0;
+		}
+
+		EATHREADLIB_API int GetProcessorCount()
+		{
+			return static_cast<int>(std::thread::hardware_concurrency());
+		}
+
+		EATHREADLIB_API void ThreadSleep(const ThreadTime& timeRelative)
+		{
+			std::this_thread::sleep_for(std::chrono::milliseconds(timeRelative));
+		}
+
+		void ThreadEnd(intptr_t threadReturnValue)
+		{
+			// No way to end a thread through standard C++11 thread library.
+			// On some platforms this could be implemented through platform specific APIs
+			EA_UNUSED(threadReturnValue);
+			EAT_ASSERT_MSG(false, "ThreadEnd is not implemented for C++11 threads.\n");
+		}
+
+		EATHREADLIB_API void EA::Thread::SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask)
+		{
+			// Update the affinity mask in the thread dynamic data cache.
+			EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+			if(pTDD)
+			{
+				pTDD->mnThreadAffinityMask = nAffinityMask;
+			}
+
+			#if EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED 
+				// Call the Windows library function.
+			#endif
+		}
+
+		EATHREADLIB_API EA::Thread::ThreadAffinityMask EA::Thread::GetThreadAffinityMask(const EA::Thread::ThreadId& id)
+		{ 
+			// Update the affinity mask in the thread dynamic data cache.
+			EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+			if(pTDD)
+			{
+				return pTDD->mnThreadAffinityMask;
+			}
+
+			return kThreadAffinityMaskAny;
+		}
+
+		EATHREADLIB_API void SetAssertionFailureFunction(AssertionFailureFunction pAssertionFailureFunction, void* pContext)
+		{
+			gpAssertionFailureFunction = pAssertionFailureFunction;
+			gpAssertionFailureContext  = pContext;
+		}
+
+		EATHREADLIB_API void AssertionFailure(const char* pExpression)
+		{
+			if(gpAssertionFailureFunction)
+				gpAssertionFailureFunction(pExpression, gpAssertionFailureContext);
+		}
+
+		void* GetThreadStackBase()
+		{
+			return nullptr;
+		}
+
+		// This can be removed once all remaining synchronization primitives are implemented in terms of C++11 APIs
+		uint32_t EA::Thread::RelativeTimeoutFromAbsoluteTimeout(ThreadTime timeoutAbsolute)
+		{
+			EAT_ASSERT((timeoutAbsolute == kTimeoutImmediate) || (timeoutAbsolute > EATHREAD_MIN_ABSOLUTE_TIME)); // Assert that the user didn't make the mistake of treating time as relative instead of absolute.
+
+			uint32_t timeoutRelative = 0; // Use uint32_t rather than the Windows-only DWORD; this file is platform-neutral.
+
+			if (timeoutAbsolute == kTimeoutNone)
+			{
+				timeoutRelative = 0xffffffff;
+			}
+			else if (timeoutAbsolute == kTimeoutImmediate)
+			{
+				timeoutRelative = 0;
+			}
+			else
+			{
+				ThreadTime timeCurrent(GetThreadTime());
+				timeoutRelative = (timeoutAbsolute > timeCurrent) ? static_cast<uint32_t>(timeoutAbsolute - timeCurrent) : 0;
+			}
+
+			EAT_ASSERT((timeoutRelative == 0xffffffff) || (timeoutRelative < 100000000)); // Assert that the timeout is a sane value and didn't wrap around.
+
+			return timeoutRelative;
+		}
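+
+		// For example, with GetThreadTime() returning milliseconds, a caller that
+		// wants a 500 ms wait does something like the following, and this function
+		// recovers ~500 as the relative timeout (0xffffffff means "wait forever"):
+		//
+		//     ThreadTime timeoutAbsolute = GetThreadTime() + 500;
+		//     uint32_t   timeoutRelative = RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute); // ~500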
+
+		// Implement native_handle_type comparison as a memcmp() - may need platform specific implementations on some future platforms.
+		bool Equals(const SysThreadId& a, const SysThreadId& b)
+		{
+			static_assert((std::is_fundamental<SysThreadId>::value || std::is_pointer<SysThreadId>::value || std::is_pod<SysThreadId>::value), 
+				"SysThreadId should be comparable using memcmp()");
+			return memcmp(&a, &b, sizeof(SysThreadId)) == 0;
+		}
+
+		namespace detail
+		{
+			// Override the default EAThreadIdToString implementation.
+			#define EAThreadIdToString_CUSTOM_IMPLEMENTATION
+			ThreadIdToStringBuffer::ThreadIdToStringBuffer(EA::Thread::ThreadId threadId)
+			{
+				std::stringstream formatStream;
+				formatStream << threadId;
+				strncpy(mBuf, formatStream.str().c_str(), BufSize - 1);
+				mBuf[BufSize - 1] = '\0';
+			}
+
+			SysThreadIdToStringBuffer::SysThreadIdToStringBuffer(EA::Thread::SysThreadId sysThreadId)
+			{
+				EA_UNUSED(sysThreadId); // There is no portable way to print a native handle here.
+				strncpy(mBuf, "Unknown", BufSize - 1);
+				mBuf[BufSize - 1] = '\0';
+			}
+		}
+	}
+}
+

+ 97 - 0
source/cpp11/eathread_mutex_cpp11.cpp

@@ -0,0 +1,97 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include "eathread/eathread_mutex.h"
+
+EAMutexData::EAMutexData() : mnLockCount(0) {}
+
+EA::Thread::MutexParameters::MutexParameters(bool /*bIntraProcess*/, const char* pName)
+{
+	if(pName)
+	{
+		strncpy(mName, pName, sizeof(mName)-1);
+		mName[sizeof(mName)-1] = 0;
+	}
+	else
+	{
+		mName[0] = 0;
+	}
+}
+
+EA::Thread::Mutex::Mutex(const MutexParameters* pMutexParameters, bool bDefaultParameters)
+{
+	if(!pMutexParameters && bDefaultParameters)
+	{
+		MutexParameters parameters;
+		Init(&parameters);
+	}
+	else
+	{
+		Init(pMutexParameters);
+	}
+}
+
+EA::Thread::Mutex::~Mutex()
+{
+	EAT_ASSERT(mMutexData.mnLockCount == 0);
+}
+
+bool EA::Thread::Mutex::Init(const MutexParameters* pMutexParameters)
+{
+	if (pMutexParameters)
+	{
+		mMutexData.mnLockCount = 0;
+		return true;
+	}
+	return false;
+}
+
+int EA::Thread::Mutex::Lock(const ThreadTime& timeoutAbsolute)
+{
+	if (timeoutAbsolute == kTimeoutNone)
+	{
+		mMutexData.mMutex.lock();
+	}
+	else
+	{
+		std::chrono::milliseconds timeoutAbsoluteMs(timeoutAbsolute);
+		std::chrono::time_point<std::chrono::system_clock> timeout_time(timeoutAbsoluteMs);
+		if (!mMutexData.mMutex.try_lock_until(timeout_time))
+		{
+			return kResultTimeout;
+		}
+	}
+
+	EAT_ASSERT((mMutexData.mThreadId = EA::Thread::GetThreadId()) != kThreadIdInvalid);
+	EAT_ASSERT(mMutexData.mnLockCount >= 0);
+
+	return ++mMutexData.mnLockCount; // This is safe to do because we have the lock.
+}
+
+int EA::Thread::Mutex::Unlock()
+{
+	EAT_ASSERT(mMutexData.mThreadId == EA::Thread::GetThreadId());
+	EAT_ASSERT(mMutexData.mnLockCount > 0);
+
+	const int nReturnValue(--mMutexData.mnLockCount); // This is safe to do because we have the lock.
+	mMutexData.mMutex.unlock();
+	return nReturnValue;
+}
+
+int EA::Thread::Mutex::GetLockCount() const
+{
+	return mMutexData.mnLockCount;
+}
+
+bool EA::Thread::Mutex::HasLock() const
+{
+#if EAT_ASSERT_ENABLED
+	return (mMutexData.mnLockCount > 0) && (mMutexData.mThreadId == EA::Thread::GetThreadId());
+#else
+	return (mMutexData.mnLockCount > 0); // This is the best we can do, though it is of limited use, since it doesn't tell you if you are the thread with the lock.
+#endif
+}
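+
+// A minimal usage sketch of the class above. Lock with kTimeoutNone blocks
+// indefinitely and returns the new lock count; Unlock returns the count after
+// the release:
+//
+//     EA::Thread::Mutex mutex;
+//
+//     if(mutex.Lock(EA::Thread::kTimeoutNone) > 0)
+//     {
+//         // ... critical section ...
+//         mutex.Unlock();
+//     }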
+
+
+

+ 5 - 0
source/cpp11/eathread_semaphore_cpp11.cpp

@@ -0,0 +1,5 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include "eathread/eathread_semaphore.h"

+ 488 - 0
source/cpp11/eathread_thread_cpp11.cpp

@@ -0,0 +1,488 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include "eathread/eathread_thread.h"
+#include "eathread/eathread.h"
+#include "eathread/eathread_sync.h"
+#include "eathread/eathread_callstack.h"
+#include "eathread/internal/eathread_global.h"
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+		static AtomicInt32 nLastProcessor = 0;
+		const size_t kMaxThreadDynamicDataCount = 128;
+
+		struct EAThreadGlobalVars
+		{
+			char gThreadDynamicData[kMaxThreadDynamicDataCount][sizeof(EAThreadDynamicData)];
+			AtomicInt32 gThreadDynamicDataAllocated[kMaxThreadDynamicDataCount];
+			Mutex gThreadDynamicMutex;
+		};
+		EATHREAD_GLOBALVARS_CREATE_INSTANCE;
+
+		EAThreadDynamicData* AllocateThreadDynamicData()
+		{
+			for (size_t i(0); i < kMaxThreadDynamicDataCount; ++i)
+			{
+				if (EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[i].SetValueConditional(1, 0))
+					return (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+			}
+
+			// This is a safety fallback mechanism. In practice it won't be used in almost all situations.
+			if (gpAllocator)
+				return (EAThreadDynamicData*)gpAllocator->Alloc(sizeof(EAThreadDynamicData));
+
+			return nullptr;
+		}
+
+		void FreeThreadDynamicData(EAThreadDynamicData* pEAThreadDynamicData)
+		{
+			pEAThreadDynamicData->~EAThreadDynamicData();
+			if ((pEAThreadDynamicData >= (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData) && (pEAThreadDynamicData < ((EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData + kMaxThreadDynamicDataCount)))
+			{
+				EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[pEAThreadDynamicData - (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData].SetValue(0);
+			}
+			else
+			{
+				// Assume the data was allocated via the fallback mechanism.
+				if (gpAllocator)
+				{
+					gpAllocator->Free(pEAThreadDynamicData);
+				}
+			}
+		}
+
+		EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId)
+		{
+			for (size_t i(0); i < kMaxThreadDynamicDataCount; ++i)
+			{
+				EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+				if (pTDD->mpComp && pTDD->mpComp->mThread.get_id() == threadId)
+					return pTDD;
+			}
+			return nullptr; // There is no practical way we can find the data unless thread-specific storage was involved.
+		}
+
+		EAThreadDynamicData* FindThreadDynamicData(EA::Thread::ThreadUniqueId threadId)
+		{
+			for (size_t i(0); i < kMaxThreadDynamicDataCount; ++i)
+			{
+				EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+				if (pTDD->mUniqueThreadId == threadId)
+					return pTDD;
+			}
+			return nullptr; // There is no practical way we can find the data unless thread-specific storage was involved.
+		}
+
+		EAThreadDynamicData* FindThreadDynamicData(EA::Thread::SysThreadId sysThreadId)
+		{
+			for (size_t i(0); i < kMaxThreadDynamicDataCount; ++i)
+			{
+				EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+				if (pTDD->mpComp && pTDD->mpComp->mThread.native_handle() == sysThreadId)
+					return pTDD;
+			}
+
+			// NOTE:  This function does not support finding externally created threads due to limitations in the CPP11 std::thread API.
+			//        At the time of writing, it is not possible to retrieve the thread object of a thread not created by the CPP11 API.
+			return nullptr; // There is no practical way we can find the data unless thread-specific storage was involved.
+		}
+	}
+}
+
+EA_DISABLE_VC_WARNING(4355) // 'this' used in base member initializer list - should be safe in this context.
+EAThreadDynamicData::EAThreadDynamicData(void* userFunc, void* userContext, void* userWrapperFunc, ThreadFunc threadFunc) : 
+	mnRefCount(2), // Init ref count to 2, one corresponding release happens on threadFunc exit and the other when Thread class is destroyed or Begin is called again
+	mStatus(EA::Thread::Thread::kStatusNone),
+	mpComp(nullptr)
+{
+	mpComp = new EAThreadComposite();
+
+	if(mpComp)
+		mpComp->mThread = std::thread(threadFunc, this, userFunc, userContext, userWrapperFunc);  // This doesn't spawn CPP11 threads when created within the EAThreadComposite constructor.
+}
+
+
+EAThreadDynamicData::EAThreadDynamicData(EA::Thread::ThreadUniqueId uniqueThreadId, const char* pThreadName) : 
+	mnRefCount(2), // Init ref count to 2, one corresponding release happens on threadFunc exit and the other when Thread class is destroyed or Begin is called again
+	mStatus(EA::Thread::Thread::kStatusNone),
+	mpComp(nullptr),
+	mUniqueThreadId(uniqueThreadId)
+{
+	strncpy(mName, pThreadName, EATHREAD_NAME_SIZE);
+	mName[EATHREAD_NAME_SIZE - 1] = 0;
+}
+
+EA_RESTORE_VC_WARNING()
+
+
+EAThreadDynamicData::~EAThreadDynamicData()
+{
+	if (mpComp) // mpComp is NULL for the "external thread" constructor above, so it must be checked before use.
+	{
+		if (mpComp->mThread.joinable())
+			mpComp->mThread.detach();
+
+		delete mpComp;
+		mpComp = nullptr;
+	}
+
+	// the threads, promises, and futures in this class will 
+	// allocate memory with the Concurrency runtime new/delete operators.
+	// If you're crashing in here with access violations on process exit,
+	// then you likely have a static instance of EA::Thread::Thread somewhere
+	// that's being destructed after your memory system is uninitialized
+	// leaving dangling pointers to bad memory.  Attempt to change
+	// these static instances to be constructed/destructed with the scope
+	// of normal app operation.
+}
+
+
+void EAThreadDynamicData::AddRef()
+{
+	mnRefCount.Increment();
+}
+
+
+void EAThreadDynamicData::Release()
+{
+	if(mnRefCount.Decrement() == 0)
+		EA::Thread::FreeThreadDynamicData(this);
+}
+
+namespace EA
+{
+	namespace Thread
+	{
+		ThreadParameters::ThreadParameters() : 
+			mpStack(NULL), 
+			mnStackSize(0), 
+			mnPriority(kThreadPriorityDefault), 
+			mnProcessor(kProcessorDefault), 
+			mpName(""), 
+			mbDisablePriorityBoost(false)
+		{
+		}
+
+		RunnableFunctionUserWrapper  Thread::sGlobalRunnableFunctionUserWrapper = NULL;
+		RunnableClassUserWrapper     Thread::sGlobalRunnableClassUserWrapper    = NULL;
+		AtomicInt32      			 Thread::sDefaultProcessor                  = kProcessorAny;
+
+		RunnableFunctionUserWrapper Thread::GetGlobalRunnableFunctionUserWrapper()
+		{
+			return sGlobalRunnableFunctionUserWrapper;
+		}
+
+		void Thread::SetGlobalRunnableFunctionUserWrapper(RunnableFunctionUserWrapper pUserWrapper)
+		{
+			if (sGlobalRunnableFunctionUserWrapper != NULL)
+			{
+				// Can only be set once in entire game. 
+				EAT_ASSERT(false);
+			}
+			else
+			{
+				sGlobalRunnableFunctionUserWrapper = pUserWrapper;
+			}
+		}
+
+		RunnableClassUserWrapper Thread::GetGlobalRunnableClassUserWrapper()
+		{
+			return sGlobalRunnableClassUserWrapper;
+		}
+
+		void Thread::SetGlobalRunnableClassUserWrapper(RunnableClassUserWrapper pUserWrapper)
+		{
+			if (sGlobalRunnableClassUserWrapper != NULL)
+			{
+				// Can only be set once in entire game. 
+				EAT_ASSERT(false);
+			}
+			else
+			{
+				sGlobalRunnableClassUserWrapper = pUserWrapper;
+			}
+		}
+
+		Thread::Thread()
+		{
+			mThreadData.mpData = NULL;
+		}
+
+
+		Thread::Thread(const Thread& t) : 
+			mThreadData(t.mThreadData)
+		{
+			if (mThreadData.mpData)
+				mThreadData.mpData->AddRef();
+		}
+
+
+		Thread& Thread::operator=(const Thread& t)
+		{
+			// We don't synchronize access to mpData; we assume that the user 
+			// synchronizes it or this Thread instance is used from a single thread.
+			if (t.mThreadData.mpData)
+				t.mThreadData.mpData->AddRef();
+
+			if (mThreadData.mpData)
+				mThreadData.mpData->Release();
+
+			mThreadData = t.mThreadData;
+
+			return *this;
+		}
+
+
+		Thread::~Thread()
+		{
+			// We don't synchronize access to mpData; we assume that the user 
+			// synchronizes it or this Thread instance is used from a single thread.
+			if (mThreadData.mpData)
+				mThreadData.mpData->Release();
+		}
+
+		static void RunnableFunctionInternal(EAThreadDynamicData* tdd, void* userFunc, void* userContext, void* userWrapperFunc)
+		{
+			tdd->mStatus = Thread::kStatusRunning;
+			tdd->mpStackBase = EA::Thread::GetStackBase();
+			RunnableFunction pFunction = (RunnableFunction)userFunc;
+
+			if (userWrapperFunc)
+			{
+				RunnableFunctionUserWrapper pWrapperFunction = (RunnableFunctionUserWrapper)userWrapperFunc;
+				// if user wrapper is specified, call user wrapper and pass down the pFunction and pContext
+				tdd->mpComp->mReturnPromise.set_value(pWrapperFunction(pFunction, userContext));
+			}
+			else
+			{
+				tdd->mpComp->mReturnPromise.set_value(pFunction(userContext));
+			}
+
+			tdd->mStatus = Thread::kStatusEnded;
+			tdd->Release(); // Matches an implicit AddRef in EAThreadDynamicData constructor
+		}
+
+
+		ThreadId Thread::Begin(RunnableFunction pFunction, void* pContext, const ThreadParameters* pTP, RunnableFunctionUserWrapper pUserWrapper)
+		{
+			// Check there is an entry for the current thread context in our ThreadDynamicData array.            
+			ThreadUniqueId threadUniqueId;
+			EAThreadGetUniqueId(threadUniqueId);
+			if(!FindThreadDynamicData(threadUniqueId))
+			{
+				EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData(threadUniqueId, "external");
+				if(pData)
+				{
+					pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
+									 // Do not AddRef for thread execution, because this is not an EAThread-managed thread.
+				}
+			}
+
+			if (mThreadData.mpData)
+				mThreadData.mpData->Release(); // Matches an implicit AddRef in EAThreadDynamicData constructor
+
+			// C++11 Threads don't support user-supplied stacks. A user-supplied stack pointer 
+			// here would be a waste of user memory, and so we assert that mpStack == NULL.
+			EAT_ASSERT(!pTP || (pTP->mpStack == NULL));
+
+			// We use the pData temporary throughout this function because it's possible that mThreadData.mpData could be 
+			// modified as we are executing, in particular in the case that mThreadData.mpData is destroyed and changed 
+			// during execution.
+			EAThreadDynamicData* pDataAddr = AllocateThreadDynamicData();
+			EAT_ASSERT(pDataAddr != nullptr);
+			EAThreadDynamicData* pData = new(pDataAddr) EAThreadDynamicData(pFunction, pContext, pUserWrapper, RunnableFunctionInternal); // Note that we use a special new here which doesn't use the heap.
+			EAT_ASSERT(pData != nullptr);
+			mThreadData.mpData = pData;
+			if (pTP)
+				SetName(pTP->mpName);
+
+			return pData->mpComp->mThread.get_id();
+		}
+
+		static void RunnableObjectInternal(EAThreadDynamicData* tdd, void* userFunc, void* userContext, void* userWrapperFunc)
+		{
+			tdd->mStatus = Thread::kStatusRunning;
+			IRunnable* pRunnable = (IRunnable*)userFunc;
+
+			if (userWrapperFunc)
+			{
+				RunnableClassUserWrapper pWrapperFunction = (RunnableClassUserWrapper)userWrapperFunc;
+				// if user wrapper is specified, call user wrapper and pass down the pFunction and pContext
+				tdd->mpComp->mReturnPromise.set_value(pWrapperFunction(pRunnable, userContext));
+			}
+			else
+			{
+				tdd->mpComp->mReturnPromise.set_value(pRunnable->Run(userContext));
+			}
+
+			tdd->mStatus = Thread::kStatusEnded;
+			tdd->Release(); // Matches implicit AddRef in EAThreadDynamicData constructor
+		}
+
+
+		ThreadId Thread::Begin(IRunnable* pRunnable, void* pContext, const ThreadParameters* pTP, RunnableClassUserWrapper pUserWrapper)
+		{
+			if (mThreadData.mpData)
+				mThreadData.mpData->Release(); // Matches an implicit AddRef in EAThreadDynamicData constructor
+
+			// C++11 Threads don't support user-supplied stacks. A user-supplied stack pointer 
+			// here would be a waste of user memory, and so we assert that mpStack == NULL.
+			EAT_ASSERT(!pTP || (pTP->mpStack == NULL));
+
+			// We use the pData temporary throughout this function because it's possible that mThreadData.mpData could be 
+			// modified as we are executing, in particular in the case that mThreadData.mpData is destroyed and changed 
+			// during execution.
+			EAThreadDynamicData* pDataAddr = AllocateThreadDynamicData();
+			EAT_ASSERT(pDataAddr != nullptr);
+			EAThreadDynamicData* pData = new(pDataAddr) EAThreadDynamicData(pRunnable, pContext, pUserWrapper, RunnableObjectInternal); // Note that we use a special new here which doesn't use the heap.
+			EAT_ASSERT(pData != nullptr);
+			mThreadData.mpData = pData;
+			if (pTP)
+				SetName(pTP->mpName);
+
+			EAT_ASSERT(pData && pData->mpComp);
+			return pData->mpComp->mThread.get_id();
+		}
+
+		Thread::Status Thread::WaitForEnd(const ThreadTime& timeoutAbsolute, intptr_t* pThreadReturnValue)
+		{
+			// The mThreadData memory is shared between threads and when 
+			// reading it we must be synchronized.
+			EAReadWriteBarrier();
+
+			// A mutex lock around mpData is not needed below because 
+			// mpData is never allowed to go from non-NULL to NULL. 
+			// Todo: Consider that there may be a subtle race condition here if 
+			// the user immediately calls WaitForEnd right after calling Begin.
+			if (mThreadData.mpData && mThreadData.mpData->mpComp)
+			{
+				// We must not call WaitForEnd from the thread we are waiting to end. That would result in a deadlock.
+				EAT_ASSERT(mThreadData.mpData->mpComp->mThread.get_id() != GetThreadId());
+
+				std::chrono::milliseconds timeoutAbsoluteMs(timeoutAbsolute);
+				std::chrono::time_point<std::chrono::system_clock> timeoutTime(timeoutAbsoluteMs);
+				if (mThreadData.mpData->mpComp->mReturnFuture.wait_until(timeoutTime) == std::future_status::timeout)
+				{
+					return kStatusRunning;
+				}
+
+				if (pThreadReturnValue)
+				{
+					mThreadData.mpData->mReturnValue = mThreadData.mpData->mpComp->mReturnFuture.get();
+					*pThreadReturnValue = mThreadData.mpData->mReturnValue;
+				}
+
+				mThreadData.mpData->mpComp->mThread.join();
+
+				return kStatusEnded; // A thread was created, so it must have ended.
+			}
+			else
+			{
+				// Else the user hasn't started the thread yet, so we wait until the user starts it.
+				// Ideally we would wait on some kind of signal; for the time being we poll.
+				while ((!mThreadData.mpData) && (GetThreadTime() < timeoutAbsolute))
+				{
+					ThreadSleep(1);
+				}
+				if (mThreadData.mpData)
+					return WaitForEnd(timeoutAbsolute, pThreadReturnValue); // Propagate the caller's return-value request.
+			}
+			return kStatusNone; // No thread has been started.
+		}
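+		// Usage sketch (illustrative only, not part of this file): retrieving a
+		// thread's return value via WaitForEnd. The absolute-timeout arithmetic
+		// assumes a millisecond-based ThreadTime, as on this C++11 path:
+		//
+		//     intptr_t result = 0;
+		//     const ThreadTime timeout = GetThreadTime() + 5000;
+		//     if (thread.WaitForEnd(timeout, &result) == Thread::kStatusEnded)
+		//         UseResult(result); // hypothetical consumer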
+
+		Thread::Status Thread::GetStatus(intptr_t* pThreadReturnValue) const
+		{
+			if (mThreadData.mpData && mThreadData.mpData->mpComp)
+			{
+				auto status = static_cast<Thread::Status>(mThreadData.mpData->mStatus.GetValue());
+				if (pThreadReturnValue && status == kStatusEnded)
+				{
+					if (mThreadData.mpData->mpComp->mGetStatusFuture.valid())
+						mThreadData.mpData->mReturnValue = mThreadData.mpData->mpComp->mGetStatusFuture.get();
+					*pThreadReturnValue = mThreadData.mpData->mReturnValue;
+				}
+				return status;
+			}
+			return kStatusNone;
+		}
+
+		int Thread::GetPriority() const
+		{
+			// There is no way to query thread priority through the standard C++11 thread library.
+			// On some platforms this could be implemented through platform-specific APIs using native_handle().
+			return kThreadPriorityDefault;
+		}
+
+
+		bool Thread::SetPriority(int nPriority)
+		{
+			// There is no way to set thread priority through the standard C++11 thread library.
+			// On some platforms this could be implemented through platform-specific APIs using native_handle().
+			return false;
+		}
+
+
+		void Thread::SetProcessor(int nProcessor)
+		{
+			// There is no way to set the processor a thread runs on through the standard C++11 thread library.
+			// On some platforms this could be implemented through platform-specific APIs using native_handle().
+		}
+
+		void EA::Thread::Thread::SetAffinityMask(EA::Thread::ThreadAffinityMask nAffinityMask)
+		{
+			if(mThreadData.mpData)
+			{
+				EA::Thread::SetThreadAffinityMask(nAffinityMask);
+			}
+		}
+
+		EA::Thread::ThreadAffinityMask EA::Thread::Thread::GetAffinityMask()
+		{
+			if(mThreadData.mpData)
+			{
+				return mThreadData.mpData->mnThreadAffinityMask;
+			}
+
+			return kThreadAffinityMaskAny;
+		}
+
+		void Thread::Wake()
+		{
+			// There is no way to wake a thread through the standard C++11 thread library.
+			// On some platforms this could be implemented through platform-specific APIs using native_handle().
+		}
+
+
+		const char* Thread::GetName() const
+		{
+			if (mThreadData.mpData)
+				return mThreadData.mpData->mName;
+			return "";
+		}
+
+
+		void Thread::SetName(const char* pName)
+		{
+			if (mThreadData.mpData && pName)
+			{
+				strncpy(mThreadData.mpData->mName, pName, EATHREAD_NAME_SIZE);
+				mThreadData.mpData->mName[EATHREAD_NAME_SIZE - 1] = 0;
+			}
+		}
+
+		ThreadId Thread::GetId() const
+		{
+			if (mThreadData.mpData && mThreadData.mpData->mpComp)
+				return mThreadData.mpData->mpComp->mThread.get_id();
+			return kThreadIdInvalid;
+		}
+
+	}
+}
+

+ 42 - 0
source/deprecated.cpp

@@ -0,0 +1,42 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include "eathread/internal/deprecated.h"
+#include <eathread/eathread.h>
+#include <stdio.h>
+
+namespace EA {
+namespace Thread {
+		
+EATHREADLIB_API void WarnOnce(bool* pHasTriggered, const char* message)
+{
+	EA_UNUSED(pHasTriggered);
+	EA_UNUSED(message);
+#if EAT_ASSERT_ENABLED
+	if (*pHasTriggered == false)
+	{
+		*pHasTriggered = true;
+		// TODO: redirect to debug printing in EAStdC once we have a dependency
+		printf("[EAThread] ***Warning*** %s\n", message);
+	}
+#endif
+}
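+
+// Usage sketch (illustrative only, not part of this file): callers supply a static
+// guard so the warning fires at most once per call site:
+//
+//     static bool sWarned = false;
+//     EA::Thread::WarnOnce(&sWarned, "Foo() is deprecated; use Bar() instead."); // hypothetical message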
+
+EATHREADLIB_API void ErrorOnce(bool* pHasTriggered, const char* message)
+{
+	EA_UNUSED(pHasTriggered);
+	EA_UNUSED(message);
+#if EAT_ASSERT_ENABLED
+	if (*pHasTriggered == false)
+	{
+		*pHasTriggered = true;
+		EAT_FAIL_MSG(message);
+	}
+#endif
+}
+
+}} // end namespace EA::Thread
+
+

+ 254 - 0
source/eathread.cpp

@@ -0,0 +1,254 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		EA::Thread::Allocator* gpAllocator = NULL;
+
+		EATHREADLIB_API void SetAllocator(Allocator* pEAThreadAllocator)
+		{
+			gpAllocator = pEAThreadAllocator;
+		}
+
+		EATHREADLIB_API Allocator* GetAllocator()
+		{
+			return gpAllocator;
+		}
+
+
+
+		// Currently we take advantage of the fact that ICoreAllocator
+		// is binary-compatible with EA::Thread::Allocator.
+		// To do: We need to come up with a better solution than this,
+		//        as it is not future-safe and not even guaranteed to
+		//        be portable. The problem is that we can't make this
+		//        package dependent on the CoreAllocator package without
+		//        breaking users who aren't using it.
+
+		EATHREADLIB_API void SetAllocator(EA::Allocator::ICoreAllocator* pCoreAllocator)
+		{
+			gpAllocator = (EA::Thread::Allocator*)(uintptr_t)pCoreAllocator;
+		}
+
+		EATHREADLIB_API void SetThreadAffinityMask(ThreadAffinityMask nAffinityMask)
+		{ 
+			EA::Thread::SetThreadAffinityMask(GetThreadId(), nAffinityMask);
+		}
+
+		EATHREADLIB_API ThreadAffinityMask GetThreadAffinityMask()
+		{
+			return GetThreadAffinityMask(GetThreadId());
+		}
+	}
+}
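+
+// Usage sketch (illustrative only, not part of this file): routing EAThread's
+// allocations through a user allocator, assuming a user-defined CustomAllocator
+// deriving from EA::Thread::Allocator:
+//
+//     static CustomAllocator gCustomAllocator; // hypothetical type
+//     EA::Thread::SetAllocator(&gCustomAllocator);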
+
+#if !EA_THREADS_AVAILABLE
+	//  Do nothing
+#elif EA_USE_CPP11_CONCURRENCY
+	#include "cpp11/eathread_cpp11.cpp"
+#elif defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_kettle.cpp"
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include "unix/eathread_unix.cpp"
+#elif defined(EA_PLATFORM_MICROSOFT)
+	#include "pc/eathread_pc.cpp"
+#endif
+
+namespace EA
+{
+	namespace Thread
+	{
+		namespace detail
+		{
+			#if !defined(EAThreadIdToString_CUSTOM_IMPLEMENTATION)
+				ThreadIdToStringBuffer::ThreadIdToStringBuffer(EA::Thread::ThreadId threadId)
+				{
+					sprintf(mBuf, "%d", (int)(intptr_t)threadId);
+				}
+
+				SysThreadIdToStringBuffer::SysThreadIdToStringBuffer(EA::Thread::SysThreadId sysThreadId)
+				{
+					sprintf(mBuf, "%d", (int)(intptr_t)sysThreadId);
+				}
+			#endif
+		}
+	}
+}
+
+#if defined(EA_PLATFORM_ANDROID)
+	#if EATHREAD_C11_ATOMICS_AVAILABLE == 0
+		#include "android/eathread_fake_atomic_64.cpp"
+	#endif
+#endif
+
+#if !defined(EAT_ASSERT_SNPRINTF)
+	#if defined(EA_PLATFORM_MICROSOFT)
+		#define EAT_ASSERT_SNPRINTF _vsnprintf
+	#else
+		#define EAT_ASSERT_SNPRINTF vsnprintf // A va_list is passed below, so the v-variant is required; plain snprintf would be wrong.
+	#endif
+#endif
+
+	void EA::Thread::AssertionFailureV(const char* pFormat, ...)
+	{
+		const size_t kBufferSize = 512;
+		char buffer[kBufferSize];
+
+		va_list arguments;
+		va_start(arguments, pFormat);
+		const int nReturnValue = EAT_ASSERT_SNPRINTF(buffer, kBufferSize, pFormat, arguments);
+		va_end(arguments);
+
+		if(nReturnValue > 0)
+		{
+			buffer[kBufferSize - 1] = 0;
+			AssertionFailure(buffer);
+		}
+	}
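+
+	// Usage sketch (illustrative only, not part of this file): installing a custom
+	// assertion handler so failure text reaches a user log:
+	//
+	//     static void MyAssertHandler(const char* pExpression, void* /*pContext*/) // hypothetical
+	//         { MyLog("EAThread assert: %s", pExpression); }                       // hypothetical
+	//
+	//     EA::Thread::SetAssertionFailureFunction(MyAssertHandler, NULL);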
+
+///////////////////////////////////////////////////////////////////////////////
+// non-threaded implementation
+///////////////////////////////////////////////////////////////////////////////
+
+#if !EA_THREADS_AVAILABLE
+
+	#include <stdio.h>
+	#if defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+		#include <sched.h>
+		#include <sys/time.h>
+	#elif defined(EA_PLATFORM_WINDOWS)
+		extern "C" __declspec(dllimport) void __stdcall Sleep(unsigned long dwMilliseconds);
+	#endif
+
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			// Assertion variables.
+			EA::Thread::AssertionFailureFunction gpAssertionFailureFunction = NULL;
+			void*                                gpAssertionFailureContext  = NULL;
+		}
+	}
+
+	EA::Thread::ThreadId EA::Thread::GetThreadId()
+	{
+		 return 1;
+	}
+
+
+	int EA::Thread::GetThreadPriority()
+	{
+		return kThreadPriorityDefault;
+	}
+
+
+	bool EA::Thread::SetThreadPriority(int nPriority)
+	{
+		return true;
+	}
+
+
+	void* EA::Thread::GetThreadStackBase()
+	{
+		return NULL;
+	}
+
+
+	void EA::Thread::SetThreadProcessor(int /*nProcessor*/)
+	{
+	}
+
+
+	int EA::Thread::GetThreadProcessor()
+	{
+		return 0;
+	}
+
+
+	int EA::Thread::GetProcessorCount()
+	{
+		return 1;
+	}
+
+
+	void EA::Thread::ThreadSleep(const ThreadTime& timeRelative)
+	{
+		#if defined(EA_PLATFORM_WINDOWS)
+
+			// There is no nanosleep on Windows, but there is Sleep.
+			if(timeRelative == kTimeoutImmediate)
+				Sleep(0);
+			else
+				Sleep((unsigned)((timeRelative.tv_sec * 1000) + (timeRelative.tv_nsec / 1000000))); // Convert nanoseconds to milliseconds.
+
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+
+			if(timeRelative == kTimeoutImmediate)
+				sched_yield();
+			else
+				nanosleep(&timeRelative, 0);
+
+		#endif
+	}
+
+
+	void EA::Thread::ThreadEnd(intptr_t /*threadReturnValue*/)
+	{
+		// We could possibly call exit here.
+	}
+
+
+	EA::Thread::ThreadTime EA::Thread::GetThreadTime()
+	{
+		#if defined(EA_PLATFORM_WINDOWS)
+
+			return (ThreadTime)GetTickCount();
+
+		#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+
+			#if defined(EA_PLATFORM_LINUX) || defined(__CYGWIN__) || (_POSIX_TIMERS > 0)
+				ThreadTime threadTime;
+				clock_gettime(CLOCK_REALTIME, &threadTime);  // If you get a linker error about clock_gettime, you need to link librt.a (specify -lrt to the linker).
+				return threadTime;
+			#else
+				timeval temp;
+				gettimeofday(&temp, NULL);
+				return ThreadTime(temp.tv_sec, temp.tv_usec * 1000);    
+			#endif
+
+		#endif
+	}
+
+
+	void EA::Thread::SetAssertionFailureFunction(EA::Thread::AssertionFailureFunction pAssertionFailureFunction, void* pContext)
+	{
+		gpAssertionFailureFunction = pAssertionFailureFunction;
+		gpAssertionFailureContext  = pContext;
+	}
+
+
+	void EA::Thread::AssertionFailure(const char* pExpression)
+	{
+		if(gpAssertionFailureFunction)
+			gpAssertionFailureFunction(pExpression, gpAssertionFailureContext);
+		else
+		{
+			#if EAT_ASSERT_ENABLED
+				printf("EA::Thread::AssertionFailure: %s\n", pExpression);
+			#endif
+		}
+	}
+
+
+#endif // EA_THREADS_AVAILABLE
+

+ 170 - 0
source/eathread_atomic.cpp

@@ -0,0 +1,170 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread_atomic.h>
+
+
+#if EATHREAD_HAS_EMULATED_AND_NATIVE_ATOMICS
+
+#include <eathread/internal/atomic.h>
+#include <eathread/eathread_futex.h>
+#include <stdio.h>
+
+#if __APPLE__
+	#include <libkern/OSAtomic.h>
+#endif
+
+// Currently iPhone/iOS defaults to emulated atomics. The reason for this is that
+// some older iPhone firmware versions have broken 64-bit atomics.
+// The only other platform where it is possible to switch between native and emulated
+// atomics is desktop OS X, and we default to native atomics there.
+#if !defined(EATHREAD_DEFAULT_TO_EMULATED_ATOMIC64)
+	#define EATHREAD_DEFAULT_TO_EMULATED_ATOMIC64 0
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		static Futex gEmulatedAtomicFutex;
+
+
+		static inline void PrintEmulationWarningMessage()
+		{
+			#if EAT_ASSERT_ENABLED
+				static bool gHavePrintedEmulationWarningMessage = false;
+
+				if (!gHavePrintedEmulationWarningMessage)
+				{
+					printf("WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING\n"
+							"    EAThread is currently configured to use emulated 64-bit atomics.\n"
+							"    This can be a performance hazard on architectures that natively support\n"
+							"    these instructions. If you know that this platform supports native 64-bit\n"
+							"    atomics, call EA::Thread::SetAtomic64Implementation(kAtomic64Native).\n"
+							"    If the platform does not currently support 64-bit atomics then disregard\n"
+							"    this message.\n");
+					gHavePrintedEmulationWarningMessage = true;
+				}
+			#endif
+		}
+
+
+		static int64_t AtomicAdd64Emulated(volatile int64_t *ptr, int64_t value)
+		{
+			AutoFutex autoFutex(gEmulatedAtomicFutex);
+			PrintEmulationWarningMessage();
+
+			const int64_t oldValue = *ptr;
+			const int64_t newValue = oldValue + value;
+			*ptr = newValue;
+
+			return newValue;
+		}
+
+
+		static int64_t AtomicGetValue64Emulated(volatile int64_t *ptr)
+		{
+			AutoFutex autoFutex(gEmulatedAtomicFutex);
+			PrintEmulationWarningMessage();
+
+			return *ptr;
+		}
+
+
+		static int64_t AtomicSetValue64Emulated(volatile int64_t *ptr, int64_t value)
+		{
+			AutoFutex autoFutex(gEmulatedAtomicFutex);
+			PrintEmulationWarningMessage();
+
+			const int64_t oldValue = *ptr;
+			*ptr = value;
+
+			return oldValue;
+		}
+
+
+		static bool AtomicSetValueConditional64Emulated(volatile int64_t *ptr, int64_t value, int64_t condition)
+		{
+			AutoFutex autoFutex(gEmulatedAtomicFutex);
+			PrintEmulationWarningMessage();
+
+			const int64_t oldValue = *ptr;
+			if (oldValue == condition)
+			{
+				*ptr = value;
+				return true;
+			}
+
+			return false;
+		}
+
+
+		#if __APPLE__
+			static int64_t AtomicAdd64Native(volatile int64_t *ptr, int64_t value)
+			{
+				return OSAtomicAdd64(value, ptr);
+			}
+
+
+			static int64_t AtomicSetValue64Native(volatile int64_t *ptr, int64_t value)
+			{
+				int64_t old;
+				do
+				{
+					old = *ptr;
+				} while (!OSAtomicCompareAndSwap64(old, value, ptr));
+				return old;
+			}
+
+
+			static bool AtomicSetValueConditional64Native(volatile int64_t *ptr, int64_t value, int64_t condition)
+			{
+				return OSAtomicCompareAndSwap64(condition, value, ptr);
+			}
+
+
+			static int64_t AtomicGetValue64Native(volatile int64_t *ptr)
+			{
+				return AtomicAdd64Native(ptr, 0);
+			}
+		#endif
+
+
+		#if EATHREAD_DEFAULT_TO_EMULATED_ATOMIC64
+			AtomicAdd64Function                 AtomicAdd64                 = AtomicAdd64Emulated;
+			AtomicGetValue64Function            AtomicGetValue64            = AtomicGetValue64Emulated;
+			AtomicSetValue64Function            AtomicSetValue64            = AtomicSetValue64Emulated;
+			AtomicSetValueConditional64Function AtomicSetValueConditional64 = AtomicSetValueConditional64Emulated;
+		#else
+			AtomicAdd64Function                 AtomicAdd64                 = AtomicAdd64Native;
+			AtomicGetValue64Function            AtomicGetValue64            = AtomicGetValue64Native;
+			AtomicSetValue64Function            AtomicSetValue64            = AtomicSetValue64Native;
+			AtomicSetValueConditional64Function AtomicSetValueConditional64 = AtomicSetValueConditional64Native;
+		#endif
+
+
+		void SetAtomic64Implementation(Atomic64Implementation implementation)
+		{
+			if (implementation == kAtomic64Emulated)
+			{
+				AtomicAdd64                 = AtomicAdd64Emulated;
+				AtomicGetValue64            = AtomicGetValue64Emulated;
+				AtomicSetValue64            = AtomicSetValue64Emulated;
+				AtomicSetValueConditional64 = AtomicSetValueConditional64Emulated;
+			}
+			else
+			{
+				AtomicAdd64                 = AtomicAdd64Native;
+				AtomicGetValue64            = AtomicGetValue64Native;
+				AtomicSetValue64            = AtomicSetValue64Native;
+				AtomicSetValueConditional64 = AtomicSetValueConditional64Native;
+			}
+		}
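+
+		// Usage sketch (illustrative only, not part of this file): a title that has
+		// verified native 64-bit atomics on its minimum supported OS can opt out of
+		// the emulated default:
+		//
+		//     EA::Thread::SetAtomic64Implementation(EA::Thread::kAtomic64Native);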
+	}
+}
+
+#endif
+

+ 194 - 0
source/eathread_barrier.cpp

@@ -0,0 +1,194 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <new>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_barrier_kettle.cpp"
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	// Posix already defines a barrier (via condition variables or directly with pthread_barrier).
+	#include "unix/eathread_barrier_unix.cpp"
+
+#else // All other platforms
+
+	#include <eathread/eathread_barrier.h>
+	#include <string.h>
+
+
+	EABarrierData::EABarrierData() 
+		: mnCurrent(0), mnHeight(0), mnIndex(0), mSemaphore0(NULL, false), mSemaphore1(NULL, false) 
+	{
+		// Leave the semaphores alone for now. We leave them constructed but not initialized.
+	}
+
+
+	EA::Thread::BarrierParameters::BarrierParameters(int height, bool bIntraProcess, const char* pName)
+		: mHeight(height), mbIntraProcess(bIntraProcess)
+	{
+		if(pName)
+		{
+			EA_DISABLE_VC_WARNING(4996); // This function or variable may be unsafe / deprecated. 
+			strncpy(mName, pName, sizeof(mName)-1);
+			EA_RESTORE_VC_WARNING();
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+			mName[0] = 0;
+	}
+
+
+	EA::Thread::Barrier::Barrier(const BarrierParameters* pBarrierParameters, bool bDefaultParameters)
+	{
+		if(!pBarrierParameters && bDefaultParameters)
+		{
+			BarrierParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pBarrierParameters);
+	}
+
+
+	EA::Thread::Barrier::Barrier(int height)
+	{
+		BarrierParameters parameters(height);
+		Init(&parameters);
+	}
+
+
+	EA::Thread::Barrier::~Barrier()
+	{
+		// Nothing to do.
+	}
+
+
+	bool EA::Thread::Barrier::Init(const BarrierParameters* pBarrierParameters)
+	{
+		// You cannot set the height after it's already been set.
+		EAT_ASSERT((mBarrierData.mnHeight == 0) && (mBarrierData.mnCurrent == 0));
+
+		if(pBarrierParameters && (mBarrierData.mnHeight == 0))
+		{
+			mBarrierData.mnHeight  = pBarrierParameters->mHeight; // We don't put mutex lock around this as it is only to be ever set once, before use.
+			mBarrierData.mnCurrent = pBarrierParameters->mHeight;
+
+			SemaphoreParameters sp(0, pBarrierParameters->mbIntraProcess);
+			mBarrierData.mSemaphore0.Init(&sp);
+			mBarrierData.mSemaphore1.Init(&sp);
+
+			return true;
+		}
+
+		return false;
+	}
+
+
+	EA::Thread::Barrier::Result EA::Thread::Barrier::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		int result;
+		const int nCurrentIndex = (int)mBarrierData.mnIndex;
+
+		// Question: What do we do if a fifth thread calls Wait on a barrier with height 
+		// of four after the fourth thread has decremented the current count below?
+
+		EAT_ASSERT(mBarrierData.mnCurrent > 0); // If this assert fails then it means that more threads are waiting on the barrier than the barrier height.
+
+		const int32_t nCurrent = mBarrierData.mnCurrent.Decrement(); // atomic integer operation.
+
+		if(nCurrent == 0) // If the barrier has been breached... 
+		{
+			mBarrierData.mnCurrent = mBarrierData.mnHeight;
+
+			if(mBarrierData.mnHeight > 1) // If there are threads other than us...
+			{
+				// We don't have a potential race condition here because we use alternating
+				// semaphores and since we are here, all other threads are waiting on the 
+				// current semaphore below. And if they haven't started waiting on the 
+				// semaphore yet, they'll succeed anyway because we Post all directly below.
+				Semaphore* const pSemaphore = (nCurrentIndex == 0 ? &mBarrierData.mSemaphore0 : &mBarrierData.mSemaphore1);
+
+				result = pSemaphore->Post(mBarrierData.mnHeight - 1); // Upon success, the return value will in practice be >= 1, but semaphore defines success as >= 0.
+			}
+			else // Else we are the only thread.
+				result = 0;
+		}
+		else
+		{
+			Semaphore* const pSemaphore = (nCurrentIndex == 0 ? &mBarrierData.mSemaphore0 : &mBarrierData.mSemaphore1);
+
+			result = pSemaphore->Wait(timeoutAbsolute);
+
+			if(result == Semaphore::kResultTimeout)
+				return kResultTimeout;
+		}
+
+		if(result >= 0) // If the result wasn't an error such as Semaphore::kResultError or Semaphore::kResultTimeout.
+		{
+			// Use an atomic operation to change the index, which conveniently gives us a thread to designate as primary.
+			EAT_ASSERT((unsigned)nCurrentIndex <= 1);
+
+			if(mBarrierData.mnIndex.SetValueConditional(1 - nCurrentIndex, nCurrentIndex))  // Toggle value between 0 and 1.
+				return kResultPrimary;
+
+			return kResultSecondary;
+		}
+
+		return kResultError;
+	}
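+
+	// Usage sketch (illustrative only, not part of this file): N threads meeting at
+	// a barrier of height N; exactly one waiter per cycle observes kResultPrimary,
+	// which is convenient for once-per-phase work:
+	//
+	//     EA::Thread::Barrier barrier(4);
+	//     // ... in each of the four threads:
+	//     if (barrier.Wait() == EA::Thread::Barrier::kResultPrimary) // assumes the header's default timeout
+	//         AdvancePhase(); // hypothetical once-per-cycle work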
+
+
+	EA::Thread::Barrier* EA::Thread::BarrierFactory::CreateBarrier()
+	{
+		EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+		if(pAllocator)
+			return new(pAllocator->Alloc(sizeof(EA::Thread::Barrier))) EA::Thread::Barrier;
+		else
+			return new EA::Thread::Barrier;
+	}
+
+	void EA::Thread::BarrierFactory::DestroyBarrier(EA::Thread::Barrier* pBarrier)
+	{
+		EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+		if(pAllocator)
+		{
+			pBarrier->~Barrier();
+			pAllocator->Free(pBarrier);
+		}
+		else
+			delete pBarrier;
+	}
+
+	size_t EA::Thread::BarrierFactory::GetBarrierSize()
+	{
+		return sizeof(EA::Thread::Barrier);
+	}
+
+	EA::Thread::Barrier* EA::Thread::BarrierFactory::ConstructBarrier(void* pMemory)
+	{
+		return new(pMemory) EA::Thread::Barrier;
+	}
+
+	void EA::Thread::BarrierFactory::DestructBarrier(EA::Thread::Barrier* pBarrier)
+	{
+		pBarrier->~Barrier();
+	}
+
+
+#endif // EA_PLATFORM_XXX
+
+
+
+
+
+
+
+

+ 36 - 0
source/eathread_callstack.cpp

@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+
+#if defined(EA_PLATFORM_WIN32) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	#include "pc/eathread_callstack_win32.cpp"
+#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+	#include "pc/eathread_callstack_win64.cpp"
+#elif defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_callstack_kettle.cpp"
+	#include "kettle/eathread_pthread_stack_info.cpp"
+#elif defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_X86)
+	#include "x86/eathread_callstack_x86.cpp"
+	#include "unix/eathread_pthread_stack_info.cpp"
+#elif defined(EA_PLATFORM_ANDROID)
+	#include "libunwind/eathread_callstack_libunwind.cpp"
+	#include "unix/eathread_pthread_stack_info.cpp"
+#elif defined(EA_PLATFORM_APPLE) // OSX, iPhone, iPhone Simulator
+	#include "apple/eathread_callstack_apple.cpp"
+	#include "unix/eathread_pthread_stack_info.cpp"
+#elif defined(EA_PROCESSOR_ARM) 
+	#include "arm/eathread_callstack_arm.cpp"
+	#if !defined(EA_PLATFORM_MICROSOFT)
+		#include "unix/eathread_pthread_stack_info.cpp"
+	#endif
+#elif (defined(EA_PLATFORM_LINUX) || defined(__CYGWIN__)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64))
+	#include "x86/eathread_callstack_x86.cpp"
+	#include "unix/eathread_pthread_stack_info.cpp"
+#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
+	#include "unix/eathread_callstack_glibc.cpp"
+	#include "unix/eathread_pthread_stack_info.cpp"
+#else
+	#include "null/eathread_callstack_null.cpp"
+#endif

+ 271 - 0
source/eathread_condition.cpp

@@ -0,0 +1,271 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <new>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PLATFORM_SONY)
+	// Posix already defines a Condition (via condition variables).
+	#include "kettle/eathread_condition_kettle.cpp"
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EA_THREADS_AVAILABLE
+	// Posix already defines a Condition (via condition variables).
+	#include "unix/eathread_condition_unix.cpp"
+
+#else // All other platforms
+
+	#include <eathread/eathread_condition.h>
+	#include <string.h>
+	#include <limits.h> // For INT_MAX, used in Condition::Wait below.
+
+
+	#ifdef _MSC_VER
+		#pragma warning(disable: 4996) // This function or variable may be unsafe / deprecated.
+	#endif
+
+
+	EAConditionData::EAConditionData()
+	   : mnWaitersBlocked(0), mnWaitersToUnblock(0), mnWaitersDone(0),
+		 mSemaphoreBlockQueue(NULL, false), // We will be initializing these ourselves specifically below.
+		 mSemaphoreBlockLock(NULL, false),
+		 mUnblockLock(NULL, false)
+	{
+		// Empty
+	}
+
+
+	EA::Thread::ConditionParameters::ConditionParameters(bool bIntraProcess, const char* pName)
+		: mbIntraProcess(bIntraProcess)
+	{
+		if(pName)
+		{
+			strncpy(mName, pName, sizeof(mName)-1);
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+			mName[0] = 0;
+	}
+
+
+	EA::Thread::Condition::Condition(const ConditionParameters* pConditionParameters, bool bDefaultParameters)
+	{
+		if(!pConditionParameters && bDefaultParameters)
+		{
+			ConditionParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pConditionParameters);
+	}
+
+
+	EA::Thread::Condition::~Condition()
+	{
+		// Empty
+	}
+
+
+	bool EA::Thread::Condition::Init(const ConditionParameters* pConditionParameters)
+	{
+		if(pConditionParameters)
+		{
+			// We have a problem with naming here. We implement our Condition variable with two semaphores and a mutex.
+			// It's not possible to have them all have the same name, since the OS will think you want them to be
+			// shared instances. What we really need is an explicit debug name that is separate from the OS name. 
+			// And the ConditionParameters::mName should be that debug name only and not be applied to the child primitives.
+
+			const SemaphoreParameters sp1(0, pConditionParameters->mbIntraProcess, NULL);   // Set the name to NULL, regardless of what pConditionParameters->mName is. 
+			const SemaphoreParameters sp2(1, pConditionParameters->mbIntraProcess, NULL);
+			const MutexParameters     mp(pConditionParameters->mbIntraProcess,     NULL);
+
+			if(mConditionData.mSemaphoreBlockQueue.Init(&sp1) && 
+			   mConditionData.mSemaphoreBlockLock .Init(&sp2) && 
+			   mConditionData.mUnblockLock.Init(&mp))
+			{
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+
+	EA::Thread::Condition::Result EA::Thread::Condition::Wait(Mutex* pMutex, const ThreadTime& timeoutAbsolute)
+	{
+		int lockResult, result;
+
+		EAT_ASSERT(pMutex); // The user is required to pass a valid Mutex pointer.
+
+		++mConditionData.mnWaitersBlocked; // Note that this is an atomic operation.
+
+		EAT_ASSERT(pMutex->GetLockCount() == 1);
+		lockResult = pMutex->Unlock();
+		if(lockResult < 0)
+			return (Result)lockResult;
+
+		result = mConditionData.mSemaphoreBlockQueue.Wait(timeoutAbsolute);
+		EAT_ASSERT(result != EA::Thread::Semaphore::kResultError);
+		// Regardless of the result above (even an error), we must press on with the code below.
+
+		mConditionData.mUnblockLock.Lock();
+		
+		const int nWaitersToUnblock = mConditionData.mnWaitersToUnblock;
+
+		if(nWaitersToUnblock != 0)
+			--mConditionData.mnWaitersToUnblock;
+		else if(++mConditionData.mnWaitersDone == (INT_MAX / 2)) // This is not an atomic operation. We are within a mutex lock.
+		{ 
+			// Normally this doesn't happen, but can happen under very 
+			// unusual circumstances, such as spurious semaphore signals
+			// or cases whereby many many threads are timing out.
+			EAT_ASSERT(false);
+			mConditionData.mSemaphoreBlockLock.Wait();
+			mConditionData.mnWaitersBlocked -= mConditionData.mnWaitersDone;
+			mConditionData.mSemaphoreBlockLock.Post();
+			mConditionData.mnWaitersDone = 0;
+		}
+
+		mConditionData.mUnblockLock.Unlock();
+
+		if(nWaitersToUnblock == 1) // If we were the last...
+			mConditionData.mSemaphoreBlockLock.Post();
+
+		// We cannot apply a timeout here. The caller always expects to have the 
+		// lock upon return, even in the case of a wait timeout. Similarly, we 
+		// may or may not want the result of the lock attempt to be propagated
+		// back to the caller. In this case, we do if it is an error.
+		lockResult = pMutex->Lock();
+
+		if(lockResult == Mutex::kResultError)
+			return kResultError;
+		else if(result >= 0)
+			return kResultOK;
+
+		return (Result)result; // This is the result of the wait call above.
+	}
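+
+	// Usage sketch (illustrative only, not part of this file): the canonical wait
+	// pattern, with the predicate re-checked in a loop to tolerate spurious wakeups:
+	//
+	//     mutex.Lock();
+	//     while(!queueHasWork)        // hypothetical predicate
+	//         condition.Wait(&mutex); // assumes the header's default timeout; unlocks, waits, relocks
+	//     ConsumeWork();              // hypothetical
+	//     mutex.Unlock();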
+
+
+	bool EA::Thread::Condition::Signal(bool bBroadcast)
+	{
+		int result;
+		int nSignalsToIssue;
+
+		result = mConditionData.mUnblockLock.Lock();
+
+		if(result < 0)
+			return false;
+
+		if(mConditionData.mnWaitersToUnblock)
+		{
+			if(mConditionData.mnWaitersBlocked == 0)
+			{
+				mConditionData.mUnblockLock.Unlock();
+				return true;
+			}
+
+			if(bBroadcast)
+			{
+				nSignalsToIssue = (int)mConditionData.mnWaitersBlocked.SetValue(0);
+				mConditionData.mnWaitersToUnblock += nSignalsToIssue;
+			}
+			else
+			{
+				nSignalsToIssue = 1;
+				mConditionData.mnWaitersToUnblock++;
+				mConditionData.mnWaitersBlocked--;
+			}
+		}
+		else if(mConditionData.mnWaitersBlocked > mConditionData.mnWaitersDone)
+		{
+			if(mConditionData.mSemaphoreBlockLock.Wait() == EA::Thread::Semaphore::kResultError)
+			{
+				mConditionData.mUnblockLock.Unlock();
+				return false;
+			}
+
+			if(mConditionData.mnWaitersDone != 0)
+			{
+				mConditionData.mnWaitersBlocked -= mConditionData.mnWaitersDone;
+				mConditionData.mnWaitersDone     = 0;
+			}
+
+			if(bBroadcast)
+			{
+				nSignalsToIssue = mConditionData.mnWaitersToUnblock = (int)mConditionData.mnWaitersBlocked.SetValue(0);
+			}
+			else
+			{
+				nSignalsToIssue = mConditionData.mnWaitersToUnblock = 1;
+				mConditionData.mnWaitersBlocked--;
+			}
+		}
+		else
+		{
+			mConditionData.mUnblockLock.Unlock();
+			return true;
+		}
+
+		mConditionData.mUnblockLock.Unlock();
+		mConditionData.mSemaphoreBlockQueue.Post(nSignalsToIssue);
+
+		return true;
+	}
+
+#endif // EA_PLATFORM_XXX
+
+
+
+
+EA::Thread::Condition* EA::Thread::ConditionFactory::CreateCondition()
+{
+	Allocator* pAllocator = GetAllocator();
+
+	if(pAllocator)
+		return new(pAllocator->Alloc(sizeof(EA::Thread::Condition))) EA::Thread::Condition;
+	else
+		return new EA::Thread::Condition;
+}
+
+void EA::Thread::ConditionFactory::DestroyCondition(EA::Thread::Condition* pCondition)
+{
+	Allocator* pAllocator = GetAllocator();
+
+	if(pAllocator)
+	{
+		pCondition->~Condition();
+		pAllocator->Free(pCondition);
+	}
+	else
+		delete pCondition;
+}
+
+size_t EA::Thread::ConditionFactory::GetConditionSize()
+{
+	return sizeof(EA::Thread::Condition);
+}
+
+EA::Thread::Condition* EA::Thread::ConditionFactory::ConstructCondition(void* pMemory)
+{
+	return new(pMemory) EA::Thread::Condition;
+}
+
+void EA::Thread::ConditionFactory::DestructCondition(EA::Thread::Condition* pCondition)
+{
+	pCondition->~Condition();
+}
+
+
+
+
+
+
+
+
+
+
+
+
+

+ 335 - 0
source/eathread_futex.cpp

@@ -0,0 +1,335 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/eathread_futex.h>
+#include <new>
+
+#if defined(EA_THREAD_NONTHREADED_FUTEX) && EA_THREAD_NONTHREADED_FUTEX
+
+	void EA::Thread::Futex::CreateFSemaphore()
+	{
+		mSemaphore.mnCount = 0;
+	}
+
+	void EA::Thread::Futex::DestroyFSemaphore()
+	{
+		// Do nothing;
+	}
+
+	void EA::Thread::Futex::SignalFSemaphore()
+	{
+		mSemaphore.mnCount++;
+	}
+
+	void EA::Thread::Futex::WaitFSemaphore()
+	{
+		while(mSemaphore.mnCount <= 0)
+			EA_THREAD_DO_SPIN();
+		mSemaphore.mnCount--;
+	}
+
+	bool EA::Thread::Futex::WaitFSemaphore(const ThreadTime&)
+	{
+		WaitFSemaphore();
+		return true;
+	}
+
+#elif defined(__APPLE__) && EATHREAD_MANUAL_FUTEX_ENABLED
+	#include <semaphore.h>
+	#include <stdio.h>
+	#include <errno.h>
+	#include <string.h>
+	#include <libkern/OSAtomic.h>
+
+	void EA::Thread::Futex::CreateFSemaphore()
+	{   
+		mSemaphore.Init(0);
+	}
+
+	void EA::Thread::Futex::DestroyFSemaphore()
+	{
+		// Do nothing;
+	}
+
+	void EA::Thread::Futex::SignalFSemaphore()
+	{
+		mSemaphore.Post();
+	}
+
+	void EA::Thread::Futex::WaitFSemaphore()
+	{
+		mSemaphore.Wait();
+	}
+
+	bool EA::Thread::Futex::WaitFSemaphore(const ThreadTime& timeoutAbsolute)
+	{
+		return (mSemaphore.Wait(timeoutAbsolute) >= 0);
+	}
+
+#elif defined(EA_PLATFORM_SONY) && !EATHREAD_MANUAL_FUTEX_ENABLED
+	#include <kernel.h>	
+	#include <eathread/eathread_atomic.h>
+
+	EA::Thread::Futex::Futex()
+	: mSpinCount(EATHREAD_FUTEX_SPIN_COUNT)
+	{
+	}
+
+	EA::Thread::Futex::~Futex()
+	{
+	}
+
+	void EA::Thread::Futex::Lock()
+	{
+		Uint spinCount(mSpinCount);
+		while(--spinCount)
+		{
+			if(TryLock())
+				return;
+		}
+
+		mMutex.Lock();
+	}
+
+	void EA::Thread::Futex::Unlock()
+	{
+		mMutex.Unlock();
+	}
+
+	bool EA::Thread::Futex::TryLock()
+	{
+		if(mMutex.Lock(EA::Thread::kTimeoutImmediate) > 0)  // This calls scePthreadMutexTrylock
+			return true;
+
+		return false;
+	}
+
+	int EA::Thread::Futex::Lock(const ThreadTime& timeoutAbsolute)
+	{ 
+		return mMutex.Lock(timeoutAbsolute); 
+	}
+
+	int EA::Thread::Futex::GetLockCount() const
+	{
+		return mMutex.GetLockCount();
+	}  
+
+	bool EA::Thread::Futex::HasLock() const
+	{
+		return mMutex.HasLock();
+	}
+
+	void EA::Thread::Futex::SetSpinCount(Uint spinCount)
+	{ 
+		mSpinCount = spinCount;
+	}
+
+#elif defined(EA_PLATFORM_SONY) && EATHREAD_MANUAL_FUTEX_ENABLED
+	#include <kernel/semaphore.h>
+	#include <sceerror.h>
+
+	void EA::Thread::Futex::CreateFSemaphore()
+	{
+		// To consider: Copy the Futex name into this semaphore name.
+		int result = sceKernelCreateSema(&mSemaphore, "Futex", SCE_KERNEL_SEMA_ATTR_TH_FIFO, 0, 100000, NULL);
+		EA_UNUSED(result);
+		EAT_ASSERT(result == SCE_OK);
+	}
+
+	void EA::Thread::Futex::DestroyFSemaphore()
+	{
+		int result = sceKernelDeleteSema(mSemaphore);
+		EA_UNUSED(result);
+		EAT_ASSERT(result == SCE_OK);
+	}
+
+	void EA::Thread::Futex::SignalFSemaphore()
+	{
+		int result = sceKernelSignalSema(mSemaphore, 1);
+		EA_UNUSED(result);
+		EAT_ASSERT(result == SCE_OK);
+	}
+
+	void EA::Thread::Futex::WaitFSemaphore()
+	{
+		int result = sceKernelWaitSema(mSemaphore, 1, NULL);
+		EA_UNUSED(result);
+		EAT_ASSERT(result == SCE_OK);
+	}
+
+	bool EA::Thread::Futex::WaitFSemaphore(const ThreadTime& timeoutAbsolute)
+	{
+		SceKernelUseconds timeoutRelativeUs = static_cast<SceKernelUseconds>(RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));        
+		if(timeoutRelativeUs < 1)
+			timeoutRelativeUs = 1;
+
+		return (sceKernelWaitSema(mSemaphore, 1, &timeoutRelativeUs) == SCE_OK);
+	}
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && EATHREAD_MANUAL_FUTEX_ENABLED
+	#include <semaphore.h>
+	#include <errno.h>
+
+	void EA::Thread::Futex::CreateFSemaphore()
+	{   
+		const int result = sem_init(&mSemaphore, 0, 0);
+		(void)result;
+		EAT_ASSERT(result != -1);
+	}
+
+	void EA::Thread::Futex::DestroyFSemaphore()
+	{
+		#if defined (__APPLE__)
+			sem_close(&mSemaphore);
+		#elif defined(EA_PLATFORM_ANDROID)
+			sem_destroy(&mSemaphore);   // Android's sem_destroy is broken. http://code.google.com/p/android/issues/detail?id=3106
+		#else
+			int result = -1;
+	
+			for(;;)
+			{
+				result = sem_destroy(&mSemaphore);
+
+				if((result == -1) && (errno == EBUSY)) // If another thread or process is blocked on this semaphore...
+					ThreadSleep(kTimeoutYield);        // Yield. If we don't yield, it's possible we could block other threads or processes from running, on some systems.
+				else
+					break;
+			}
+
+			EAT_ASSERT(result != -1);
+		#endif
+	}
+
+	void EA::Thread::Futex::SignalFSemaphore()
+	{
+		sem_post(&mSemaphore);
+	}
+
+	void EA::Thread::Futex::WaitFSemaphore()
+	{
+		// We don't have much choice but to retry interrupted waits,
+		// as there is no lock failure return value.
+		while((sem_wait(&mSemaphore) == -1) && (errno == EINTR))
+			continue;
+	}
+
+	bool EA::Thread::Futex::WaitFSemaphore(const ThreadTime&)
+	{
+		WaitFSemaphore();
+		return true;
+	}
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !EA_USE_CPP11_CONCURRENCY && !EATHREAD_MANUAL_FUTEX_ENABLED
+
+	#pragma warning(push, 0)
+	#include <Windows.h>
+	#pragma warning(pop)
+
+	// Validate what we assume to be invariants.
+	EAT_COMPILETIME_ASSERT(sizeof(CRITICAL_SECTION) <= (EA::Thread::FUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t) * sizeof(uint64_t)));
+
+	#if defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+		EAT_COMPILETIME_ASSERT(offsetof(CRITICAL_SECTION, RecursionCount) == (3 * sizeof(int)));
+		EAT_COMPILETIME_ASSERT(offsetof(CRITICAL_SECTION, OwningThread)   == (4 * sizeof(int)));
+	#elif defined(EA_PLATFORM_WIN32)
+		EAT_COMPILETIME_ASSERT(offsetof(CRITICAL_SECTION, RecursionCount) == (2 * sizeof(int)));
+		EAT_COMPILETIME_ASSERT(offsetof(CRITICAL_SECTION, OwningThread)   == (3 * sizeof(int)));
+	#else
+		EAT_FAIL_MSG("Need to verify offsetof.");
+	#endif
+
+
+#elif defined(EA_PLATFORM_MICROSOFT) && EATHREAD_MANUAL_FUTEX_ENABLED
+
+	#if defined(EA_PLATFORM_WINDOWS)
+		#pragma warning(push, 0)
+		#include <Windows.h>
+		#pragma warning(pop)
+	#endif
+
+	void EA::Thread::Futex::CreateFSemaphore()
+	{
+		mSemaphore = CreateSemaphoreA(NULL, 0, INT_MAX / 2, NULL);
+		EAT_ASSERT(mSemaphore != 0);
+	}
+
+	void EA::Thread::Futex::DestroyFSemaphore()
+	{
+		if(mSemaphore)
+			CloseHandle(mSemaphore);
+	}
+
+	void EA::Thread::Futex::SignalFSemaphore()
+	{
+		ReleaseSemaphore(mSemaphore, 1, NULL);
+	}
+
+	void EA::Thread::Futex::WaitFSemaphore()
+	{
+		WaitForSingleObject(mSemaphore, INFINITE);
+	}
+
+	bool EA::Thread::Futex::WaitFSemaphore(const ThreadTime& timeoutAbsolute)
+	{
+		int64_t timeoutRelativeMS = (int64_t)(timeoutAbsolute - GetThreadTime());
+		if(timeoutRelativeMS < 1)
+			timeoutRelativeMS = 1;
+		return WaitForSingleObject(mSemaphore, (DWORD)timeoutRelativeMS) == WAIT_OBJECT_0;
+	}
+
+#endif
+
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::Futex* EA::Thread::FutexFactory::CreateFutex()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::Futex))) EA::Thread::Futex;
+	else
+		return new EA::Thread::Futex;
+}
+
+void EA::Thread::FutexFactory::DestroyFutex(EA::Thread::Futex* pFutex)
+{
+	if(gpAllocator)
+	{
+		pFutex->~Futex();
+		gpAllocator->Free(pFutex);
+	}
+	else
+		delete pFutex;
+}
+
+size_t EA::Thread::FutexFactory::GetFutexSize()
+{
+	return sizeof(EA::Thread::Futex);
+}
+
+EA::Thread::Futex* EA::Thread::FutexFactory::ConstructFutex(void* pMemory)
+{
+	return new(pMemory) EA::Thread::Futex;
+}
+
+void EA::Thread::FutexFactory::DestructFutex(EA::Thread::Futex* pFutex)
+{
+	pFutex->~Futex();
+}
+
+
+
+
+
+
+

+ 144 - 0
source/eathread_mutex.cpp

@@ -0,0 +1,144 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <string.h>
+#include <new>
+EA_RESTORE_VC_WARNING()
+
+#if !EA_THREADS_AVAILABLE
+	#include <eathread/eathread_mutex.h>
+#elif EA_USE_CPP11_CONCURRENCY
+	#include "cpp11/eathread_mutex_cpp11.cpp"
+	#if defined(CreateMutex)
+		#undef CreateMutex
+	#endif
+#elif defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_mutex_kettle.cpp"
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include "unix/eathread_mutex_unix.cpp"
+#elif defined(EA_PLATFORM_MICROSOFT)
+	#include "pc/eathread_mutex_pc.cpp"
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::Mutex* EA::Thread::MutexFactory::CreateMutex()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::Mutex))) EA::Thread::Mutex;
+	else
+		return new EA::Thread::Mutex;
+}
+
+void EA::Thread::MutexFactory::DestroyMutex(EA::Thread::Mutex* pMutex)
+{
+	if(gpAllocator)
+	{
+		pMutex->~Mutex();
+		gpAllocator->Free(pMutex);
+	}
+	else
+		delete pMutex;
+}
+
+size_t EA::Thread::MutexFactory::GetMutexSize()
+{
+	return sizeof(EA::Thread::Mutex);
+}
+
+EA::Thread::Mutex* EA::Thread::MutexFactory::ConstructMutex(void* pMemory)
+{
+	return new(pMemory) EA::Thread::Mutex;
+}
+
+void EA::Thread::MutexFactory::DestructMutex(EA::Thread::Mutex* pMutex)
+{
+	pMutex->~Mutex();
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// non-threaded implementation
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(EA_THREAD_NONTHREADED_MUTEX) && EA_THREAD_NONTHREADED_MUTEX
+
+	EAMutexData::EAMutexData()
+		: mnLockCount(0)
+	{
+		// Empty
+	}
+
+
+	EA::Thread::MutexParameters::MutexParameters(bool /*bIntraProcess*/, const char* /*pName*/)
+	  : mbIntraProcess(true)
+	{
+	}
+
+
+	EA::Thread::Mutex::Mutex(const MutexParameters* pMutexParameters, bool bDefaultParameters)
+	{
+		if(!pMutexParameters && bDefaultParameters)
+		{
+			MutexParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pMutexParameters);
+	}
+
+
+	EA::Thread::Mutex::~Mutex()
+	{
+		EAT_ASSERT(mMutexData.mnLockCount == 0);
+	}
+
+
+	bool EA::Thread::Mutex::Init(const MutexParameters* /*pMutexParameters*/)
+	{
+		// Possibly copy pMutexParameters->mName to mMutexData.mName
+		return true;
+	}
+
+
+	int EA::Thread::Mutex::Lock(const ThreadTime& /*timeoutAbsolute*/)
+	{
+		EAT_ASSERT(mMutexData.mnLockCount < 100000);
+
+		return ++mMutexData.mnLockCount;
+	}
+
+
+	int EA::Thread::Mutex::Unlock()
+	{
+		EAT_ASSERT(mMutexData.mnLockCount > 0);
+
+		return --mMutexData.mnLockCount;
+	}
+
+
+	int EA::Thread::Mutex::GetLockCount() const
+	{
+		return mMutexData.mnLockCount;
+	}
+
+
+	bool EA::Thread::Mutex::HasLock() const
+	{
+		return (mMutexData.mnLockCount > 0);
+	}
+
+#endif // EA_THREAD_NONTHREADED_MUTEX

+ 711 - 0
source/eathread_pool.cpp

@@ -0,0 +1,711 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread_pool.h>
+#include <eathread/eathread_sync.h>
+#include <string.h>
+#include <new>
+
+#if   defined(_MSC_VER)
+	#pragma warning(push)
+	#pragma warning(disable: 6011) // Dereferencing NULL pointer 'gpAllocator'
+	#pragma warning(disable: 6211) // Leaking memory 'pThreadInfo' due to an exception.
+	#pragma warning(disable: 6326) // Potential comparison of a constant with another constant
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::ThreadPoolParameters::ThreadPoolParameters()
+  : mnMinCount(EA::Thread::ThreadPool::kDefaultMinCount),
+	mnMaxCount(EA::Thread::ThreadPool::kDefaultMaxCount),
+	mnInitialCount(EA::Thread::ThreadPool::kDefaultInitialCount),
+	mnIdleTimeoutMilliseconds(EA::Thread::ThreadPool::kDefaultIdleTimeout), // This is a relative time, not an absolute time. Can be a millisecond value or Thread::kTimeoutNone or Thread::kTimeoutImmediate.
+	mnProcessorMask(0xffffffff),
+	mDefaultThreadParameters()
+{
+	// Empty
+}
+
+
+EA::Thread::ThreadPool::Job::Job()
+  : mpRunnable(NULL), mpFunction(NULL), mpContext(NULL)
+{
+	// Empty
+}
+
+
+EA::Thread::ThreadPool::ThreadInfo::ThreadInfo()
+  : mbActive(false),
+	mbQuit(false),
+  //mbPersistent(false),
+	mpThread(NULL),
+	mpThreadPool(NULL),
+	mCurrentJob()
+{
+	// Empty
+}
+
+
+EA::Thread::ThreadPool::ThreadPool(const ThreadPoolParameters* pThreadPoolParameters, bool bDefaultParameters)
+  : mbInitialized(false),
+	mnMinCount(kDefaultMinCount), 
+	mnMaxCount(kDefaultMaxCount), 
+	mnCurrentCount(0),
+	mnActiveCount(0),
+	mnIdleTimeoutMilliseconds(kDefaultIdleTimeout),
+	mnProcessorMask((unsigned)kDefaultProcessorMask),
+	mnProcessorCount(0),
+	mnNextProcessor(0),
+	mnPauseCount(0),
+	mnLastJobID(0),
+	mDefaultThreadParameters(),
+	mThreadCondition(NULL, false),  // Explicitly don't initialize.
+	mThreadMutex(NULL, false),      // Explicitly don't initialize.
+	mThreadInfoList(),
+	mJobList()
+{
+	if(!pThreadPoolParameters && bDefaultParameters)
+	{
+		ThreadPoolParameters parameters;
+		Init(&parameters);
+	}
+	else
+		Init(pThreadPoolParameters);
+}
+
+
+EA::Thread::ThreadPool::~ThreadPool()
+{
+	Shutdown(kJobWaitAll, kTimeoutNone);
+	EAT_ASSERT(mJobList.empty() && mThreadInfoList.empty() && (mnCurrentCount == 0) && (mnActiveCount == 0) && (mThreadMutex.GetLockCount() == 0));
+}
+
+
+#ifdef _MSC_VER
+	#pragma warning(push)
+	#pragma warning(disable: 4296 4706) // '>=' : expression is always true and assignment within conditional expression (in the assert)
+#endif
+
+
+#if EAT_ASSERT_ENABLED
+	template <class T>
+	inline bool EATIsUnsigned(T)
+	{ return (((T)(-1) >> 1) != (T)(-1)); }
+#endif
+
+
+// If mDefaultThreadParameters.mnProcessor is set to kThreadPoolParametersProcessorDefault,  
+// then the ThreadPool controls what processors the thread executes on. Otherwise ThreadPool 
+// doesn't set the thread affinity itself.
+static const int kThreadPoolParametersProcessorDefault = -1;
+
+
+bool EA::Thread::ThreadPool::Init(const ThreadPoolParameters* pThreadPoolParameters)
+{
+	if(!mbInitialized)
+	{
+		if(pThreadPoolParameters && (mnCurrentCount == 0))
+		{
+			mbInitialized = true;
+
+			mnMinCount                = pThreadPoolParameters->mnMinCount;
+			mnMaxCount                = pThreadPoolParameters->mnMaxCount;
+			mnCurrentCount            = (int)pThreadPoolParameters->mnInitialCount;
+			mnIdleTimeoutMilliseconds = pThreadPoolParameters->mnIdleTimeoutMilliseconds;
+			mnProcessorMask           = pThreadPoolParameters->mnProcessorMask;
+			mDefaultThreadParameters  = pThreadPoolParameters->mDefaultThreadParameters;
+			mnProcessorCount          = (uint32_t)EA::Thread::GetProcessorCount();  // We currently assume this value is constant at runtime.
+
+			// Do bounds checking. 
+			//if(mnMinCount < 0)  // This check is unnecessary because mnMinCount is of an 
+			//    mnMinCount = 0; // unsigned data type. We assert for this unsigned-ness below.
+			EAT_ASSERT(EATIsUnsigned(mnMinCount));
+
+			if(mnMaxCount > EA_THREAD_POOL_MAX_SIZE)
+				mnMaxCount = EA_THREAD_POOL_MAX_SIZE;
+
+			if(mnCurrentCount < (int)mnMinCount)
+				mnCurrentCount = (int)mnMinCount;
+
+			if(mnCurrentCount > (int)mnMaxCount)
+				mnCurrentCount = (int)mnMaxCount;
+
+			// Make sure the processor mask refers to existing processors.
+			const int processorMask  = (1 << mnProcessorCount) - 1;       // So for a processor count of 8 we have a mask of 11111111 (255)
+
+			if((mnProcessorMask & processorMask) == 0) 
+				mnProcessorMask = 0xffffffff;
+
+			mDefaultThreadParameters.mpStack = NULL;  // You can't specify a default stack location, as every thread needs a unique one.
+			if(mDefaultThreadParameters.mnProcessor != EA::Thread::kProcessorAny)               // If the user hasn't set threads to execute on any processor chosen by the OS...
+				mDefaultThreadParameters.mnProcessor = kThreadPoolParametersProcessorDefault;   //   then use our default processing, which is for us to currently round-robin the processor used.
+
+			ConditionParameters mnp;
+			mThreadCondition.Init(&mnp);
+
+			MutexParameters mtp;
+			mThreadMutex.Init(&mtp);
+
+			mThreadMutex.Lock();
+			const int nDesiredCount((int)mnCurrentCount);
+			mnCurrentCount = 0;
+			AdjustThreadCount((unsigned int)nDesiredCount);
+			mThreadMutex.Unlock();
+
+			return true;
+		}
+	}
+	return false;
+}
+
+
+#ifdef _MSC_VER
+	#pragma warning(pop)
+#endif
+
+
+bool EA::Thread::ThreadPool::Shutdown(JobWait jobWait, const ThreadTime& timeoutAbsolute)
+{
+	int nResult;
+
+	if(mbInitialized)
+	{
+		mbInitialized = false;
+
+		nResult = WaitForJobCompletion(-1, jobWait, timeoutAbsolute);
+
+		mThreadMutex.Lock();
+
+		// If jobWait is kJobWaitNone, then we nuke all existing jobs.
+		if(jobWait == kJobWaitNone)
+			mJobList.clear();
+
+		// Leave a message to tell the thread to quit.
+		for(ThreadInfoList::iterator it(mThreadInfoList.begin()), itEnd(mThreadInfoList.end()); it != itEnd; )
+		{
+			ThreadInfo* const pThreadInfo = *it;
+
+			pThreadInfo->mbQuit       = true;
+		  //pThreadInfo->mbPersistent = false;
+
+			// If somehow the thread isn't running (possibly because it never started), manually remove it.
+			if(pThreadInfo->mpThread->GetStatus() != EA::Thread::Thread::kStatusRunning)
+				it = mThreadInfoList.erase(it);
+			else
+				++it;
+		}
+
+		// Wake up any threads that may be blocked on a condition variable wait.
+		mThreadCondition.Signal(true);
+
+		// Make sure we unlock after we signal, lest there be a certain kind of race condition.
+		mThreadMutex.Unlock();
+
+		// Wait for any existing threads to quit.
+		// Todo: Replace this poor polling loop with Thread::Wait calls.
+		//         Doing so requires a little finessing with the thread 
+		//         objects in the list. Possibly make ThreadInfo ref-counted.
+		while(!mThreadInfoList.empty())
+		{
+			ThreadSleep(1);
+			EAReadBarrier();
+		}
+
+		mThreadMutex.Lock();
+		mnPauseCount = 0;
+		mThreadMutex.Unlock();
+	}
+	else
+		nResult = kResultOK;
+
+	return (nResult == kResultOK);
+}
+
+
+intptr_t EA::Thread::ThreadPool::ThreadFunction(void* pContext)
+{
+	ThreadInfo* const pThreadInfo = reinterpret_cast<ThreadInfo*>(pContext);
+	ThreadPool* const pThreadPool = pThreadInfo->mpThreadPool;
+	Condition*  const pCondition  = &pThreadPool->mThreadCondition;
+	Mutex*      const pMutex      = &pThreadPool->mThreadMutex;
+
+	pMutex->Lock();
+
+	while(!pThreadInfo->mbQuit)
+	{
+		if(!pThreadPool->mJobList.empty())
+		{
+			pThreadInfo->mCurrentJob = pThreadPool->mJobList.front();
+			pThreadPool->mJobList.pop_front();
+			pThreadInfo->mbActive = true;
+			++pThreadPool->mnActiveCount; // Atomic integer operation.
+			pMutex->Unlock();
+
+			// Do the job here. It's important that we keep the mutex unlocked while doing the job.
+			if(pThreadInfo->mCurrentJob.mpRunnable)
+				pThreadInfo->mCurrentJob.mpRunnable->Run(pThreadInfo->mCurrentJob.mpContext);
+			else if(pThreadInfo->mCurrentJob.mpFunction)
+				pThreadInfo->mCurrentJob.mpFunction(pThreadInfo->mCurrentJob.mpContext);
+			else
+				pThreadInfo->mbQuit = true;  // Tell ourself to quit.
+
+			// Problem: We are not paying attention to the pThreadInfo->mbPersistent variable. 
+			// We don't have an easy way of dealing with it because we don't have a means for
+			// the ThreadPool to direct quit commands to individual threads. For now we don't
+			// pay attention to mbPersistent and require that persistence be controlled by 
+			// the min/max thread count settings. 
+
+			pMutex->Lock();
+
+			--pThreadPool->mnActiveCount; // Atomic integer operation.
+			pThreadInfo->mbActive = false;
+		}
+		else
+		{
+			// The wait call here will unlock the condition variable and will re-lock it upon return.
+			EA::Thread::ThreadTime timeoutAbsolute = (GetThreadTime() + pThreadPool->mnIdleTimeoutMilliseconds);
+
+			if (pThreadPool->mnIdleTimeoutMilliseconds == kTimeoutNone) 
+				timeoutAbsolute = kTimeoutNone;
+			else if(pThreadPool->mnIdleTimeoutMilliseconds == kTimeoutImmediate)
+				timeoutAbsolute = kTimeoutImmediate;
+			else if(timeoutAbsolute == kTimeoutNone) // If it coincidentally is the magic kTimeoutNone value...
+				timeoutAbsolute -= 1;
+
+			const Condition::Result result = pCondition->Wait(pMutex, timeoutAbsolute);
+
+			// If the wait result is an error there is no good way to recover, so we
+			// tell the thread to quit; in practice this should never happen.
+			if(result != Condition::kResultOK)
+				pThreadInfo->mbQuit = true;
+		}
+	}
+
+	pThreadPool->RemoveThread(pThreadInfo);
+
+	pMutex->Unlock();
+
+	return 0;
+}
+
+
+EA::Thread::ThreadPool::Result EA::Thread::ThreadPool::QueueJob(const Job& job, Thread** ppThread, bool /*bEnableDeferred*/)
+{
+	if(mbInitialized){
+		mThreadMutex.Lock();
+
+		// If there are other threads busy with jobs or other threads soon to be busy with jobs and if the thread count is less than the maximum allowable, bump up the thread count by one.
+		EAT_ASSERT(mnActiveCount <= mnCurrentCount);
+		if((((int)mnActiveCount >= mnCurrentCount) || !mJobList.empty()) && (mnCurrentCount < (int)mnMaxCount))
+			AdjustThreadCount((unsigned)(mnCurrentCount + 1));
+
+		mJobList.push_back(job);
+		FixThreads();
+
+		if(mnPauseCount == 0)
+			mThreadCondition.Signal(false); // Wake up one thread to work on this.
+
+		mThreadMutex.Unlock();
+
+		if(ppThread){
+			// In this case the caller wants to know which thread got the job.
+			// Todo: Complete this; until then we always report NULL.
+			*ppThread = NULL;
+		}
+
+		return kResultDeferred;
+	}
+
+	return kResultError;
+}
+
+
+int EA::Thread::ThreadPool::Begin(IRunnable* pRunnable, void* pContext, Thread** ppThread, bool bEnableDeferred)
+{
+	Job job;
+	job.mnJobID    = mnLastJobID.Increment();
+	job.mpRunnable = pRunnable;
+	job.mpFunction = NULL;
+	job.mpContext  = pContext;
+
+	if(QueueJob(job, ppThread, bEnableDeferred) != kResultError)
+		return job.mnJobID;
+	return kResultError;
+}
+
+
+int EA::Thread::ThreadPool::Begin(RunnableFunction pFunction, void* pContext, Thread** ppThread, bool bEnableDeferred)
+{
+	Job job;
+	job.mnJobID     = mnLastJobID.Increment();
+	job.mpRunnable = NULL;
+	job.mpFunction = pFunction;
+	job.mpContext  = pContext;
+
+	if(QueueJob(job, ppThread, bEnableDeferred) != kResultError)
+		return job.mnJobID;
+	return kResultError;
+}
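+
+// Usage sketch (illustrative only, not part of this file): queueing a function job
+// and waiting for that specific job, assuming the default arguments declared in
+// eathread_pool.h:
+//
+//     EA::Thread::ThreadPool pool;
+//     const int jobId = pool.Begin(DoWork, &workContext); // DoWork is hypothetical
+//     pool.WaitForJobCompletion(jobId);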
+
+
+int EA::Thread::ThreadPool::WaitForJobCompletion(int nJob, JobWait jobWait, const ThreadTime& timeoutAbsolute)
+{
+	int nResult = kResultError;
+
+	if(nJob == -1){
+		// We have a problem here in that we need to wait for all threads to finish
+		// but the only way to wait for them to finish is to use the Thread::WaitForEnd
+		// function. But when the thread exits, it destroys the Thread object rendering
+		// it unsafe for us to use that object in any safe way here. We can rearrange
+		// things to allow this to work more cleanly, but in the meantime we spin and 
+		// sleep, which is not a good solution if the worker threads are of a lower
+		// priority than this sleeping thread, as this thread will steal their time.
+
+		if(jobWait == kJobWaitNone){
+			// Do nothing.
+			nResult = kResultOK;
+		}
+		else if(jobWait == kJobWaitCurrent){
+			// Wait for currently running jobs to complete.
+			while((mnActiveCount != 0) && (GetThreadTime() < timeoutAbsolute))
+				ThreadSleep(10);
+			if(mnActiveCount == 0)
+				nResult = kResultOK;
+			else
+				nResult = kResultTimeout;
+		}
+		else{ // jobWait == kJobWaitAll
+			// Wait for all current and queued jobs to complete.
+			bool shouldContinue = true;
+
+			while(shouldContinue)
+			{
+				mThreadMutex.Lock();
+				shouldContinue = (((mnActiveCount != 0) || !mJobList.empty()) && (GetThreadTime() < timeoutAbsolute));
+				mThreadMutex.Unlock();
+				if(shouldContinue)
+					ThreadSleep(10);
+			}
+
+			mThreadMutex.Lock();
+
+			if((mnActiveCount == 0) && mJobList.empty())
+				nResult = kResultOK;
+			else
+				nResult = kResultTimeout;
+
+			mThreadMutex.Unlock();
+		}
+	}
+	else{
+		// Like above we do the wait via polling. Ideally we want to set up a 
+		// mechanism whereby we sleep until an alarm wakes us. This can perhaps
+		// be done by setting a flag in the job which causes the job to signal
+		// the alarm when complete. In the meantime we will follow the simpler
+		// behaviour we have here.
+
+		bool bJobExists;
+
+		for(;;){
+			bJobExists = false;
+			mThreadMutex.Lock();
+			
+			// Search the list of jobs yet to become active to see if the job exists in there.
+			for(JobList::iterator it(mJobList.begin()); it != mJobList.end(); ++it){
+				const Job& job = *it;
+
+				if(job.mnJobID == nJob){ // If the user's job was found...
+					bJobExists = true;
+					nResult = kResultTimeout;
+				}
+			}
+
+			// Search the list of jobs actively executing as well.
+			for(ThreadInfoList::iterator it(mThreadInfoList.begin()); it != mThreadInfoList.end(); ++it){
+				const ThreadInfo* const pThreadInfo = *it;
+				const Job& job = pThreadInfo->mCurrentJob;
+
+				// Note the thread must be active for the Job assigned to it to be valid.
+				if(pThreadInfo->mbActive && job.mnJobID == nJob){ // If the user's job was found...
+					bJobExists = true;
+					nResult = kResultTimeout;
+				}
+			}
+	  
+			mThreadMutex.Unlock();
+			if(!bJobExists || (GetThreadTime() >= timeoutAbsolute))
+				break;
+			ThreadSleep(10);
+		}
+
+		if(!bJobExists)
+			nResult = kResultOK;
+	}
+
+	return nResult;
+}
+
+
+void EA::Thread::ThreadPool::Pause(bool bPause)
+{
+	if(bPause)
+		++mnPauseCount;
+	else{
+		if(mnPauseCount.Decrement() == 0){
+			mThreadMutex.Lock();
+			if(!mJobList.empty())
+				mThreadCondition.Signal(true);
+			mThreadMutex.Unlock();
+		}
+	}
+}
+
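+// Usage sketch (illustrative; pool, MyJobFunction and pContext are hypothetical
+// user code): Pause calls nest, so each Pause(true) must be balanced by a
+// Pause(false). Jobs queued while paused are only signaled to workers once the
+// pause count returns to zero.
+//
+//     pool.Pause(true);
+//     pool.Begin(MyJobFunction, pContext, NULL, true); // queued, but no worker is woken
+//     pool.Pause(false);                               // count hits zero; a worker is signaled
+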
+
+void EA::Thread::ThreadPool::Lock()
+{
+	mThreadMutex.Lock();
+}
+
+
+void EA::Thread::ThreadPool::Unlock()
+{
+	mThreadMutex.Unlock();
+}
+
+
+void EA::Thread::ThreadPool::SetupThreadParameters(EA::Thread::ThreadParameters& tp)
+{
+	if(tp.mnProcessor == kThreadPoolParametersProcessorDefault) // If we are to manipulate tp.mnProcessor...
+	{
+		if(mnProcessorMask != 0xffffffff) // If we are not using the default...
+		{
+			// We round-robin mnNextProcessor within our mnProcessorMask. Wrap before
+			// searching so the shift stays in range and the processor we assign is
+			// always one whose mask bit is set.
+			mnNextProcessor %= mnProcessorCount;
+			while(((1 << mnNextProcessor) & mnProcessorMask) == 0)
+				mnNextProcessor = (mnNextProcessor + 1) % mnProcessorCount;
+
+			tp.mnProcessor = (int)mnNextProcessor++;
+		}
+	}
+}
+
+
+EA::Thread::ThreadPool::ThreadInfo* EA::Thread::ThreadPool::AddThread(const EA::Thread::ThreadParameters& tp, bool bBeginThread)
+{
+	ThreadInfo* const pThreadInfo = CreateThreadInfo();
+	EAT_ASSERT(pThreadInfo != NULL);
+
+	if(pThreadInfo)
+	{
+		AddThread(pThreadInfo);
+
+		if(bBeginThread)
+		{
+			ThreadParameters tpUsed(tp);
+			SetupThreadParameters(tpUsed);  // This function sets tpUsed.mnProcessor
+
+			pThreadInfo->mpThread->Begin(ThreadFunction, pThreadInfo, &tpUsed);
+		}
+	}
+
+	return pThreadInfo;
+}
+
+
+// Gets the ThreadInfo for the nth Thread identified by index. 
+// You must call this function within a Lock/Unlock pair on the thread pool.
+EA::Thread::ThreadPool::ThreadInfo* EA::Thread::ThreadPool::GetThreadInfo(int index)
+{
+	EA::Thread::AutoMutex autoMutex(mThreadMutex);
+
+	int i = 0;
+
+	for(ThreadInfoList::iterator it = mThreadInfoList.begin(); it != mThreadInfoList.end(); ++it)
+	{
+		if(i == index)
+		{
+			ThreadInfo* pThreadInfo = *it;
+			return pThreadInfo;
+		}
+
+		++i;
+	}
+		
+	return NULL;
+}
+
+
+// Unless you call this function while the Pool is locked (via Lock), the return
+// value may be out of date by the time you read it. 
+int EA::Thread::ThreadPool::GetThreadCount()
+{
+	EA::Thread::AutoMutex autoMutex(mThreadMutex);
+
+	return (int)mThreadInfoList.size();
+}
+
+
+EA::Thread::ThreadPool::ThreadInfo* EA::Thread::ThreadPool::CreateThreadInfo()
+{
+	// We assume allocation succeeds; the null checks below are just defensive.
+	ThreadInfo* const pThreadInfo = gpAllocator ? new(gpAllocator->Alloc(sizeof(ThreadInfo))) ThreadInfo : new ThreadInfo;
+
+	if(pThreadInfo)
+	{
+		pThreadInfo->mbActive     = false;
+		pThreadInfo->mbQuit       = false;
+		pThreadInfo->mpThreadPool = this;
+		pThreadInfo->mpThread     = gpAllocator ? new(gpAllocator->Alloc(sizeof(Thread))) Thread : new Thread;
+	}
+
+	return pThreadInfo;
+}
+
+
+void EA::Thread::ThreadPool::AdjustThreadCount(unsigned nDesiredCount)
+{
+	// This function doesn't read mnMinCount/mnMaxCount, as it expects the caller to do so.
+	// Assumes that condition variable is locked.
+	int nAdjustment = (int)(nDesiredCount - mnCurrentCount);
+
+	while(nAdjustment > 0) // If we are to create threads...
+	{
+		ThreadInfo* const pThreadInfo = CreateThreadInfo();
+		EAT_ASSERT(pThreadInfo != NULL);
+
+		AddThread(pThreadInfo);
+
+		ThreadParameters tpUsed(mDefaultThreadParameters);
+		SetupThreadParameters(tpUsed); // This function sets tpUsed.mnProcessor
+
+		pThreadInfo->mpThread->Begin(ThreadFunction, pThreadInfo, &tpUsed);
+		nAdjustment--;
+	}
+
+	while(nAdjustment < 0) // If we are to quit threads...
+	{
+		// An empty job is a signal for a thread to quit.
+		QueueJob(Job(), NULL, true);
+		nAdjustment++;
+	}
+
+	FixThreads(); // Makes sure that mnCurrentCount really does match the number of threads waiting for work.
+}
+
+
+
+void EA::Thread::ThreadPool::AddThread(ThreadInfo* pThreadInfo)
+{
+	// Assumes that condition variable is locked.
+	mThreadInfoList.push_back(pThreadInfo);
+	++mnCurrentCount;
+}
+
+
+void EA::Thread::ThreadPool::RemoveThread(ThreadInfo* pThreadInfo)
+{
+	// Assumes that condition variable is locked.
+	ThreadInfoList::iterator it = mThreadInfoList.find(pThreadInfo);
+	EAT_ASSERT(it != mThreadInfoList.end());
+
+	if(it != mThreadInfoList.end())
+	{
+		if(gpAllocator)
+		{
+			pThreadInfo->mpThread->~Thread();
+			gpAllocator->Free(pThreadInfo->mpThread);
+		}
+		else
+			delete pThreadInfo->mpThread;
+
+		pThreadInfo->mpThread = NULL;
+		mThreadInfoList.erase(it);
+
+		if(gpAllocator)
+		{
+			pThreadInfo->~ThreadInfo();
+			gpAllocator->Free(pThreadInfo);
+		}
+		else
+			delete pThreadInfo;
+
+		--mnCurrentCount;
+	}
+}
+
+
+// FixThreads
+// We have a small problem in that the system allows threads to explicitly exit at any time without
+// returning to the caller. Many operating systems with thread support provide no mechanism to notify
+// you via a callback when a thread has exited. Because of this, it is possible that threads could
+// exit without us ever finding out about it. So here we poll the threads to catch up to their state
+// in such cases.
+void EA::Thread::ThreadPool::FixThreads()
+{
+	// Assumes that condition variable is locked.
+	for(ThreadInfoList::iterator it(mThreadInfoList.begin()), itEnd(mThreadInfoList.end()); it != itEnd; ++it)
+	{
+		ThreadInfo* const pThreadInfo = *it;
+
+		// Fix any threads which have exited via a thread exit and not by simply returning to the caller.
+		const EA::Thread::Thread::Status status = pThreadInfo->mpThread->GetStatus();
+
+		if(status == EA::Thread::Thread::kStatusEnded)
+			pThreadInfo->mpThread->Begin(ThreadFunction, pThreadInfo, &mDefaultThreadParameters);
+	}
+}
+
+
+EA::Thread::ThreadPool* EA::Thread::ThreadPoolFactory::CreateThreadPool()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::ThreadPool))) EA::Thread::ThreadPool;
+	else
+		return new EA::Thread::ThreadPool;
+}
+
+
+void EA::Thread::ThreadPoolFactory::DestroyThreadPool(EA::Thread::ThreadPool* pThreadPool)
+{
+	if(gpAllocator)
+	{
+		pThreadPool->~ThreadPool();
+		gpAllocator->Free(pThreadPool);
+	}
+	else
+		delete pThreadPool;
+}
+
+
+size_t EA::Thread::ThreadPoolFactory::GetThreadPoolSize()
+{
+	return sizeof(EA::Thread::ThreadPool);
+}
+
+
+EA::Thread::ThreadPool* EA::Thread::ThreadPoolFactory::ConstructThreadPool(void* pMemory)
+{
+	return new(pMemory) EA::Thread::ThreadPool;
+}
+
+
+void EA::Thread::ThreadPoolFactory::DestructThreadPool(EA::Thread::ThreadPool* pThreadPool)
+{
+	pThreadPool->~ThreadPool();
+}
+
+
+#if defined(_MSC_VER)
+	#pragma warning(pop)
+#endif
+
+

+ 263 - 0
source/eathread_rwmutex.cpp

@@ -0,0 +1,263 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(_MSC_VER)
+	#pragma warning(disable: 4985)  // 'ceil': attributes not present on previous declaration (triggered by VC9's intrin.h).
+#endif
+
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread_rwmutex.h>
+#include <eathread/eathread.h>
+#include <new> // include new for placement new operator
+#include <string.h>
+
+#ifdef _MSC_VER
+	#pragma warning(disable : 4996) // This function or variable may be unsafe / deprecated.
+#endif
+
+
+	EARWMutexData::EARWMutexData()
+	  : mnReadWaiters(0), 
+		mnWriteWaiters(0), 
+		mnReaders(0),
+		mThreadIdWriter(EA::Thread::kThreadIdInvalid), 
+		mMutex(NULL, false),
+		mReadCondition(NULL, false),
+		mWriteCondition(NULL, false)
+	{
+		// Empty
+	}
+	
+	
+	EA::Thread::RWMutexParameters::RWMutexParameters(bool bIntraProcess, const char* pName)
+		: mbIntraProcess(bIntraProcess)
+	{
+		(void)pName; // Suppress possible warnings.
+
+		#ifdef EA_PLATFORM_WINDOWS
+			if(pName)
+			{
+				strncpy(mName, pName, sizeof(mName)-1);
+				mName[sizeof(mName)-1] = 0;
+			}
+			else
+				mName[0] = 0;
+		#endif
+	}
+	
+	
+	EA::Thread::RWMutex::RWMutex(const RWMutexParameters* pRWMutexParameters, bool bDefaultParameters)
+	{
+		if(!pRWMutexParameters && bDefaultParameters)
+		{
+			RWMutexParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pRWMutexParameters);
+	}
+	
+	
+	EA::Thread::RWMutex::~RWMutex()
+	{
+		// Possibly do asserts here.
+	}
+	
+	
+	bool EA::Thread::RWMutex::Init(const RWMutexParameters* pRWMutexParameters)
+	{
+		if(pRWMutexParameters)
+		{
+			#if EATHREAD_MULTIPROCESSING_OS
+				EAT_ASSERT(pRWMutexParameters->mbIntraProcess); // We don't currently have support for inter-process RWMutex on these (multi-processing) platforms.
+			#endif
+
+			MutexParameters mup(pRWMutexParameters->mbIntraProcess);
+			mRWMutexData.mMutex.Init(&mup);
+
+			ConditionParameters mop(pRWMutexParameters->mbIntraProcess);
+			mRWMutexData.mReadCondition.Init(&mop);
+			mRWMutexData.mWriteCondition.Init(&mop);
+			return true;
+		}
+
+		return false;
+	}
+	
+	
+	int EA::Thread::RWMutex::Lock(LockType lockType, const ThreadTime& timeoutAbsolute)
+	{
+		int result = 0;
+	
+		mRWMutexData.mMutex.Lock(); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+		EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+		// We cannot obtain a write lock recursively, else we will deadlock.
+		// Alternatively, we can build a bunch of extra logic to deal with this.
+		EAT_ASSERT(mRWMutexData.mThreadIdWriter != GetThreadId());
+	
+		// Assert that there aren't both readers and writers at the same time.
+		EAT_ASSERT(!((mRWMutexData.mThreadIdWriter != kThreadIdInvalid) && mRWMutexData.mnReaders));
+	
+		if(lockType == kLockTypeRead)
+		{
+			while(mRWMutexData.mThreadIdWriter != kThreadIdInvalid)
+			{
+				EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+				mRWMutexData.mnReadWaiters++;
+				const Condition::Result mresult = mRWMutexData.mReadCondition.Wait(&mRWMutexData.mMutex, timeoutAbsolute);
+				mRWMutexData.mnReadWaiters--;
+	
+				EAT_ASSERT(mresult != EA::Thread::Condition::kResultError);
+				EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+				if(mresult == Condition::kResultTimeout)
+				{
+					mRWMutexData.mMutex.Unlock();
+					return kResultTimeout;
+				}
+			}
+	
+			result = ++mRWMutexData.mnReaders; // This is not an atomic operation. We are within a mutex lock.
+		}
+		else if(lockType == kLockTypeWrite)
+		{
+			while((mRWMutexData.mnReaders > 0) || (mRWMutexData.mThreadIdWriter != kThreadIdInvalid))
+			{
+				EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+				mRWMutexData.mnWriteWaiters++;
+				const Condition::Result mresult = mRWMutexData.mWriteCondition.Wait(&mRWMutexData.mMutex, timeoutAbsolute);
+				mRWMutexData.mnWriteWaiters--;
+	
+				EAT_ASSERT(mresult != EA::Thread::Condition::kResultError);
+				EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+				if(mresult == Condition::kResultTimeout)
+				{
+					mRWMutexData.mMutex.Unlock();
+					return kResultTimeout;
+				}
+			}
+	
+			result = 1;
+			mRWMutexData.mThreadIdWriter = GetThreadId();
+		}
+	
+		EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+		mRWMutexData.mMutex.Unlock();
+	
+		return result;
+	}
+	
+	
+	int EA::Thread::RWMutex::Unlock()
+	{
+		mRWMutexData.mMutex.Lock(); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+		EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+	
+		if(mRWMutexData.mThreadIdWriter != kThreadIdInvalid)
+		{
+			EAT_ASSERT(mRWMutexData.mThreadIdWriter == GetThreadId());
+	
+			// Possibly enable this if we want some runtime error checking at some cost.
+			//if(mRWMutexData.mThreadIdWriter == GetThreadId()){
+			//    mRWMutexData.mMutex.Unlock();
+			//    return kResultError;
+			//}
+	
+			mRWMutexData.mThreadIdWriter = kThreadIdInvalid;
+		}
+		else
+		{
+			EAT_ASSERT(mRWMutexData.mnReaders >= 1);
+	
+			// Possibly enable this if we want some runtime error checking at some cost.
+			//if(mRWMutexData.mnReaders < 1){
+			//    mRWMutexData.mMutex.Unlock();
+			//    return kResultError;
+			//}
+	
+			const int nNewReaders = --mRWMutexData.mnReaders; // This is not an atomic operation. We are within a mutex lock.
+			if(nNewReaders > 0)
+			{
+				EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+				mRWMutexData.mMutex.Unlock();
+				return nNewReaders;
+			}
+		}
+	
+		if(mRWMutexData.mnWriteWaiters > 0)
+			mRWMutexData.mWriteCondition.Signal(false);
+		else if(mRWMutexData.mnReadWaiters > 0)
+			mRWMutexData.mReadCondition.Signal(true);
+	
+		EAT_ASSERT(mRWMutexData.mMutex.GetLockCount() == 1);
+		mRWMutexData.mMutex.Unlock();
+	
+		return 0;
+	}
+	
+	
+	int EA::Thread::RWMutex::GetLockCount(LockType lockType)
+	{
+		if(lockType == kLockTypeRead)
+			return mRWMutexData.mnReaders;
+		else if((lockType == kLockTypeWrite) && (mRWMutexData.mThreadIdWriter != kThreadIdInvalid))
+			return 1;
+		return 0;
+	}
+
+
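+	// Usage sketch (illustrative; mRWMutex stands in for a shared RWMutex):
+	// Lock returns the new lock count on success, so a result > 0 means the
+	// lock was acquired.
+	//
+	//     if(mRWMutex.Lock(EA::Thread::RWMutex::kLockTypeRead, EA::Thread::kTimeoutNone) > 0)
+	//     {
+	//         // ... read the shared data ...
+	//         mRWMutex.Unlock();
+	//     }
+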
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::RWMutex* EA::Thread::RWMutexFactory::CreateRWMutex()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::RWMutex))) EA::Thread::RWMutex;
+	else
+		return new EA::Thread::RWMutex;
+}
+
+void EA::Thread::RWMutexFactory::DestroyRWMutex(EA::Thread::RWMutex* pRWMutex)
+{
+	if(gpAllocator)
+	{
+		pRWMutex->~RWMutex();
+		gpAllocator->Free(pRWMutex);
+	}
+	else
+		delete pRWMutex;
+}
+
+size_t EA::Thread::RWMutexFactory::GetRWMutexSize()
+{
+	return sizeof(EA::Thread::RWMutex);
+}
+
+EA::Thread::RWMutex* EA::Thread::RWMutexFactory::ConstructRWMutex(void* pMemory)
+{
+	return new(pMemory) EA::Thread::RWMutex;
+}
+
+void EA::Thread::RWMutexFactory::DestructRWMutex(EA::Thread::RWMutex* pRWMutex)
+{
+	pRWMutex->~RWMutex();
+}
+
+
+
+
+

+ 361 - 0
source/eathread_rwmutex_ip.cpp

@@ -0,0 +1,361 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread_rwmutex_ip.h>
+#include <new> // include new for placement new operator
+#include <string.h>
+
+
+#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+
+	///////////////////////////////////////////////////////////////////////////
+	// EARWMutexIPData
+	///////////////////////////////////////////////////////////////////////////
+
+	EA::Thread::EARWMutexIPData::EARWMutexIPData()
+	  : mSharedData(),  // This still needs to be Init-ed.
+		mMutex(NULL),
+		mReadSemaphore(NULL),
+		mWriteSemaphore(NULL)
+	{
+	}
+
+	EA::Thread::EARWMutexIPData::~EARWMutexIPData()
+	{
+		// mSharedData.Shutdown(); // This shouldn't be necessary, as the SharedData dtor will do this itself.
+	}
+
+	bool EA::Thread::EARWMutexIPData::Init(const char* pName)
+	{
+		// Bound the copies so a long pName can't overflow the fixed-size name
+		// buffers; we reserve room for the longest suffix (".Mutex", 6 chars).
+		const size_t kMaxBaseLength = 256 - 7; // 6 suffix chars + terminating '\0'.
+
+		char mutexName[256];
+		mutexName[0] = '\0';
+		if(pName)
+		{
+			strncpy(mutexName, pName, kMaxBaseLength);
+			mutexName[kMaxBaseLength] = '\0';
+		}
+		strcat(mutexName, ".Mutex");
+		mMutex = CreateMutexA(NULL, FALSE, mutexName);
+
+		char readSemaphoreName[256];
+		readSemaphoreName[0] = '\0';
+		if(pName)
+		{
+			strncpy(readSemaphoreName, pName, kMaxBaseLength);
+			readSemaphoreName[kMaxBaseLength] = '\0';
+		}
+		strcat(readSemaphoreName, ".SemR");
+		mReadSemaphore = CreateSemaphoreA(NULL, 0, 9999, readSemaphoreName);
+
+		char writeSemaphoreName[256];
+		writeSemaphoreName[0] = '\0';
+		if(pName)
+		{
+			strncpy(writeSemaphoreName, pName, kMaxBaseLength);
+			writeSemaphoreName[kMaxBaseLength] = '\0';
+		}
+		strcat(writeSemaphoreName, ".SemW");
+		mWriteSemaphore = CreateSemaphoreA(NULL, 0, 9999, writeSemaphoreName);
+
+		return mSharedData.Init(pName);
+	}
+
+	void EA::Thread::EARWMutexIPData::Shutdown()
+	{
+		if(mMutex)
+		{
+			CloseHandle(mMutex);
+			mMutex = NULL;
+		}
+
+		if(mReadSemaphore)
+		{
+			CloseHandle(mReadSemaphore);
+			mReadSemaphore = NULL;
+		}
+
+		if(mWriteSemaphore)
+		{
+			CloseHandle(mWriteSemaphore);
+			mWriteSemaphore = NULL;
+		}
+
+		mSharedData.Shutdown();
+	}
+
+
+
+	///////////////////////////////////////////////////////////////////////////
+	// RWMutexIPParameters
+	///////////////////////////////////////////////////////////////////////////
+
+	EA::Thread::RWMutexIPParameters::RWMutexIPParameters(bool bIntraProcess, const char* pName)
+		: mbIntraProcess(bIntraProcess)
+	{
+		#ifdef EA_PLATFORM_WINDOWS
+			if(pName)
+			{
+				strncpy(mName, pName, sizeof(mName)-1);
+				mName[sizeof(mName)-1] = 0;
+			}
+			else
+				mName[0] = 0;
+		#else
+			(void)pName; // Suppress possible warnings.
+		#endif
+	}
+
+
+
+	///////////////////////////////////////////////////////////////////////////
+	// RWMutexIP
+	///////////////////////////////////////////////////////////////////////////
+
+	EA::Thread::RWMutexIP::RWMutexIP(const RWMutexIPParameters* pRWMutexIPParameters, bool bDefaultParameters)
+	{
+		if(!pRWMutexIPParameters && bDefaultParameters)
+		{
+			RWMutexIPParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pRWMutexIPParameters);
+	}
+	
+	
+	EA::Thread::RWMutexIP::~RWMutexIP()
+	{
+	}
+	
+
+	bool EA::Thread::RWMutexIP::Init(const RWMutexIPParameters* pRWMutexIPParameters)
+	{
+		if(pRWMutexIPParameters)
+		{
+			// Must provide a valid name for inter-process RWMutex.
+			EAT_ASSERT(pRWMutexIPParameters->mbIntraProcess || pRWMutexIPParameters->mName[0]);
+
+			return mRWMutexIPData.Init(pRWMutexIPParameters->mName);
+		}
+
+		return false;
+	}
+
+
+	int EA::Thread::RWMutexIP::Lock(LockType lockType, const ThreadTime& /*timeoutAbsolute*/)
+	{
+		int result = 0;
+
+		WaitForSingleObject(mRWMutexIPData.mMutex, INFINITE); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+		//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+	
+		// We cannot obtain a write lock recursively, else we will deadlock.
+		// Alternatively, we can build a bunch of extra logic to deal with this.
+		EAT_ASSERT(mRWMutexIPData.mSharedData->mThreadIdWriter != ::GetCurrentThreadId());
+	
+		// Assert that there aren't both readers and writers at the same time.
+		EAT_ASSERT(!((mRWMutexIPData.mSharedData->mThreadIdWriter != kSysThreadIdInvalid) && mRWMutexIPData.mSharedData->mnReaders));
+
+		if(lockType == kLockTypeRead)
+		{
+			while(mRWMutexIPData.mSharedData->mThreadIdWriter != kSysThreadIdInvalid)
+			{
+				//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+	
+				mRWMutexIPData.mSharedData->mnReadWaiters++;
+				ReleaseMutex(mRWMutexIPData.mMutex);
+				DWORD dwResult = WaitForSingleObject(mRWMutexIPData.mReadSemaphore, INFINITE); // To do: support timeoutAbsolute
+				WaitForSingleObject(mRWMutexIPData.mMutex, INFINITE);
+				mRWMutexIPData.mSharedData->mnReadWaiters--;
+	
+				EAT_ASSERT(dwResult != WAIT_FAILED);
+				//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+	
+				if(dwResult == WAIT_TIMEOUT)
+				{
+					ReleaseMutex(mRWMutexIPData.mMutex);
+					return kResultTimeout;
+				}
+			}
+	
+			result = ++mRWMutexIPData.mSharedData->mnReaders; // This is not an atomic operation. We are within a mutex lock.
+		}
+		else if(lockType == kLockTypeWrite)
+		{
+			while((mRWMutexIPData.mSharedData->mnReaders > 0) ||                            // While somebody has the read lock or
+				  (mRWMutexIPData.mSharedData->mThreadIdWriter != kSysThreadIdInvalid))     // somebody has the write lock... go back to waiting.
+			{
+				//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+	
+				mRWMutexIPData.mSharedData->mnWriteWaiters++;
+				ReleaseMutex(mRWMutexIPData.mMutex);
+				DWORD dwResult = WaitForSingleObject(mRWMutexIPData.mWriteSemaphore, INFINITE); // To do: support timeoutAbsolute
+				WaitForSingleObject(mRWMutexIPData.mMutex, INFINITE);
+				mRWMutexIPData.mSharedData->mnWriteWaiters--;
+	
+				EAT_ASSERT(dwResult != WAIT_FAILED);
+				//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+	
+				if(dwResult == WAIT_TIMEOUT)
+				{
+					ReleaseMutex(mRWMutexIPData.mMutex);
+					return kResultTimeout;
+				}
+			}
+	
+			result = 1;
+			mRWMutexIPData.mSharedData->mThreadIdWriter = ::GetCurrentThreadId();
+		}
+	
+		//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+		ReleaseMutex(mRWMutexIPData.mMutex);
+	
+		return result;
+	}
+
+
+	int EA::Thread::RWMutexIP::Unlock()
+	{
+		WaitForSingleObject(mRWMutexIPData.mMutex, INFINITE); // This lock should always be fast, as it belongs to us and we only hold onto it very temporarily.
+		//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+
+		if(mRWMutexIPData.mSharedData->mThreadIdWriter != kSysThreadIdInvalid) // If we have a write lock...
+		{
+			EAT_ASSERT(mRWMutexIPData.mSharedData->mThreadIdWriter == ::GetCurrentThreadId());
+			mRWMutexIPData.mSharedData->mThreadIdWriter = kSysThreadIdInvalid;
+		}
+		else // Else we have a read lock...
+		{
+			EAT_ASSERT(mRWMutexIPData.mSharedData->mnReaders >= 1);
+
+			const int nNewReaders = --mRWMutexIPData.mSharedData->mnReaders; // This is not an atomic operation. We are within a mutex lock.
+			if(nNewReaders > 0)
+			{
+				//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+				ReleaseMutex(mRWMutexIPData.mMutex);
+				return nNewReaders;
+			}
+		}
+
+		if(mRWMutexIPData.mSharedData->mnWriteWaiters > 0)
+		{
+			ReleaseSemaphore(mRWMutexIPData.mWriteSemaphore, 1, NULL);
+			// We rely on the released write waiter to decrement mnWriteWaiters.
+			// If the released write waiter doesn't wake up for a while, it's possible that the ReleaseMutex below 
+			// will be called and another read unlocker will execute this code and release the semaphore again and
+			// we will have two writers that are released. But this isn't a problem because the released writers 
+			// must still lock our mMutex and contend for the write lock, and one of the two will fail and go back
+			// to waiting on the semaphore.
+		}
+		else if(mRWMutexIPData.mSharedData->mnReadWaiters > 0)
+		{
+			// I'm a little concerned about this signal here. We release the semaphore with a count of
+			// mnReadWaiters, though it's possible that a reader could time out before this function
+			// completes, leaving part of the semaphore count unclaimed by waiters. However, the read
+			// wait code in the Lock function above handles this case, as it re-checks that it can hold
+			// the read lock before claiming it.
+			ReleaseSemaphore(mRWMutexIPData.mReadSemaphore, mRWMutexIPData.mSharedData->mnReadWaiters, NULL);
+		}
+
+		//EAT_ASSERT(mRWMutexIPData.mMutex.GetLockCount() == 1);
+		ReleaseMutex(mRWMutexIPData.mMutex);
+
+		return 0;
+	}
+
+
+	int EA::Thread::RWMutexIP::GetLockCount(LockType lockType)
+	{
+		if(lockType == kLockTypeRead)
+			return mRWMutexIPData.mSharedData->mnReaders;
+		else if((lockType == kLockTypeWrite) && (mRWMutexIPData.mSharedData->mThreadIdWriter != kSysThreadIdInvalid))
+			return 1;
+		return 0;
+	}
+
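+	// Usage sketch (illustrative; the parameter values are hypothetical): an
+	// inter-process RW mutex shared by name. Both processes must pass the same
+	// name string. Note this Windows implementation currently waits with
+	// INFINITE and ignores timeoutAbsolute.
+	//
+	//     EA::Thread::RWMutexIPParameters params(false, "MySharedRWMutex");
+	//     EA::Thread::RWMutexIP mutexIP(&params, false);
+	//
+	//     mutexIP.Lock(EA::Thread::RWMutexIP::kLockTypeWrite, EA::Thread::kTimeoutNone);
+	//     // ... write the shared data ...
+	//     mutexIP.Unlock();
+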
+
+#else
+
+	EA::Thread::RWMutexIPParameters::RWMutexIPParameters(bool /*bIntraProcess*/, const char* /*pName*/)
+	{
+	}
+
+
+	EA::Thread::RWMutexIP::RWMutexIP(const RWMutexIPParameters* /*pRWMutexIPParameters*/, bool /*bDefaultParameters*/)
+	{
+	}
+	
+	
+	EA::Thread::RWMutexIP::~RWMutexIP()
+	{
+	}
+	
+
+	bool EA::Thread::RWMutexIP::Init(const RWMutexIPParameters* /*pRWMutexIPParameters*/)
+	{
+		return false;
+	}
+
+
+	int EA::Thread::RWMutexIP::Lock(LockType /*lockType*/, const ThreadTime& /*timeoutAbsolute*/)
+	{
+		return 0;
+	}
+
+
+	int EA::Thread::RWMutexIP::Unlock()
+	{
+		return 0;
+	}
+
+
+	int EA::Thread::RWMutexIP::GetLockCount(LockType /*lockType*/)
+	{
+		return 0;
+	}
+
+#endif // EA_PLATFORM_XXX
+
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::RWMutexIP* EA::Thread::RWMutexIPFactory::CreateRWMutexIP()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::RWMutexIP))) EA::Thread::RWMutexIP;
+	else
+		return new EA::Thread::RWMutexIP;
+}
+
+void EA::Thread::RWMutexIPFactory::DestroyRWMutexIP(EA::Thread::RWMutexIP* pRWMutexIP)
+{
+	if(gpAllocator)
+	{
+		pRWMutexIP->~RWMutexIP();
+		gpAllocator->Free(pRWMutexIP);
+	}
+	else
+		delete pRWMutexIP;
+}
+
+size_t EA::Thread::RWMutexIPFactory::GetRWMutexIPSize()
+{
+	return sizeof(EA::Thread::RWMutexIP);
+}
+
+EA::Thread::RWMutexIP* EA::Thread::RWMutexIPFactory::ConstructRWMutexIP(void* pMemory)
+{
+	return new(pMemory) EA::Thread::RWMutexIP;
+}
+
+void EA::Thread::RWMutexIPFactory::DestructRWMutexIP(EA::Thread::RWMutexIP* pRWMutexIP)
+{
+	pRWMutexIP->~RWMutexIP();
+}
+
+
+
+
+

+ 351 - 0
source/eathread_semaphore.cpp

@@ -0,0 +1,351 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include <eathread/eathread_semaphore.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <string.h>
+#include <new>
+EA_RESTORE_VC_WARNING()
+
+#if !EA_THREADS_AVAILABLE
+	#include <eathread/eathread_semaphore.h>
+#elif EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+	// Fall through.
+#elif 0 //EA_USE_CPP11_CONCURRENCY
+	#include "cpp11/eathread_semaphore_cpp11.cpp"
+#elif defined(__APPLE__)
+	#include "apple/eathread_semaphore_apple.cpp"
+#elif defined(EA_PLATFORM_ANDROID)
+	#include "android/eathread_semaphore_android.cpp"
+#elif defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_semaphore_kettle.cpp"
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include "unix/eathread_semaphore_unix.cpp"
+#elif defined(EA_PLATFORM_MICROSOFT)
+	#include "pc/eathread_semaphore_pc.cpp"
+#endif
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::Semaphore* EA::Thread::SemaphoreFactory::CreateSemaphore()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::Semaphore))) EA::Thread::Semaphore;
+	else
+		return new EA::Thread::Semaphore;
+}
+
+void EA::Thread::SemaphoreFactory::DestroySemaphore(EA::Thread::Semaphore* pSemaphore)
+{
+	if(gpAllocator)
+	{
+		pSemaphore->~Semaphore();
+		gpAllocator->Free(pSemaphore);
+	}
+	else
+		delete pSemaphore;
+}
+
+size_t EA::Thread::SemaphoreFactory::GetSemaphoreSize()
+{
+	return sizeof(EA::Thread::Semaphore);
+}
+
+EA::Thread::Semaphore* EA::Thread::SemaphoreFactory::ConstructSemaphore(void* pMemory)
+{
+	return new(pMemory) EA::Thread::Semaphore;
+}
+
+void EA::Thread::SemaphoreFactory::DestructSemaphore(EA::Thread::Semaphore* pSemaphore)
+{
+	pSemaphore->~Semaphore();
+}
+
+
+
+#if EATHREAD_USE_SYNTHESIZED_SEMAPHORE
+
+	EASemaphoreData::EASemaphoreData()
+	  : mCV(), 
+		mMutex(),      
+		mnCount(0),
+		mnMaxCount(INT_MAX),
+		mbValid(false)
+	{
+		// Empty
+	}
+
+
+	EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* pName)
+	  : mInitialCount(initialCount),
+		mMaxCount(INT_MAX),
+		mbIntraProcess(bIntraProcess)
+	{
+		if(pName)
+		{
+			strncpy(mName, pName, sizeof(mName)-1);
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+			mName[0] = 0;
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+	{
+		if(!pSemaphoreParameters && bDefaultParameters)
+		{
+			SemaphoreParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pSemaphoreParameters);
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(int initialCount)
+	{
+		SemaphoreParameters parameters(initialCount);
+		Init(&parameters);
+	}
+
+
+	EA::Thread::Semaphore::~Semaphore()
+	{
+		EAT_ASSERT(!mSemaphoreData.mMutex.HasLock()); // The mMutex destructor will also assert this, but here it makes it more obvious this mutex is ours.
+	}
+
+
+	bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+	{
+		if(pSemaphoreParameters && (!mSemaphoreData.mbValid))
+		{
+			mSemaphoreData.mbValid    = true; // It's not really true unless our mCV and mMutex members init OK. To do: Add functions to our classes that verify they are OK.
+			mSemaphoreData.mnCount    = pSemaphoreParameters->mInitialCount;
+			mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
+
+			if(mSemaphoreData.mnCount < 0)
+				mSemaphoreData.mnCount = 0;
+
+			return mSemaphoreData.mbValid;
+		}
+
+		return false;
+	}
+
+
+	int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		int nReturnValue = kResultError;
+		int result       = mSemaphoreData.mMutex.Lock(); // This mutex is owned by us and will be unlocked immediately in the mCV.Wait call, so we don't apply timeoutAbsolute. To consider: Maybe we should do so, though it's less efficient.
+
+		if(result > 0) // If success...
+		{
+			if(timeoutAbsolute == kTimeoutImmediate)
+			{
+				if(mSemaphoreData.mnCount.GetValue() >= 1)
+					nReturnValue = mSemaphoreData.mnCount.Decrement();
+				else
+					nReturnValue = kResultTimeout;
+			}
+			else
+			{
+				if(mSemaphoreData.mnCount.GetValue() >= 1) // If we can decrement it immediately...
+					nReturnValue = mSemaphoreData.mnCount.Decrement();
+				else // Else we need to wait.
+				{
+					Condition::Result cResult;
+
+					do{
+						cResult = mSemaphoreData.mCV.Wait(&mSemaphoreData.mMutex, timeoutAbsolute);
+					} while((cResult == Condition::kResultOK) && (mSemaphoreData.mnCount.GetValue() < 1)); // Always need to check the condition and retry if not matched. In rare cases two threads could return from Wait.
+
+					if(cResult == Condition::kResultOK) // If apparent success...
+						nReturnValue = mSemaphoreData.mnCount.Decrement();
+					else if(cResult == Condition::kResultTimeout)
+						nReturnValue = kResultTimeout;
+					else
+					{
+						// We return immediately here because mCV.Wait has not locked the mutex for 
+						// us and so we don't want to fall through and unlock it below. Also, it would
+						// be inefficient for us to lock here and fall through only to unlock it below.
+						return nReturnValue;
+					}
+				}
+			}
+
+			result = mSemaphoreData.mMutex.Unlock();
+			EAT_ASSERT(result >= 0);
+			if(result < 0)
+				nReturnValue = kResultError; // This Semaphore is now considered dead and unusable.
+		}
+
+		return nReturnValue;
+	}
+
+
+	int EA::Thread::Semaphore::Post(int count)
+	{
+		EAT_ASSERT(mSemaphoreData.mnCount >= 0);
+
+		int newValue = kResultError;
+		int result   = mSemaphoreData.mMutex.Lock();
+
+		if(result > 0)
+		{
+			// Set the new value to be whatever the current value is. 
+			newValue = mSemaphoreData.mnCount.GetValue();
+
+			if((mSemaphoreData.mnMaxCount - count) < newValue)  // If count would cause an overflow...
+			{
+				mSemaphoreData.mMutex.Unlock(); // Don't return while still holding our lock.
+				return kResultError; // We do what most OS implementations of max-count do. count = (mSemaphoreData.mnMaxCount - newValue);
+			}
+
+			newValue = mSemaphoreData.mnCount.Add(count);
+
+			bool bResult = mSemaphoreData.mCV.Signal(true); // Signal broadcast (the true arg) because semaphores could have multiple counts and multiple threads waiting for them. There's a potential "thundering herd" problem here.
+			EAT_ASSERT(bResult);
+			EA_UNUSED(bResult);
+
+			result = mSemaphoreData.mMutex.Unlock(); // Important that we unlock after the mCV.Signal.
+			EAT_ASSERT(result >= 0);
+			if(result < 0)
+				newValue = kResultError; // This Semaphore is now considered dead and unusable.
+		}
+
+		return newValue;
+	}
+
+
+	int EA::Thread::Semaphore::GetCount() const
+	{
+		return mSemaphoreData.mnCount.GetValue();
+	}
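+
+	// Usage sketch (illustrative): a producer/consumer handoff using a semaphore
+	// constructed with an initial count of 0.
+	//
+	//     EA::Thread::Semaphore sem(0);
+	//     // producer thread:  sem.Post();                          // count 0 -> 1, wakes a waiter
+	//     // consumer thread:  sem.Wait(EA::Thread::kTimeoutNone);  // blocks until count >= 1, then decrements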
+
+#elif !EA_THREADS_AVAILABLE
+
+	///////////////////////////////////////////////////////////////////////////////
+	// non-threaded implementation
+	///////////////////////////////////////////////////////////////////////////////
+
+	EASemaphoreData::EASemaphoreData()
+	  : mnCount(0), 
+		mnMaxCount(INT_MAX)
+	{
+		// Empty
+	}
+
+
+	EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* pName)
+	  : mInitialCount(initialCount),
+		mMaxCount(INT_MAX),
+		mbIntraProcess(bIntraProcess)
+	{
+		if(pName)
+		{
+			strncpy(mName, pName, sizeof(mName)-1);
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+			mName[0] = 0;
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+	{
+		if(!pSemaphoreParameters && bDefaultParameters)
+		{
+			SemaphoreParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pSemaphoreParameters);
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(int initialCount)
+	{
+		SemaphoreParameters parameters(initialCount);
+		Init(&parameters);
+	}
+
+
+	EA::Thread::Semaphore::~Semaphore()
+	{
+	}
+
+
+	bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+	{
+		if(pSemaphoreParameters)
+		{
+			mSemaphoreData.mnCount    = pSemaphoreParameters->mInitialCount;
+			mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
+			return true;
+		}
+
+		return false;
+	}
+
+
+	int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		if(timeoutAbsolute == kTimeoutNone)
+		{
+			while(mSemaphoreData.mnCount <= 0)
+				ThreadSleep(1);
+
+			--mSemaphoreData.mnCount;
+		}
+		else if(timeoutAbsolute == kTimeoutImmediate)
+		{
+			if(mSemaphoreData.mnCount)
+				--mSemaphoreData.mnCount;
+			else
+				return kResultTimeout;
+		}
+		else
+		{
+			while((mSemaphoreData.mnCount <= 0) && (GetThreadTime() < timeoutAbsolute))
+				ThreadSleep(1);
+
+			if(mSemaphoreData.mnCount <= 0)
+				return kResultTimeout;
+
+			--mSemaphoreData.mnCount; // Claim the count we waited for, matching the other branches.
+		}
+
+		return mSemaphoreData.mnCount;
+	}
+
+
+	int EA::Thread::Semaphore::Post(int count)
+	{
+		EAT_ASSERT(mSemaphoreData.mnCount >= 0);
+
+		// Ideally, what we would do is account for the number of waiters in 
+		// this overflow calculation. If max-count = 4, count = 6, waiters = 8, 
+		// we would release 6 waiters and leave the semaphore at 2.
+		// The problem is that some of those 6 waiters might time out while we 
+		// are doing this and leave ourselves with count greater than max-count.
+		if((mSemaphoreData.mnMaxCount - count) < mSemaphoreData.mnCount)  // If count would cause an overflow...
+			return kResultError; // We do what most OS implementations of max-count do. // count = (mSemaphoreData.mnMaxCount - nLastCount);
+
+		return (mSemaphoreData.mnCount += count);
+	}
+
+
+	int EA::Thread::Semaphore::GetCount() const
+	{
+		return mSemaphoreData.mnCount;
+	}
+
+#endif // !EA_THREADS_AVAILABLE
+

+ 354 - 0
source/eathread_storage.cpp

@@ -0,0 +1,354 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/eathread_storage.h>
+#include <eathread/eathread.h>
+
+EA_DISABLE_VC_WARNING(4574)
+#include <new>
+EA_RESTORE_VC_WARNING()
+
+#if defined(EA_PLATFORM_SONY)
+	#include <kernel.h>
+
+	EA::Thread::ThreadLocalStorage::ThreadLocalStorage()
+		: mTLSData()
+	{
+		// To consider: Support the specification of a destructor instead of just passing NULL.
+		mTLSData.mResult = scePthreadKeyCreate(&mTLSData.mKey, NULL);
+		EAT_ASSERT(mTLSData.mResult == 0);
+	}
+
+
+	EA::Thread::ThreadLocalStorage::~ThreadLocalStorage()
+	{
+		if(mTLSData.mResult == 0)
+			scePthreadKeyDelete(mTLSData.mKey);
+	}
+
+
+	void* EA::Thread::ThreadLocalStorage::GetValue()
+	{
+		return scePthreadGetspecific(mTLSData.mKey);
+	}
+
+
+	bool EA::Thread::ThreadLocalStorage::SetValue(const void* pData)
+	{
+		if(scePthreadSetspecific(mTLSData.mKey, pData) == 0)
+			return true;
+		return false;
+	}
+
+
+
+#elif (defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE) && !defined(CS_UNDEFINED_STRING)
+	#if defined(EA_PLATFORM_UNIX)
+		#include <unistd.h>
+	#elif defined(EA_PLATFORM_WINDOWS)
+		#pragma warning(push, 0)
+		#include <Windows.h>
+		#pragma warning(pop)
+	#endif
+
+	EA::Thread::ThreadLocalStorage::ThreadLocalStorage()
+		: mTLSData()
+	{
+		// To consider: Support the specification of a destructor instead of just passing NULL.
+		mTLSData.mResult = pthread_key_create(&mTLSData.mKey, NULL);
+		EAT_ASSERT(mTLSData.mResult == 0);
+	}
+
+
+	EA::Thread::ThreadLocalStorage::~ThreadLocalStorage()
+	{
+		if(mTLSData.mResult == 0)
+			pthread_key_delete(mTLSData.mKey);
+	}
+
+
+	void* EA::Thread::ThreadLocalStorage::GetValue()
+	{
+		return pthread_getspecific(mTLSData.mKey);
+	}
+
+
+	bool EA::Thread::ThreadLocalStorage::SetValue(const void* pData)
+	{
+		if(pthread_setspecific(mTLSData.mKey, pData) == 0)
+			return true;
+		return false;
+	}
+
+
+
+#elif defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_WINDOWS_PHONE) && !(defined(EA_PLATFORM_WINDOWS) && !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)) 
+		#pragma warning(push, 0)
+		#include <Windows.h>
+		#pragma warning(pop)
+
+	EA::Thread::ThreadLocalStorage::ThreadLocalStorage()
+		: mTLSData(TlsAlloc())
+	{
+		EAT_ASSERT(mTLSData != TLS_OUT_OF_INDEXES);
+	}
+
+
+	EA::Thread::ThreadLocalStorage::~ThreadLocalStorage()
+	{
+		if(mTLSData != TLS_OUT_OF_INDEXES)
+			TlsFree(mTLSData);
+	}
+
+
+	void* EA::Thread::ThreadLocalStorage::GetValue()
+	{
+		return TlsGetValue(mTLSData);
+	}
+
+
+	bool EA::Thread::ThreadLocalStorage::SetValue(const void* pData)
+	{
+		if(TlsSetValue(mTLSData, (void*)pData))
+			return true;
+		return false;
+	}
+
+#elif (!EA_THREADS_AVAILABLE || defined(EA_PLATFORM_CONSOLE)) && !defined(CS_UNDEFINED_STRING)
+
+	#include <string.h>
+
+	#if !EA_THREADS_AVAILABLE
+		#define OSEnableInterrupts()
+		#define OSDisableInterrupts()
+	#else
+		#error Need to define EnableInterrupts/DisableInterrupts for the given platform.
+	#endif
+
+
+	EAThreadLocalStorageData::ThreadToDataPair* EAThreadLocalStorageData::GetTLSEntry(bool bCreateIfNotFound)
+	{
+		const int                  kArraySize = (sizeof(mDataArray) / sizeof(mDataArray[0]));
+		ThreadToDataPair*          pCurrent, *pEnd;
+
+		EA::Thread::ThreadUniqueId nCurrentThreadID;
+		EAThreadGetUniqueId(nCurrentThreadID);
+
+		// The code below is likely to execute very quickly and never transfers 
+		// execution outside the function, so we can very briefly disable interrupts
+		// for the period needed to do the logic below.
+		OSDisableInterrupts();
+
+		// We make the assumption that there are likely to be less than 10 threads most of 
+		// the time. Thus, instead of maintaining a sorted array and do a binary search 
+		// within that array, we do a linear search. An improvement would be to make the 
+		// array be sorted if it goes above some preset size, such as 20.
+		for(pCurrent = mDataArray, pEnd = mDataArray + mDataArrayCount; pCurrent < pEnd; ++pCurrent)
+		{
+			if(pCurrent->mThreadID == nCurrentThreadID)
+			{
+				OSEnableInterrupts();
+				return pCurrent;
+			}
+		}
+
+		if((pCurrent >= pEnd) && ((mDataArrayCount + 1) < kArraySize) && bCreateIfNotFound) // If we didn't find it above and there is more room and we should create if not found...
+		{
+			pCurrent = mDataArray + mDataArrayCount++;
+			pCurrent->mThreadID = nCurrentThreadID;
+		}
+		else
+			pCurrent = NULL;
+
+		OSEnableInterrupts();
+
+		return pCurrent;
+	}
+
+
+	EA::Thread::ThreadLocalStorage::ThreadLocalStorage()
+	{
+		memset(mTLSData.mDataArray, 0, sizeof(mTLSData.mDataArray));
+		mTLSData.mDataArrayCount = 0;
+	}
+
+
+	EA::Thread::ThreadLocalStorage::~ThreadLocalStorage()
+	{
+		// Nothing to do.
+	}
+
+
+	void* EA::Thread::ThreadLocalStorage::GetValue()
+	{
+		EAThreadLocalStorageData::ThreadToDataPair* const pTDP = mTLSData.GetTLSEntry(false);
+		if(pTDP)
+			return (void*)pTDP->mpData;
+		return NULL;
+	}
+
+
+	bool EA::Thread::ThreadLocalStorage::SetValue(const void* pData)
+	{
+		if(pData == NULL)
+		{  // We remove it from the container so that the container can have room for others.
+			EAThreadLocalStorageData::ThreadToDataPair* pTDP = mTLSData.GetTLSEntry(false);
+
+			if(pTDP)
+			{
+				OSDisableInterrupts(); // Briefly disable interrupts for the duration of the logic below.
+				const EAThreadLocalStorageData::ThreadToDataPair* const pTDPEnd = mTLSData.mDataArray + mTLSData.mDataArrayCount;
+				while(++pTDP < pTDPEnd)     // What we do here is move all the other values downward. This is an O(n) operation, 
+					pTDP[-1] = pTDP[0];     // but the number of unique threads using us is likely to be pretty small.
+				mTLSData.mDataArrayCount = (int)(pTDPEnd - mTLSData.mDataArray - 1);
+				OSEnableInterrupts();
+			}
+			return true;
+		}
+
+		EAThreadLocalStorageData::ThreadToDataPair* const pTDP = mTLSData.GetTLSEntry(true);
+		if(pTDP)
+			pTDP->mpData = pData;
+		return (pTDP != NULL);
+	}
+
+#else
+
+	// Use reference std::map implementation.
+	EA_DISABLE_VC_WARNING(4574)
+	#include <map>
+	EA_RESTORE_VC_WARNING()
+
+	#include <eathread/eathread_futex.h>
+
+	void** EAThreadLocalStorageData::GetTLSEntry(bool bCreateIfNotFound)
+	{
+		EA::Thread::ThreadUniqueId nThreadID;
+		EAThreadGetUniqueId(nThreadID);
+
+		EA::Thread::AutoFutex autoFutex(mFutex);
+
+		if(bCreateIfNotFound) // We expect this to be true most of the time.
+		{
+			// Create as needed
+			if (mThreadToDataMap == NULL)
+			{
+				mThreadToDataMap = new std::map<EA::Thread::ThreadUniqueId, const void*>;
+			}
+
+			return (void**)(char*)&((*mThreadToDataMap)[nThreadID]); // std::map::operator[] inserts a default-constructed value if the key is not present.
+		}
+
+		if (mThreadToDataMap == NULL)
+		{
+			return NULL;
+		}
+		
+		std::map<EA::Thread::ThreadUniqueId, const void*>::iterator it(mThreadToDataMap->find(nThreadID));
+		if(it != mThreadToDataMap->end())
+		{
+			std::map<EA::Thread::ThreadUniqueId, const void*>::value_type& value = *it;
+			return (void**)(char*)&value.second;
+		}
+		return NULL;
+	}
+
+
+	EA::Thread::ThreadLocalStorage::ThreadLocalStorage()
+	{
+	}
+
+
+	EA::Thread::ThreadLocalStorage::~ThreadLocalStorage()
+	{
+		// Nothing to do.
+	}
+
+
+	void* EA::Thread::ThreadLocalStorage::GetValue()
+	{
+		void** const ppData = mTLSData.GetTLSEntry(false);
+		if(ppData)
+			return *ppData;
+		return NULL;
+	}
+
+
+	bool EA::Thread::ThreadLocalStorage::SetValue(const void* pData)
+	{
+		if(pData == NULL)
+		{
+			ThreadUniqueId nThreadID;
+			EAThreadGetUniqueId(nThreadID);
+
+			EA::Thread::AutoFutex autoFutex(mTLSData.mFutex);
+			
+			if (mTLSData.mThreadToDataMap)
+			{
+				std::map<EA::Thread::ThreadUniqueId, const void*>::iterator it(mTLSData.mThreadToDataMap->find(nThreadID));
+				if(it != mTLSData.mThreadToDataMap->end())
+					mTLSData.mThreadToDataMap->erase(it);
+			}
+			return true;
+		}
+
+		void** const ppData = mTLSData.GetTLSEntry(true);
+		if(ppData)
+			*ppData = (void*)pData;
+		return (ppData != NULL);
+	}
+
+#endif
+
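+// Usage sketch (illustrative; pPerThreadContext is hypothetical): each thread
+// that calls SetValue sees its own value from GetValue on the same object.
+//
+//     static EA::Thread::ThreadLocalStorage sTLS;
+//     sTLS.SetValue(pPerThreadContext);  // on the current thread
+//     void* p = sTLS.GetValue();         // later on the same thread -> pPerThreadContext
+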
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::ThreadLocalStorage* EA::Thread::ThreadLocalStorageFactory::CreateThreadLocalStorage()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::ThreadLocalStorage))) EA::Thread::ThreadLocalStorage;
+	else
+		return new EA::Thread::ThreadLocalStorage;
+}
+
+void EA::Thread::ThreadLocalStorageFactory::DestroyThreadLocalStorage(EA::Thread::ThreadLocalStorage* pThreadLocalStorage)
+{
+	if(gpAllocator)
+	{
+		pThreadLocalStorage->~ThreadLocalStorage();
+		gpAllocator->Free(pThreadLocalStorage);
+	}
+	else
+		delete pThreadLocalStorage;
+}
+
+size_t EA::Thread::ThreadLocalStorageFactory::GetThreadLocalStorageSize()
+{
+	return sizeof(EA::Thread::ThreadLocalStorage);
+}
+
+EA::Thread::ThreadLocalStorage* EA::Thread::ThreadLocalStorageFactory::ConstructThreadLocalStorage(void* pMemory)
+{
+	return new(pMemory) EA::Thread::ThreadLocalStorage;
+}
+
+void EA::Thread::ThreadLocalStorageFactory::DestructThreadLocalStorage(EA::Thread::ThreadLocalStorage* pThreadLocalStorage)
+{
+	pThreadLocalStorage->~ThreadLocalStorage();
+}
+
+
+#undef OSEnableInterrupts   
+#undef OSDisableInterrupts
+
+
+
+

+ 262 - 0
source/eathread_thread.cpp

@@ -0,0 +1,262 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/internal/config.h>
+#include "eathread/internal/eathread_global.h"
+#include <eathread/eathread_thread.h>
+#include <eathread/eathread_mutex.h>
+#include <new> // include new for placement new operator
+
+#if !EA_THREADS_AVAILABLE
+	// Do nothing
+#elif EA_USE_CPP11_CONCURRENCY
+	#include "cpp11/eathread_thread_cpp11.cpp"
+#elif defined(EA_PLATFORM_SONY)
+	#include "kettle/eathread_thread_kettle.cpp"
+#elif defined(EA_PLATFORM_UNIX) || EA_POSIX_THREADS_AVAILABLE
+	#include "unix/eathread_thread_unix.cpp"
+#elif defined(EA_PLATFORM_MICROSOFT)
+	#include "pc/eathread_thread_pc.cpp"
+#endif
+
+
+
+namespace EA
+{
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+	}
+}
+
+
+EA::Thread::Thread* EA::Thread::ThreadFactory::CreateThread()
+{
+	if(gpAllocator)
+		return new(gpAllocator->Alloc(sizeof(EA::Thread::Thread))) EA::Thread::Thread;
+	else
+		return new EA::Thread::Thread;
+}
+
+void EA::Thread::ThreadFactory::DestroyThread(EA::Thread::Thread* pThread)
+{
+	if(gpAllocator)
+	{
+		pThread->~Thread();
+		gpAllocator->Free(pThread);
+	}
+	else
+		delete pThread;
+}
+
+size_t EA::Thread::ThreadFactory::GetThreadSize()
+{
+	return sizeof(EA::Thread::Thread);
+}
+
+EA::Thread::Thread* EA::Thread::ThreadFactory::ConstructThread(void* pMemory)
+{
+	return new(pMemory) EA::Thread::Thread;
+}
+
+void EA::Thread::ThreadFactory::DestructThread(EA::Thread::Thread* pThread)
+{
+	pThread->~Thread();
+}
+
+EA::Thread::ThreadEnumData::ThreadEnumData()
+: mpThreadDynamicData(NULL)
+{
+}
+
+EA::Thread::ThreadEnumData::~ThreadEnumData()
+{
+	Release();
+}
+
+void EA::Thread::ThreadEnumData::Release()
+{
+	if(mpThreadDynamicData)
+	{
+		mpThreadDynamicData->Release();
+		mpThreadDynamicData = NULL;
+	}
+}
+
+extern const size_t kMaxThreadDynamicDataCount;
+EATHREAD_GLOBALVARS_EXTERN_INSTANCE;
+///////////////////////////////////////////////////////////////////////////////
+//
+size_t EA::Thread::EnumerateThreads(ThreadEnumData* pDataArray, size_t dataArrayCapacity)
+{
+	size_t requiredCount = 0;
+
+	if(dataArrayCapacity > EA::Thread::kMaxThreadDynamicDataCount)
+	   dataArrayCapacity = EA::Thread::kMaxThreadDynamicDataCount;  
+	
+	EATHREAD_GLOBALVARS.gThreadDynamicMutex.Lock();
+	for(size_t i(0); i < EA::Thread::kMaxThreadDynamicDataCount; i++)
+	{
+		if(EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[i].GetValue() != 0)
+		{
+			if(requiredCount < dataArrayCapacity)
+			{
+				pDataArray[requiredCount].mpThreadDynamicData = (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+				pDataArray[requiredCount].mpThreadDynamicData->AddRef(); 
+			}
+			requiredCount++;
+		}
+	} 
+	EATHREAD_GLOBALVARS.gThreadDynamicMutex.Unlock();
+
+	return requiredCount;
+}
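+
+// Usage sketch (illustrative): enumerate live threads. ThreadEnumData releases
+// its AddRef'd reference in its destructor, so no explicit Release is needed here.
+//
+//     EA::Thread::ThreadEnumData enumData[32];
+//     size_t required = EA::Thread::EnumerateThreads(enumData, 32);
+//     // 'required' may exceed 32 if more threads exist than the array can hold.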
+
+///////////////////////////////////////////////////////////////////////////////
+// non-threaded implementation
+///////////////////////////////////////////////////////////////////////////////
+
+#if !EA_THREADS_AVAILABLE
+
+	// If multithreading support is not available, we can't implement anything
+	// here that works. All we do is define a null implementation that links
+	// but fails all operations.
+
+
+	EA::Thread::ThreadParameters::ThreadParameters()
+	  : mpStack(NULL),
+		mnStackSize(0),
+		mnPriority(kThreadPriorityDefault),
+		mnProcessor(kProcessorDefault),
+		mpName(""),
+		mbDisablePriorityBoost(false)
+	{
+	}
+
+
+	EA::Thread::Thread::Thread()
+	{
+		mThreadData.mpData = NULL;
+	}
+
+
+	EA::Thread::Thread::Thread(const Thread& /*t*/)
+	{
+	}
+
+
+	EA::Thread::Thread& EA::Thread::Thread::operator=(const Thread& /*t*/)
+	{
+		return *this;
+	}
+
+
+	EA::Thread::Thread::~Thread()
+	{
+	}
+
+
+	EA::Thread::RunnableFunctionUserWrapper  EA::Thread::Thread::sGlobalRunnableFunctionUserWrapper = NULL;
+	EA::Thread::RunnableClassUserWrapper     EA::Thread::Thread::sGlobalRunnableClassUserWrapper    = NULL;
+	EA::Thread::AtomicInt32                  EA::Thread::Thread::sDefaultProcessor                  = kProcessorAny;
+	EA::Thread::AtomicUint64                 EA::Thread::Thread::sDefaultProcessorMask              = UINT64_C(0xffffffffffffffff);
+
+
+	EA::Thread::RunnableFunctionUserWrapper EA::Thread::Thread::GetGlobalRunnableFunctionUserWrapper()
+	{
+		return sGlobalRunnableFunctionUserWrapper;
+	}
+
+	void EA::Thread::Thread::SetGlobalRunnableFunctionUserWrapper(EA::Thread::RunnableFunctionUserWrapper pUserWrapper)
+	{
+		if (sGlobalRunnableFunctionUserWrapper != NULL)
+		{
+			// Can only be set once in entire game. 
+			EAT_ASSERT(false);
+		}
+		else
+			sGlobalRunnableFunctionUserWrapper = pUserWrapper;
+	}
+
+	EA::Thread::RunnableClassUserWrapper EA::Thread::Thread::GetGlobalRunnableClassUserWrapper()
+	{
+		return sGlobalRunnableClassUserWrapper;
+	}
+
+	void EA::Thread::Thread::SetGlobalRunnableClassUserWrapper(EA::Thread::RunnableClassUserWrapper pUserWrapper)
+	{
+		if (sGlobalRunnableClassUserWrapper != NULL)
+		{
+			// Can only be set once in entire game. 
+			EAT_ASSERT(false);
+		}
+		else
+			sGlobalRunnableClassUserWrapper = pUserWrapper;
+	}
+
+
+	EA::Thread::ThreadId EA::Thread::Thread::Begin(RunnableFunction /*pFunction*/, void* /*pContext*/, const ThreadParameters* /*pTP*/, RunnableFunctionUserWrapper /*pUserWrapper*/)
+	{
+		return kThreadIdInvalid;
+	}
+
+
+	EA::Thread::ThreadId EA::Thread::Thread::Begin(IRunnable* /*pRunnable*/, void* /*pContext*/, const ThreadParameters* /*pTP*/, RunnableClassUserWrapper /*pUserWrapper*/)
+	{
+		return kThreadIdInvalid;
+	}
+
+
+	EA::Thread::Thread::Status EA::Thread::Thread::WaitForEnd(const ThreadTime& /*timeoutAbsolute*/, intptr_t* /*pThreadReturnValue*/)
+	{
+		return kStatusNone; 
+	}
+
+
+	EA::Thread::Thread::Status EA::Thread::Thread::GetStatus(intptr_t* /*pThreadReturnValue*/) const
+	{
+		return kStatusNone;
+	}
+
+
+	EA::Thread::ThreadId EA::Thread::Thread::GetId() const
+	{
+		return (ThreadId)kThreadIdInvalid;
+	}
+
+
+	int EA::Thread::Thread::GetPriority() const
+	{
+		return kThreadPriorityUnknown;
+	}
+
+
+	bool EA::Thread::Thread::SetPriority(int /*nPriority*/)
+	{
+		return false;
+	}
+
+
+	void EA::Thread::Thread::SetProcessor(int /*nProcessor*/)
+	{
+	}
+
+
+	void EA::Thread::Thread::Wake()
+	{
+	}
+
+
+	const char* EA::Thread::Thread::GetName() const
+	{
+		return "";
+	}
+
+ 
+	void EA::Thread::Thread::SetName(const char* /*pName*/)
+	{
+	}
+
+#endif // !EA_THREADS_AVAILABLE
+

+ 178 - 0
source/kettle/eathread_barrier_kettle.cpp

@@ -0,0 +1,178 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_barrier.h>
+#include <eathread/eathread.h>
+#include <kernel.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <new>
+
+EABarrierData::EABarrierData()
+	:  mCV(), mMutex(), mnHeight(0), mnCurrent(0), mnCycle(0), mbValid(false)
+{}
+
+
+EA::Thread::BarrierParameters::BarrierParameters(int height, bool bIntraProcess, const char* pName)
+	: mHeight(height), mbIntraProcess(bIntraProcess)
+{
+	if(pName)
+		strncpy(mName, pName, sizeof(mName)-1); 
+	else
+		mName[0] = 0;
+}
+
+
+EA::Thread::Barrier::Barrier(const BarrierParameters* pBarrierParameters, bool bDefaultParameters)
+{
+	if(!pBarrierParameters && bDefaultParameters)
+	{
+		BarrierParameters parameters;
+		Init(&parameters);
+	}
+	else
+		Init(pBarrierParameters);
+}
+
+
+EA::Thread::Barrier::Barrier(int height)
+{
+	BarrierParameters parameters(height);
+	Init(&parameters);
+}
+
+
+EA::Thread::Barrier::~Barrier()
+{
+	if(mBarrierData.mbValid){
+		EAT_ASSERT(mBarrierData.mnCurrent == mBarrierData.mnHeight);
+		int result = scePthreadMutexDestroy(&mBarrierData.mMutex);
+		EA_UNUSED(result);
+		EAT_ASSERT(result == 0);
+		result = scePthreadCondDestroy(&mBarrierData.mCV);
+		EAT_ASSERT(result == 0);
+		EA_UNUSED( result ); //if compiling without asserts
+	}
+}
+
+
+bool EA::Thread::Barrier::Init(const BarrierParameters* pBarrierParameters)
+{
+	if(pBarrierParameters && !mBarrierData.mbValid)
+	{
+		mBarrierData.mbValid   = false;
+		mBarrierData.mnHeight  = pBarrierParameters->mHeight;
+		mBarrierData.mnCurrent = pBarrierParameters->mHeight;
+		mBarrierData.mnCycle   = 0;
+
+		int result = scePthreadMutexInit(&mBarrierData.mMutex, NULL, pBarrierParameters->mName);
+		if(result == 0){
+			result = scePthreadCondInit(&mBarrierData.mCV, NULL, pBarrierParameters->mName);
+			if(result == 0)
+				mBarrierData.mbValid = true;
+			else
+				scePthreadMutexDestroy(&mBarrierData.mMutex);
+		}
+		return mBarrierData.mbValid;
+	}
+	return false;
+}
+
+
+EA::Thread::Barrier::Result EA::Thread::Barrier::Wait(const ThreadTime& timeoutAbsolute)
+{
+	if(!mBarrierData.mbValid){
+		EAT_ASSERT(false);
+		return kResultError;
+	}
+
+	int result = scePthreadMutexLock(&mBarrierData.mMutex);
+	if(result != 0){
+		EAT_ASSERT(false);
+		return kResultError;
+	}
+
+	const unsigned long nCurrentCycle = (unsigned)mBarrierData.mnCycle;
+	bool bPrimary = false;
+
+	if(--mBarrierData.mnCurrent == 0){ // This is not an atomic operation. We are within a mutex lock.
+		// The last barrier can never time out, as its action is always immediate.
+		mBarrierData.mnCycle++;
+		mBarrierData.mnCurrent = mBarrierData.mnHeight;
+		result = scePthreadCondBroadcast(&mBarrierData.mCV);
+
+		// The last thread into the barrier will return a result of
+		// kResultPrimary rather than kResultSecondary.
+		if(result == 0)
+			bPrimary = true;
+		//else leave result as an error value.
+	}
+	else{
+		// Wait with cancellation disabled, because pthreads barrier_wait
+		// should not be a cancellation point.
+		#if defined(SCE_PTHREAD_CANCEL_DISABLE)
+			int cancel;
+			scePthreadSetcancelstate(SCE_PTHREAD_CANCEL_DISABLE, &cancel);
+		#endif
+
+		// Wait until the barrier's cycle changes, which means that 
+		// it has been broadcast, and we don't want to wait anymore.
+		while(nCurrentCycle == mBarrierData.mnCycle){
+			do{
+				// Under SMP systems, pthread_cond_wait can return the success value 'spuriously'. 
+				// This is by design; we must retest the predicate condition and, if it is not
+				// true, go back to waiting. 
+				result = scePthreadCondTimedwait(&mBarrierData.mCV, &mBarrierData.mMutex, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
+			} while((result == 0) && (nCurrentCycle == mBarrierData.mnCycle));
+			if(result != 0)
+				break;
+		}
+
+		#if defined(SCE_PTHREAD_CANCEL_DISABLE)
+			int cancelTemp;
+			scePthreadSetcancelstate(cancel, &cancelTemp);
+		#endif
+	}
+
+	// We declare a new result2 value because the old one 
+	// might have a special value from above in it.
+	const int result2 = scePthreadMutexUnlock(&mBarrierData.mMutex); (void)result2;
+	EAT_ASSERT(result2 == 0);
+
+	if(result == 0)
+		return bPrimary ? kResultPrimary : kResultSecondary;
+	else if(result == ETIMEDOUT)
+		return kResultTimeout;
+	return kResultError;
+}
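+
+// Usage sketch (illustrative, not part of the original source; DoPhaseOneWork,
+// DoOncePerCycleWork and DoPhaseTwoWork are hypothetical, and Wait() is assumed
+// to use its default untimed wait):
+//
+//     EA::Thread::Barrier gBarrier(4); // height of 4: four participating threads
+//
+//     intptr_t WorkerFunction(void*)
+//     {
+//         DoPhaseOneWork();
+//         if(gBarrier.Wait() == EA::Thread::Barrier::kResultPrimary)
+//             DoOncePerCycleWork(); // exactly one of the four threads lands here per cycle
+//         DoPhaseTwoWork();         // no thread gets here before all have finished phase one
+//         return 0;
+//     }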
+
+
+EA::Thread::Barrier* EA::Thread::BarrierFactory::CreateBarrier()
+{
+	EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+	if(pAllocator)
+		return new(pAllocator->Alloc(sizeof(EA::Thread::Barrier))) EA::Thread::Barrier;
+	else
+		return new EA::Thread::Barrier;
+}
+
+void EA::Thread::BarrierFactory::DestroyBarrier(EA::Thread::Barrier* pBarrier)
+{
+	EA::Thread::Allocator* pAllocator = EA::Thread::GetAllocator();
+
+	if(pAllocator)
+	{
+		pBarrier->~Barrier();
+		pAllocator->Free(pBarrier);
+	}
+	else
+		delete pBarrier;
+}
+
+
+

+ 557 - 0
source/kettle/eathread_callstack_kettle.cpp

@@ -0,0 +1,557 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_atomic.h>
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/eathread_storage.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <machine/signal.h>
+#include <sdk_version.h>
+#include <unistd.h>
+
+
+// EATHREAD_PTHREAD_SIGACTION_SUPPORTED
+//
+// Defined as 0 or 1.
+//
+#if !defined(EATHREAD_PTHREAD_SIGACTION_SUPPORTED)
+	//#if EATHREAD_SCEDBG_ENABLED || defined(EA_DEBUG)
+	//    #define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 1
+	//#else
+	//    #define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 0
+	//#endif
+
+	// Disabling due to syscall crashing on SDK 1.6.
+	#define EATHREAD_PTHREAD_SIGACTION_SUPPORTED 0
+#endif
+
+
+#if EATHREAD_PTHREAD_SIGACTION_SUPPORTED 
+	// Until Sony provides a declaration for this or an alternative scheme, we declare this ourselves.
+	__BEGIN_DECLS
+
+	// User-level applications use as integer registers for passing the sequence %rdi, %rsi, %rdx, %rcx, %r8 and %r9. 
+	// The kernel interface uses %rdi, %rsi, %rdx, %r10, %r8 and %r9, which is what matters to us below.
+	// http://www.ibm.com/developerworks/library/l-ia/index.html
+	// A system-call is done via the syscall instruction. The kernel destroys registers %rcx and %r11.
+	// The number of the syscall has to be passed in register %rax.
+	// System-calls are limited to six arguments, no argument is passed directly on the stack.
+	// Returning from the syscall, register %rax contains the result of the system-call. A value in the range between -4095 and -1 indicates an error, it is -errno.
+	// Only values of class INTEGER or class MEMORY are passed to the kernel.
+	// Relevant BSD source code: https://bitbucket.org/freebsd/freebsd-head/src/36b017c6a0f817439d40abfd790238dfa13e2be3/lib/libthr/thread?at=default
+	// The BSD pthread struct: https://bitbucket.org/freebsd/freebsd-head/src/36b017c6a0f817439d40abfd790238dfa13e2be3/lib/libthr/thread/thr_private.h?at=default
+	// Some NetBSD pthread source: http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libpthread/pthread.c?rev=1.134&content-type=text/x-cvsweb-markup&only_with_tag=MAIN
+
+	static int sigaction(int sig, const struct sigaction * __restrict act, struct sigaction * __restrict oact)
+	{
+			int result;
+			__asm__ __volatile__( 
+					"mov %%rcx, %%r10\n\t"
+					"syscall\n\t"
+					: "=a"(result) : "a"(416), "D"(sig), "S"(act), "d"(oact));
+			return result;
+	}
+
+
+	// #define SYS_thr_kill 433
+	// typedef long thread_t 
+
+	// pthread_t is an opaque typedef for struct pthread. struct pthread looks like so: 
+	//    struct pthread {
+	//        long tid; // Kernel thread id. 
+	//        . . .     // Many other members.
+	//    }
+	// Thus you can directly reinterpret_cast a pthread_t to a pointer to a kernel thread id.
+	#if !defined(GetTidFromPthread)
+		#define GetTidFromPthread(pthreadId) *reinterpret_cast<long*>(pthreadId)
+	#endif
+
+	static int thr_kill(long thread, int sig)
+	{
+		int result;
+		__asm__ __volatile__( 
+				"mov %%rcx, %%r10\n\t"
+				"syscall\n\t"
+				: "=a"(result) : "a"(433), "D"(thread), "S"(sig));
+		return result;
+	}
+
+	static int pthread_kill(pthread_t pthreadId, int sig)
+	{
+		long tid = GetTidFromPthread(pthreadId);
+		thr_kill(tid, sig);
+		return 0;
+	}
+
+	const size_t kBacktraceSignalHandlerIgnoreCount = 2; // It's unclear what this value should be. On one machine it was 4, but on another it was 2. Going with a lower number is more conservative. Possibly a debug/opt thing?
+
+	__END_DECLS
+#endif
+
+
+// Sony may remove this header in the future, so we use the clang __has_include feature to detect if and when that occurs.
+
+// NOTE: Use of unwind.h is disabled on PS4 due to syscall hangs in the kernel
+// experienced by Frostbite when overloading user_malloc to generate a
+// callstack. In addition, Sony recommends the use of __builtin_frame_address
+// / __builtin_return_address over _Unwind_Backtrace, as the former is more
+// performant due to frame pointers being included by default in all builds.
+
+// Forum thread stating that the performance of __builtin_frame_address is better:
+// https://ps4.scedev.net/forums/thread/2267/
+
+// Open support ticket for syscall hang:
+// https://ps4.scedev.net/forums/thread/52687/
+
+#if __has_include(<unwind.h>) && !defined(EA_PLATFORM_SONY)
+	#include <unwind.h>
+
+	#if !defined(EA_HAVE_UNWIND_H)
+		#define EA_HAVE_UNWIND_H 1
+	#endif
+#else
+	#if !defined(EA_NO_HAVE_UNWIND_H)
+		#define EA_NO_HAVE_UNWIND_H 1
+	#endif
+#endif
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+	// Nothing needed.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+	// Nothing needed.
+}
+
+
+EATHREADLIB_API void GetInstructionPointer(void*& p)
+{
+	p = __builtin_return_address(0);
+}
+
+
+
+#if defined(EA_HAVE_UNWIND_H)
+	// Context passed to UnwindCallback below, which libunwind invokes once per callstack entry.
+	struct UnwindCallbackContext
+	{
+		void** mpReturnAddressArray;
+		size_t mReturnAddressArrayCapacity;
+		size_t mReturnAddressArrayIndex;
+	};
+
+	static _Unwind_Reason_Code UnwindCallback(_Unwind_Context* pUnwindContext, void* pUnwindCallbackContextVoid)
+	{
+		UnwindCallbackContext* pUnwindCallbackContext = (UnwindCallbackContext*)pUnwindCallbackContextVoid;
+
+		if(pUnwindCallbackContext->mReturnAddressArrayIndex < pUnwindCallbackContext->mReturnAddressArrayCapacity)
+		{
+			uintptr_t ip = _Unwind_GetIP(pUnwindContext);
+			pUnwindCallbackContext->mpReturnAddressArray[pUnwindCallbackContext->mReturnAddressArrayIndex++] = (void*)ip;
+			return _URC_NO_REASON;
+		}
+
+		return _URC_NORMAL_STOP;
+	}
+#endif
+
+
+
+
+#if EATHREAD_PTHREAD_SIGACTION_SUPPORTED 
+	namespace Local
+	{
+		enum EAThreadBacktraceState
+		{
+			// Positive thread lwp ids are here implicitly.
+			EATHREAD_BACKTRACE_STATE_NONE    = -1,
+			EATHREAD_BACKTRACE_STATE_DUMPING = -2,
+			EATHREAD_BACKTRACE_STATE_DONE    = -3,
+			EATHREAD_BACKTRACE_STATE_CANCEL  = -4
+		};
+
+		struct ThreadBacktraceState
+		{
+			EA::Thread::AtomicInt32 mState;              // One of enum EAThreadBacktraceState or (initially) the thread id of the thread we are targeting.
+			void**                  mCallstack;          // Output param
+			size_t                  mCallstackCapacity;  // Input param, refers to array capacity of mCallstack.
+			size_t                  mCallstackCount;     // Output param
+			ScePthread              mPthread;   	     // Output param
+
+			ThreadBacktraceState() : mState(EATHREAD_BACKTRACE_STATE_NONE), mCallstackCapacity(0), mCallstackCount(0), mPthread(NULL){}
+		};
+
+
+		static ScePthreadMutex      gThreadBacktraceMutex = SCE_PTHREAD_MUTEX_INITIALIZER;
+		static ThreadBacktraceState gThreadBacktraceState; // Protected by gThreadBacktraceMutex.
+
+
+		static void gThreadBacktraceSignalHandler(int /*sigNum*/, siginfo_t* /*pSigInfo*/, void* pSigContextVoid)
+		{
+			int32_t lwpSelf = *(int32_t*)scePthreadSelf();
+
+			if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_DUMPING, lwpSelf))
+			{
+				gThreadBacktraceState.mPthread = scePthreadSelf();
+
+				if(gThreadBacktraceState.mCallstackCapacity)
+				{
+					gThreadBacktraceState.mCallstackCount = GetCallstack(gThreadBacktraceState.mCallstack, gThreadBacktraceState.mCallstackCapacity, (const CallstackContext*)NULL);
+
+					// At this point we need to remove the top N entries and insert an entry for where the thread's instruction pointer is.
+
+					// We originally had code like the following, but it's returning a signal 
+					// handling address now that  we are using our own pthread_kill function:
+					//if(gThreadBacktraceState.mCallstackCount >= kBacktraceSignalHandlerIgnoreCount) // This should always be true.
+					//{
+					//    gThreadBacktraceState.mCallstackCount -= (kBacktraceSignalHandlerIgnoreCount - 1);
+					//    memmove(&gThreadBacktraceState.mCallstack[1], &gThreadBacktraceState.mCallstack[kBacktraceSignalHandlerIgnoreCount], (gThreadBacktraceState.mCallstackCount - 1) * sizeof(void*));
+					//}
+					//else
+					//    gThreadBacktraceState.mCallstackCount = 1;
+					//gThreadBacktraceState.mCallstack[0] = pSigContextVoid ? reinterpret_cast<void*>(reinterpret_cast<sigcontext*>((uintptr_t)pSigContextVoid + 48)->sc_rip) : NULL;
+
+					// New code that's working for our own pthread_kill function usage:
+					if(gThreadBacktraceState.mCallstackCount >= kBacktraceSignalHandlerIgnoreCount) // This should always be true.
+					{
+						gThreadBacktraceState.mCallstackCount -= kBacktraceSignalHandlerIgnoreCount;
+						memmove(&gThreadBacktraceState.mCallstack[0], &gThreadBacktraceState.mCallstack[kBacktraceSignalHandlerIgnoreCount], gThreadBacktraceState.mCallstackCount * sizeof(void*));
+					}
+				}
+				else
+					gThreadBacktraceState.mCallstackCount = 0;
+
+				gThreadBacktraceState.mState.SetValue(EATHREAD_BACKTRACE_STATE_DONE);
+			}
+			// else this thread received an unexpected SIGURG. This can happen if it was so delayed that 
+			// we timed out waiting for it to happen and moved on.
+		}
+	}
+#endif
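+
+// Summary of the mState transitions implemented above (a reading of the code,
+// not original documentation):
+//     target lwp id -> DUMPING  (the signal handler claims the request)
+//     DUMPING       -> DONE     (the handler finished writing the callstack)
+//     target lwp id -> CANCEL   (the requester timed out before the handler ran)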
+
+
+/// GetCallstack
+///
+/// This is a version of GetCallstack which gets the callstack of a thread based on its thread id as opposed to 
+/// its register state. It works by injecting a signal handler into the given thread, which reads its own callstack
+/// and then returns from the signal handler. The GetCallstack function sets this up, generates the signal for the 
+/// other thread, then waits for it to complete. It uses the SIGURG signal for this.
+///
+/// Primary causes of failure:
+///     The target thread has SIGURG explicitly ignored.
+///     The target thread somehow is getting too little CPU time to respond to the signal.
+///
+/// To do: Change this function to take a ThreadInfo as a last parameter instead of pthread_t, and have the 
+/// ThreadInfo return additional basic thread information. Or maybe even change this function to be a 
+/// GetThreadInfo function instead of GetCallstack.
+///
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, EA::Thread::ThreadId& pthread)
+{
+	size_t callstackCount = 0;
+
+	#if EATHREAD_PTHREAD_SIGACTION_SUPPORTED
+		using namespace Local;
+
+		if(pthread)
+		{
+			ScePthread pthreadSelf = scePthreadSelf();
+			int32_t    lwp         = *(int32_t*)pthread;
+			int32_t    lwpSelf     = *(int32_t*)pthreadSelf;
+
+			if(lwp == lwpSelf) // If the target is the calling thread, just read our own callstack directly.
+				callstackCount = GetCallstack(pReturnAddressArray, nReturnAddressArrayCapacity, (const CallstackContext*)NULL);
+			else
+			{
+				struct sigaction act;   memset(&act, 0, sizeof(act));
+				struct sigaction oact;  memset(&oact, 0, sizeof(oact));
+	
+				act.sa_sigaction = gThreadBacktraceSignalHandler;
+				act.sa_flags     = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+
+				scePthreadMutexLock(&gThreadBacktraceMutex);
+
+				if(sigaction(SIGURG, &act, &oact) == 0)
+				{
+					gThreadBacktraceState.mCallstack         = pReturnAddressArray;
+					gThreadBacktraceState.mCallstackCapacity = nReturnAddressArrayCapacity;
+					gThreadBacktraceState.mState.SetValue(lwp);
+
+					// Signal the specific thread that we want to dump.
+					int32_t stateTemp = lwp;
+
+					if(pthread_kill(pthread, SIGURG) == 0)
+					{
+						// Wait for the other thread to start dumping the stack, or time out.
+						for(int waitMS = 200; waitMS; waitMS--)
+						{
+							stateTemp = gThreadBacktraceState.mState.GetValue();
+
+							if(stateTemp != lwp)
+								break;
+
+							usleep(1000); // This sleep gives the OS the opportunity to execute the target thread, even if it's of a lower priority than this thread.
+						}
+					} 
+					// else apparently failed to send SIGURG to the thread, or the thread was paused in a way that it couldn't receive it.
+
+					if(stateTemp == lwp) // If the operation timed out or seemingly never started...
+					{
+						if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_CANCEL, lwp)) // If the backtrace still didn't start, and we were able to stop it by setting the state to cancel...
+							stateTemp = EATHREAD_BACKTRACE_STATE_CANCEL;
+						else
+							stateTemp = gThreadBacktraceState.mState.GetValue();    // It looks like the backtrace thread did in fact get a late start and is now executing
+					}
+
+					// Wait for the dump to finish or be canceled. We wait generously here
+					// rather than bailing out immediately, because the other thread is
+					// accessing state that is owned by this thread.
+					for(int waitMS = 100; (stateTemp == EATHREAD_BACKTRACE_STATE_DUMPING) && waitMS; waitMS--) // If the thread is (still) busy writing out its callstack...
+					{
+						usleep(1000);
+						stateTemp = gThreadBacktraceState.mState.GetValue();
+					}
+
+					if(stateTemp == EATHREAD_BACKTRACE_STATE_DONE)
+						callstackCount = gThreadBacktraceState.mCallstackCount;
+					// Else give up on it. It's OK to just fall through.
+
+					// Restore the original SIGURG handler.
+					sigaction(SIGURG, &oact, NULL);
+				}
+
+				scePthreadMutexUnlock(&gThreadBacktraceMutex);
+			}
+		}
+	#endif
+
+	return callstackCount;
+}
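+
+// Usage sketch (illustrative, not part of the original source). Capturing another
+// thread's callstack by its ThreadId; this only yields entries when
+// EATHREAD_PTHREAD_SIGACTION_SUPPORTED is 1, and otherwise returns 0.
+//
+//     void*  addresses[32];
+//     size_t count = EA::Thread::GetCallstack(addresses, 32, someOtherThreadId);
+//     for(size_t i = 0; i < count; ++i)
+//         printf("frame %2u: %p\n", (unsigned)i, addresses[i]);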
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	#if defined(EA_HAVE_UNWIND_H)
+		// libunwind can only read the stack from the current thread.
+		// However, we can accomplish this for another thread by injecting a signal handler into that thread.
+		// See the thread-id overload of GetCallstack above.
+
+		if(pContext == NULL) // If reading the current thread's context...
+		{
+			UnwindCallbackContext context = { pReturnAddressArray, nReturnAddressArrayCapacity, 0 };
+			_Unwind_Backtrace(&UnwindCallback, &context);
+			return context.mReturnAddressArrayIndex;
+		}
+
+		// We don't yet have a means to read another thread's context.
+		return 0;
+	#else
+		// This platform doesn't use glibc, so the backtrace() function isn't available.
+		// Instead we follow the stack frames manually, which works whenever frame
+		// pointers are present (they are included by default in all builds on this platform).
+
+		size_t index = 0;
+		void** sp = nullptr;
+		void** new_sp = nullptr;
+		const uintptr_t kPtrSanityCheckLimit = 1*1024*1024;
+
+		if (pContext == NULL)
+		{
+			// Arguments are passed in registers on x86-64, so we can't just offset from &pReturnAddressArray.
+			sp = (void**)__builtin_frame_address(0);
+		}
+		else
+		{
+			// On kettle it's not recommended to omit the frame pointer so we check that RBP is sane before use since 
+			// it could have been omitted. From Sony Docs:
+			// "[omit frame pointer] will inhibit unwinding and ... the option may also increase code size since the 
+			// encoding for stack-based addressing is often 1 byte longer then RBP-based (frame pointer) addressing. 
+			// With PlayStation®4 Clang, frame pointer omission may not lead to improved performance. 
+			// Performance analysis and code  profiling are recommended before using this option"
+			sp = (void**)((pContext->mRBP - pContext->mRSP) > kPtrSanityCheckLimit ? pContext->mRSP : pContext->mRBP);
+			pReturnAddressArray[index++] = (void*)pContext->mRIP;
+		}
+
+		for(int count = 0; sp && (index < nReturnAddressArrayCapacity); sp = new_sp, ++count)
+		{
+			if(count > 0 || index != 0) // Skip the innermost frame (this function) unless the context's RIP was already recorded above.
+				pReturnAddressArray[index++] = *(sp + 1);
+
+			new_sp = (void**)*sp;
+
+			if((new_sp < sp) || (new_sp > (sp + kPtrSanityCheckLimit)))
+				break;
+		}         
+
+		return index;
+	#endif
+}
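+
+// Frame layout assumed by the manual walk above (standard x86-64 frames with
+// frame pointers preserved); a sketch of the chain being followed:
+//
+//     higher addresses
+//     +------------------+
+//     | return address   |  <- *(sp + 1), recorded into pReturnAddressArray
+//     +------------------+
+//     | saved RBP        |  <- *sp, becomes new_sp (the caller's frame)
+//     +------------------+  <- sp (the RBP of the current frame)
+//     lower addresses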
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	ScePthread self         = scePthreadSelf();
+	ScePthread pthread_Id   = (ScePthread)threadId; // Requires that ScePthread is a pointer or integral type.
+
+	if(scePthreadEqual(pthread_Id, self))
+	{
+		void* pInstruction;
+
+		// GCC/Clang labels-as-values extension: taking the address of a local
+		// label yields (approximately) the current instruction address.
+		pInstruction = ({ __label__ label; label: &&label; });
+
+		context.mRIP = (uint64_t)pInstruction;
+		context.mRSP = (uint64_t)__builtin_frame_address(1);
+		context.mRBP = 0;
+	}
+	else
+	{
+		// There is currently no way to do this.
+		memset(&context, 0, sizeof(context));
+		return false;
+	}
+
+	return true;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	// Assuming we are using pthreads, sysThreadId == threadId.
+	return GetCallstackContext(context, sysThreadId);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	context.mRIP = pContext->Rip;
+	context.mRSP = pContext->Rsp;
+	context.mRBP = pContext->Rbp;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+EATHREADLIB_API size_t GetModuleFromAddress(const void* /*address*/, char* pModuleName, size_t /*moduleNameCapacity*/)
+{
+	// Not currently implemented for the given platform.
+	pModuleName[0] = 0;
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* /*pAddress*/)
+{
+	// Not currently implemented for the given platform.
+	return 0;
+}
+
+
+
+EA::Thread::ThreadLocalStorage sStackBase;
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* pStackBase)
+{
+	if(pStackBase)
+		sStackBase.SetValue(pStackBase);
+	else
+	{
+		pStackBase = __builtin_frame_address(0);
+
+		if(pStackBase)
+			SetStackBase(pStackBase);
+		// Else failure; do nothing.
+	}
+}
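+
+// Usage note (illustrative): a thread's entry function can call SetStackBase(NULL)
+// early on; the NULL path above substitutes the current frame address, which is
+// close enough to the true base for later GetStackBase() queries on this thread.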
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	void* pBase;
+
+	if(GetPthreadStackInfo(&pBase, NULL))
+		return pBase;
+
+	// Else we require the user to have set this previously, usually via a call 
+	// to SetStackBase() in the start function of this currently executing 
+	// thread (or main for the main thread).
+	pBase = sStackBase.GetValue();
+
+	if(pBase == NULL)
+		pBase = (void*)(((uintptr_t)&pBase + 4095) & ~4095); // Make a guess, round up to next 4096.
+
+	return pBase;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	void* pLimit;
+
+	if(GetPthreadStackInfo(NULL, &pLimit))
+		return pLimit;
+
+	pLimit = __builtin_frame_address(0);
+
+	return (void*)((uintptr_t)pLimit & ~4095); // Round down to nearest page.
+}
+
+
+
+} // namespace Thread
+} // namespace EA
+
+
+
+
+
+
+

+ 121 - 0
source/kettle/eathread_condition_kettle.cpp

@@ -0,0 +1,121 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <eathread/eathread_condition.h>
+#include <kernel.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+
+
+EAConditionData::EAConditionData()
+{
+	memset(&mCV, 0, sizeof(mCV));
+}
+
+
+EA::Thread::ConditionParameters::ConditionParameters(bool bIntraProcess, const char* pName)
+	: mbIntraProcess(bIntraProcess)
+{
+	if (pName)
+	{
+		strncpy(mName, pName, sizeof(mName) - 1);
+		mName[sizeof(mName) - 1] = 0;
+	}
+	else
+		mName[0] = 0;
+}
+
+
+EA::Thread::Condition::Condition(const ConditionParameters* pConditionParameters, bool bDefaultParameters)
+{
+	if(!pConditionParameters && bDefaultParameters)
+	{
+		ConditionParameters parameters;
+		Init(&parameters);
+	}
+	else
+		Init(pConditionParameters);
+}
+
+
+EA::Thread::Condition::~Condition()
+{
+	scePthreadCondDestroy(&mConditionData.mCV);
+}
+
+
+bool EA::Thread::Condition::Init(const ConditionParameters* pConditionParameters)
+{
+	if(pConditionParameters)
+	{
+		ScePthreadCondattr cattr;
+		scePthreadCondattrInit(&cattr);
+		const int result = scePthreadCondInit(&mConditionData.mCV, &cattr, pConditionParameters->mName);
+		EAT_ASSERT(result == 0);
+
+		scePthreadCondattrDestroy(&cattr);
+		return (result == 0);
+	}
+
+	return false;
+}
+
+
+EA::Thread::Condition::Result EA::Thread::Condition::Wait(Mutex* pMutex, const ThreadTime& timeoutAbsolute)
+{
+	int result;
+	ScePthreadMutex* pMutex_t;
+	EAMutexData* pMutexData;
+
+	EAT_ASSERT(pMutex);
+
+	// We have a small problem here in that if we are using the pMutex argument, 
+	// the pthread_cond_wait call will unlock the mutex via the internal mutex data
+	// without calling the Mutex::Unlock function. The result is that the Mutex doesn't
+	// have its lock count value reduced by one, so other threads would see the lock
+	// count as being 1 when in fact it should be zero. We account for that here
+	// by manually maintaining the lock count, which we can do because we have the lock.
+	EAT_ASSERT(pMutex->GetLockCount() == 1);
+	pMutexData = (EAMutexData*)pMutex->GetPlatformData();
+	pMutexData->SimulateLock(false);
+	pMutex_t = &pMutexData->mMutex;
+
+	if(timeoutAbsolute == kTimeoutNone)
+		result = scePthreadCondWait(&mConditionData.mCV, pMutex_t);
+	else
+		result = scePthreadCondTimedwait(&mConditionData.mCV, pMutex_t, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));    
+
+	pMutexData->SimulateLock(true);
+	EAT_ASSERT(!pMutex || (pMutex->GetLockCount() == 1));
+
+	if(result != 0)
+	{
+		if(result == SCE_KERNEL_ERROR_ETIMEDOUT)
+			return kResultTimeout;
+		EAT_ASSERT(false);
+		return kResultError;
+	}
+	return kResultOK;
+}
+
+
+bool EA::Thread::Condition::Signal(bool bBroadcast)
+{
+	if(bBroadcast)
+		return (scePthreadCondBroadcast(&mConditionData.mCV) == 0);
+	return (scePthreadCondSignal(&mConditionData.mCV) == 0);
+}
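+
+// Usage sketch (illustrative, not part of the original source): the canonical
+// predicate loop. Wait can return spuriously, so the predicate must be retested
+// under the mutex after every wake. The default untimed Wait is assumed.
+//
+//     EA::Thread::Mutex     gMutex;
+//     EA::Thread::Condition gCondition;
+//     bool                  gReady = false;
+//
+//     void Consumer()
+//     {
+//         gMutex.Lock();
+//         while(!gReady)
+//             gCondition.Wait(&gMutex); // atomically unlocks gMutex while waiting
+//         gMutex.Unlock();
+//     }
+//
+//     void Producer()
+//     {
+//         gMutex.Lock();
+//         gReady = true;
+//         gMutex.Unlock();
+//         gCondition.Signal(true); // broadcast to all waiters
+//     }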
+
+
+
+
+
+
+
+
+
+

+ 393 - 0
source/kettle/eathread_kettle.cpp

@@ -0,0 +1,393 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_thread.h>
+#include <eathread/eathread_atomic.h>
+#include <eathread/eathread_storage.h>
+
+#include <sched.h>
+#include <unistd.h>
+#if defined(_YVALS)
+	#include <time.h>
+#else
+	#include <sys/time.h>
+#endif
+
+#include <kernel.h>
+#include <sceerror.h>
+#include <sdk_version.h>
+#include <cpuid.h>
+#include <new>
+#include <string.h>
+
+namespace EA
+{
+	namespace Thread
+	{
+		// Assertion variables.
+		EA::Thread::AssertionFailureFunction gpAssertionFailureFunction = NULL;
+		void*                                gpAssertionFailureContext  = NULL;
+	}
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Variables required for ThreadSleep
+//
+// TLS variable for quick lookup of this thread's data, so we can grab the thread-local EAThreadTimerQueue.
+static EA_THREAD_LOCAL EAThreadDynamicData* tpThreadDynamicData = nullptr;
+// In the event a thread not created by EAThread requires a timer queue, we supply the global instance.
+static EAThreadTimerQueue gThreadTimerQueue;
+////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+EA::Thread::ThreadId EA::Thread::GetThreadId()
+{
+	// https://ps4.scedev.net/forums/thread/12697/
+	// https://ps4.scedev.net/forums/thread/53323/
+	// 
+	// ScePthread scePthreadSelf() does not return an integral thread id value. Instead it returns a ScePthread structure,
+	// which is actually a pointer to a pthread structure (eg. pthread*).  On other Sony platforms, an API like
+	// scePthreadGetthreadid was available for this use case but this isn't the case on the PS4.  The above scedev.net
+	// threads indicate that a request for an additional API to retrieve the kernel thread id has been submitted to
+	// Sony.  Until this feature ships in a future SDK update, we use the following technique to get a scalar thread
+	// id value that matches the thread id presented in the PS4 debugger. 
+
+	const EA::Thread::ThreadId currentThreadId = *reinterpret_cast<EA::Thread::ThreadId*>(scePthreadSelf());
+	return currentThreadId;
+}
+
+EA::Thread::ThreadId EA::Thread::GetThreadId(EA::Thread::SysThreadId id)
+{
+	EAThreadDynamicData* const pTDD = EA::Thread::FindThreadDynamicData(id);
+	if(pTDD)
+	{   
+		return pTDD->mThreadId;
+	}
+
+	return EA::Thread::kThreadIdInvalid;
+}
+
+int EA::Thread::GetThreadPriority()
+{
+	int         policy;
+	sched_param param;
+	SysThreadId currentThreadId = scePthreadSelf();
+
+	int result = scePthreadGetschedparam(currentThreadId, &policy, &param);
+	if(result == SCE_OK)
+	{
+		// Kettle pthreads uses a reversed interpretation of sched_get_priority_min and sched_get_priority_max.
+		return -1 * (param.sched_priority - SCE_KERNEL_PRIO_FIFO_DEFAULT);
+	}
+
+	return kThreadPriorityDefault;
+}
+
+
+bool EA::Thread::SetThreadPriority(int nPriority)
+{
+	SysThreadId             currentThreadId = scePthreadSelf();
+	int                     policy;
+	SceKernelSchedParam     param;
+	int                     result = -1;
+
+	EAT_ASSERT(nPriority != kThreadPriorityUnknown);
+	 
+	result = scePthreadGetschedparam(currentThreadId, &policy, &param);
+	if(result == SCE_OK)
+	{
+		// Kettle pthreads uses a reversed interpretation of sched_get_priority_min and sched_get_priority_max.
+		const int nMin = SCE_KERNEL_PRIO_FIFO_HIGHEST;
+		const int nMax = SCE_KERNEL_PRIO_FIFO_LOWEST;
+
+		param.sched_priority = (SCE_KERNEL_PRIO_FIFO_DEFAULT + (-1 * nPriority));
+
+		// Clamp to min/max as appropriate for current scheduling policy
+		if(param.sched_priority < nMin)
+			param.sched_priority = nMin;
+		else if(param.sched_priority > nMax)
+			param.sched_priority = nMax;
+
+		result = scePthreadSetprio(currentThreadId, param.sched_priority);
+	}
+		 
+	return (result == SCE_OK);
+}
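+
+// Worked example of the mapping above (a reading of the code; a numerically lower
+// sched_priority means a higher native priority on this platform):
+//     EAThread priority  0 -> SCE_KERNEL_PRIO_FIFO_DEFAULT
+//     EAThread priority +1 -> SCE_KERNEL_PRIO_FIFO_DEFAULT - 1 (higher native priority)
+//     EAThread priority -1 -> SCE_KERNEL_PRIO_FIFO_DEFAULT + 1 (lower native priority)
+// with the result clamped to [SCE_KERNEL_PRIO_FIFO_HIGHEST, SCE_KERNEL_PRIO_FIFO_LOWEST].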
+
+
+void* EA::Thread::GetThreadStackBase()
+{
+	void* pStackAddr = NULL;
+	int result;
+
+	ScePthreadAttr attr;
+	result = scePthreadAttrInit(&attr); 
+	EAT_ASSERT(SCE_OK == result);
+	result = scePthreadAttrGet(scePthreadSelf(), &attr);
+	EAT_ASSERT(SCE_OK == result);
+	result = scePthreadAttrGetstackaddr(&attr, &pStackAddr); 
+	EAT_ASSERT(SCE_OK == result);
+	result = scePthreadAttrDestroy(&attr); 
+	EAT_ASSERT(SCE_OK == result);
+	EA_UNUSED(result);
+
+	return pStackAddr; 
+}
+
+namespace
+{
+	SceKernelCpumask GetSceKernelAllCpuMask()
+	{
+	#if (SCE_ORBIS_SDK_VERSION >= 0x03000000u)
+		return (EA::Thread::GetProcessorCount() == 6) ? SCE_KERNEL_CPUMASK_6CPU_ALL : SCE_KERNEL_CPUMASK_7CPU_ALL;
+	#else
+		return (SceKernelCpumask)0x3f; // Six cores are available to applications on pre-3.00 SDKs.
+	#endif
+	}
+}
+
+
+void EA::Thread::SetThreadProcessor(int nProcessor)
+{
+	SceKernelCpumask mask = GetSceKernelAllCpuMask();
+	if (nProcessor >= 0)
+		mask = (SceKernelCpumask)(1 << nProcessor);
+
+	int result = scePthreadSetaffinity(scePthreadSelf(), mask);
+	EA_UNUSED(result);
+	EAT_ASSERT(SCE_OK == result);
+}
+
+int EA::Thread::GetThreadProcessor()
+{
+	return sceKernelGetCurrentCpu();
+}
+
+EATHREADLIB_API void EA::Thread::SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask)
+{
+	// Update the affinity mask in the thread dynamic data cache.
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if(pTDD)
+	{
+		pTDD->mnThreadAffinityMask = nAffinityMask;
+	}
+
+#if EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+	nAffinityMask &= GetSceKernelAllCpuMask();
+	int res = scePthreadSetaffinity(GetSysThreadId(id), static_cast<SceKernelCpumask>(nAffinityMask));
+	EAT_ASSERT(SCE_OK == res);
+	EA_UNUSED(res);
+#endif
+}
+
+EATHREADLIB_API EA::Thread::ThreadAffinityMask EA::Thread::GetThreadAffinityMask(const EA::Thread::ThreadId& id)
+{ 
+	// Update the affinity mask in the thread dynamic data cache.
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if(pTDD)
+	{
+		return pTDD->mnThreadAffinityMask;
+	}
+
+	return kThreadAffinityMaskAny;
+}
+
+namespace Internal
+{
+	void SetThreadName(EAThreadDynamicData* pTDD)
+	{
+		if(pTDD)
+		{
+			EAT_COMPILETIME_ASSERT(EATHREAD_NAME_SIZE == 32); // New name (up to 32 bytes including the NULL terminator), or NULL due to Sony OS constraint
+			char buf[EATHREAD_NAME_SIZE];
+			snprintf(buf, sizeof(buf), "%s", pTDD->mName);
+			buf[EATHREAD_NAME_SIZE - 1] = 0;
+
+			auto sceResult = scePthreadRename(pTDD->mSysThreadId, buf);
+			EA_UNUSED(sceResult);
+			EAT_ASSERT(SCE_OK == sceResult);
+		}
+	}
+};
+
+EATHREADLIB_API void EA::Thread::SetThreadName(const char* pName) { SetThreadName(GetThreadId(), pName); }
+EATHREADLIB_API const char* EA::Thread::GetThreadName() { return GetThreadName(GetThreadId()); }
+
+EATHREADLIB_API void EA::Thread::SetThreadName(const EA::Thread::ThreadId& id, const char* pName)
+{
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if (pTDD)
+	{
+		strncpy(pTDD->mName, pName, EATHREAD_NAME_SIZE);
+		pTDD->mName[EATHREAD_NAME_SIZE - 1] = 0;
+
+		Internal::SetThreadName(pTDD);
+	}
+}
+
+EATHREADLIB_API const char* EA::Thread::GetThreadName(const EA::Thread::ThreadId& id)
+{ 
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	return pTDD ? pTDD->mName : "";
+}
+
+int EA::Thread::GetProcessorCount() 
+{ 
+#if (SCE_ORBIS_SDK_VERSION >= 0x03000000u)
+	return sceKernelGetCpumode() == SCE_KERNEL_CPUMODE_6CPU ? 6 : 7; 
+#else
+	return 6;
+#endif
+}
+
+void EA::Thread::ThreadSleep(const ThreadTime& timeRelative)
+{
+	if(timeRelative == kTimeoutImmediate)
+	{
+		scePthreadYield();
+	}
+	else
+	{
+		SceKernelTimespec ts;
+		static const double MILLISECONDS_TO_NANOSECONDS = 1000000.0;
+		static const uint64_t SECONDS_TO_NANOSECONDS = 1000000000;
+
+		// Compute with doubles first and then convert to uint64_t, or we will run out of bits of precision.
+		uint64_t timeNanoSeconds = (uint64_t)(MILLISECONDS_TO_NANOSECONDS * timeRelative);
+		ts.tv_sec = timeNanoSeconds / SECONDS_TO_NANOSECONDS;
+		ts.tv_nsec = static_cast<long>(timeNanoSeconds % SECONDS_TO_NANOSECONDS); // Nanosecond remainder after whole seconds.
+
+
+		// Determine which TimerQueue to use. Timer Queues are used to allow for higher resolution sleeps
+		EAThreadTimerQueue* pThreadTimerQueue = nullptr;
+		if (EA_UNLIKELY(tpThreadDynamicData == nullptr))
+		{
+			// This is either the first time an EAThread thread has called ThreadSleep, or we are calling ThreadSleep
+			// from a thread not created by EAThread. Find the ThreadDynamicData which houses the TimerQueue and, if it is
+			// not present (we are on a non-EAThread thread), grab the global instance instead.
+			tpThreadDynamicData = EA::Thread::FindThreadDynamicData(EA::Thread::GetThreadId());
+			if (tpThreadDynamicData)
+			{
+				pThreadTimerQueue = &tpThreadDynamicData->mThreadTimerQueue;
+			}
+			else
+			{
+				pThreadTimerQueue = &gThreadTimerQueue;
+			}
+		}
+		else
+		{
+			pThreadTimerQueue = &tpThreadDynamicData->mThreadTimerQueue;
+		}
+		
+		// Timer queues only accept sleep values between 100 microseconds and 100 seconds, and while we guarantee
+		// pThreadTimerQueue will not be null, we must ensure it has been enabled, since it may fail in two uncommon ways:
+		// 1. The underlying Sony queue failed to initialize (such as too many queues currently being created).
+		// 2. This function (ThreadSleep) is called during static initialization and, due to static initialization order,
+		//    we haven't had a chance to initialize the global static EAThreadTimerQueue instance.
+		if (EA_LIKELY((timeNanoSeconds < (SECONDS_TO_NANOSECONDS * 100)) && pThreadTimerQueue->mbEnabled))
+		{
+			const long kMinTimeForTimerEventNanoSeconds = 100000; // 100 microseconds represented in nanoseconds
+			ts.tv_nsec = EA_UNLIKELY((ts.tv_nsec < kMinTimeForTimerEventNanoSeconds) && (ts.tv_sec != 0)) ?
+				kMinTimeForTimerEventNanoSeconds : ts.tv_nsec;
+
+			// it's ok to submit negative ids to the queue in the event that mCurrentId wraps around
+			int result = sceKernelAddHRTimerEvent(pThreadTimerQueue->mTimerEventQueue, (int)pThreadTimerQueue->mCurrentId++, &ts, nullptr);
+			EA_UNUSED(result);
+			EAT_ASSERT_FORMATTED(result == SCE_OK, "sceKernelAddHRTimerEvent returned an error (0x%08x)", result);
+
+			int out;
+			SceKernelEvent ev;
+			result = sceKernelWaitEqueue(pThreadTimerQueue->mTimerEventQueue, &ev, 1, &out, nullptr);
+			EAT_ASSERT_FORMATTED(result == SCE_OK, "sceKernelWaitEqueue returned an error (0x%08x)", result);
+		}
+		else
+		{
+			int result = sceKernelNanosleep(&ts, 0);
+			EA_UNUSED(result);
+			EAT_ASSERT_MSG(result == SCE_OK, "sceKernelNanosleep returned an error");
+		}
+	}
+}
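+
+// Usage note (illustrative): ThreadSleep(kTimeoutImmediate) merely yields the
+// processor, while ThreadSleep(1) sleeps ~1ms via the per-thread high-resolution
+// timer queue when it is available, falling back to sceKernelNanosleep otherwise.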
+
+
+namespace EA 
+{ 
+	namespace Thread 
+	{ 
+		EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId);     
+	}
+}
+
+void EA::Thread::ThreadEnd(intptr_t threadReturnValue)
+{
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(GetThreadId());
+
+	if(pTDD)
+	{
+		pTDD->mnStatus = Thread::kStatusEnded;
+		pTDD->mnReturnValue = threadReturnValue;
+		pTDD->mRunMutex.Unlock();
+		pTDD->Release();
+	}
+
+	scePthreadExit((void*)threadReturnValue);
+}
+
+EA::Thread::ThreadTime EA::Thread::GetThreadTime()
+{    
+	SceKernelTimespec ts;
+	sceKernelClockGettime(SCE_KERNEL_CLOCK_MONOTONIC, &ts);
+	ThreadTime ret = EA_TIMESPEC_AS_DOUBLE_IN_MS(ts);
+	return ret;
+}
+
+
+void EA::Thread::SetAssertionFailureFunction(EA::Thread::AssertionFailureFunction pAssertionFailureFunction, void* pContext)
+{
+	gpAssertionFailureFunction = pAssertionFailureFunction;
+	gpAssertionFailureContext  = pContext;
+}
+
+
+void EA::Thread::AssertionFailure(const char* pExpression)
+{
+	if(gpAssertionFailureFunction)
+		gpAssertionFailureFunction(pExpression, gpAssertionFailureContext);
+	else
+	{
+		#if EAT_ASSERT_ENABLED
+			// Todo.
+		#endif
+	}
+}
+
+
+EA::Thread::SysThreadId EA::Thread::GetSysThreadId(EA::Thread::ThreadId id)
+{
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if (pTDD)
+	{
+		return pTDD->mSysThreadId;
+	}
+
+	return kSysThreadIdInvalid;
+}
+
+EA::Thread::SysThreadId EA::Thread::GetSysThreadId()
+{
+	return scePthreadSelf();
+}
+
+
+
+
+
+
+
+
+
+
+

+ 199 - 0
source/kettle/eathread_mutex_kettle.cpp

@@ -0,0 +1,199 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/internal/config.h>
+#include <eathread/eathread_mutex.h>
+#include <errno.h>
+#include <string.h>
+#include <sceerror.h>
+
+EAMutexData::EAMutexData()
+	: mMutex(), mnLockCount(0)
+{
+	#if EAT_ASSERT_ENABLED
+		mThreadId = EA::Thread::kThreadIdInvalid; 
+	#endif
+
+	::memset(&mMutex, 0, sizeof(mMutex));
+}
+
+void EAMutexData::SimulateLock(bool bLock)
+{
+	if(bLock)
+	{
+		++mnLockCount;
+		EAT_ASSERT((mThreadId = EA::Thread::GetThreadId()) || true); // Intentionally '=' here and not '=='.
+	}
+	else
+	{
+		--mnLockCount;
+		EAT_ASSERT((mThreadId = EA::Thread::kThreadIdInvalid) || true); // Intentionally '=' here and not '=='.
+	}
+}
+
+
+EA::Thread::MutexParameters::MutexParameters(bool bIntraProcess, const char* pName)
+	: mbIntraProcess(bIntraProcess)
+{
+	mName[0] = '\0';
+
+	if (pName != nullptr)
+	{
+		strncpy(mName, pName, sizeof(mName) - 1);
+		mName[sizeof(mName) - 1] = '\0';
+	}
+}
+
+
+EA::Thread::Mutex::Mutex(const MutexParameters* pMutexParameters, bool bDefaultParameters)
+{
+	if(!pMutexParameters && bDefaultParameters)
+	{
+		MutexParameters parameters;
+		Init(&parameters);
+	}
+	else
+		Init(pMutexParameters);
+}
+
+
+EA::Thread::Mutex::~Mutex()
+{
+	EAT_ASSERT(mMutexData.mnLockCount == 0);
+	scePthreadMutexDestroy(&mMutexData.mMutex);
+}
+
+
+bool EA::Thread::Mutex::Init(const MutexParameters* pMutexParameters)
+{
+	if(pMutexParameters)
+	{
+		mMutexData.mnLockCount = 0;
+
+		ScePthreadMutexattr attr;
+		scePthreadMutexattrInit(&attr);
+
+		scePthreadMutexattrSettype(&attr, SCE_PTHREAD_MUTEX_RECURSIVE);
+
+		#if defined(SCE_PTHREAD_PROCESS_PRIVATE) // Some pthread implementations don't recognize this.
+			// Sony exposes only process-private mutexes, so the intra-process and
+			// inter-process cases are configured identically here.
+			scePthreadMutexattrSettype(&attr, SCE_PTHREAD_PROCESS_PRIVATE);
+		#endif
+
+		// kettle mutex name is restricted to 32 bytes INCLUDING null character. See "scePthreadMutexInit"
+		char mutexNameCopy[32];
+		strncpy(mutexNameCopy, pMutexParameters->mName, sizeof(mutexNameCopy) - 1);
+		mutexNameCopy[sizeof(mutexNameCopy)-1] = '\0';
+
+		// Sony allocates memory for any non-empty name string, which reduces the number of active mutexes allowed
+		// by the operating system.  We only provide a string if it is non-zero in length.
+		int result = SCE_KERNEL_ERROR_EAGAIN;
+		if (pMutexParameters->mName[0] != '\0')
+		{
+			result = scePthreadMutexInit(&mMutexData.mMutex, &attr, mutexNameCopy);
+		}
+
+		if (result == SCE_KERNEL_ERROR_EAGAIN)
+		{
+			// We've hit the limit for named mutexes on PS4, so fallback to an unnamed mutex which has a much higher limit
+			result = scePthreadMutexInit(&mMutexData.mMutex, &attr, NULL);
+		}
+		scePthreadMutexattrDestroy(&attr);
+
+		EAT_ASSERT(SCE_OK == result);
+		return (SCE_OK == result);
+	}
+
+	return false;
+}
+
+
+int EA::Thread::Mutex::Lock(const ThreadTime& timeoutAbsolute)
+{
+	int result;
+
+	EAT_ASSERT(mMutexData.mnLockCount < 100000);
+
+	if(timeoutAbsolute == kTimeoutNone)
+	{
+		result = scePthreadMutexLock(&mMutexData.mMutex);
+
+		if(result != 0)
+		{
+			EAT_ASSERT(false);
+			return kResultError;
+		}
+	}
+	else if(timeoutAbsolute == kTimeoutImmediate)
+	{
+		result = scePthreadMutexTrylock(&mMutexData.mMutex);
+
+		if(result != 0)
+		{
+			if(result == SCE_KERNEL_ERROR_EBUSY)
+				return kResultTimeout;
+
+			EAT_ASSERT(false);
+			return kResultError;
+		}
+	}
+	else
+	{        
+		result = scePthreadMutexTimedlock(&mMutexData.mMutex, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
+
+		if(result != 0)
+		{
+			if(result == SCE_KERNEL_ERROR_ETIMEDOUT)
+				return kResultTimeout;
+
+			EAT_ASSERT(false);
+			return kResultError;
+		}
+	}
+
+	EAT_ASSERT(mMutexData.mThreadId = EA::Thread::GetThreadId()); // Intentionally '=' here and not '=='.
+	EAT_ASSERT(mMutexData.mnLockCount >= 0);
+	return ++mMutexData.mnLockCount; // This is safe to do because we have the lock.
+}
+
+
+int EA::Thread::Mutex::Unlock()
+{
+	EAT_ASSERT(mMutexData.mThreadId == EA::Thread::GetThreadId());
+	EAT_ASSERT(mMutexData.mnLockCount > 0);
+
+	const int nReturnValue(--mMutexData.mnLockCount); // This is safe to do because we have the lock.
+
+	if(scePthreadMutexUnlock(&mMutexData.mMutex) != 0)
+	{
+		EAT_ASSERT(false);
+		return nReturnValue + 1;
+	}
+
+	return nReturnValue;
+}
+
+
+int EA::Thread::Mutex::GetLockCount() const
+{
+	return mMutexData.mnLockCount;
+}
+
+
+bool EA::Thread::Mutex::HasLock() const
+{
+	#if EAT_ASSERT_ENABLED
+		return (mMutexData.mnLockCount > 0) && (mMutexData.mThreadId == GetThreadId());
+	#else
+		return (mMutexData.mnLockCount > 0); // This is the best we can do.
+	#endif
+}
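+
+// Usage sketch (illustrative, not part of the original source): a timed lock.
+// Lock returns the new lock count (> 0) on success, or kResultTimeout/kResultError
+// (as used in Lock above) on failure.
+//
+//     EA::Thread::Mutex mutex;
+//     int result = mutex.Lock(EA::Thread::GetThreadTime() + 500); // absolute timeout 500ms from now
+//     if(result > 0)
+//     {
+//         // ... critical section ...
+//         mutex.Unlock();
+//     }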
+
+
+
+
+

+ 61 - 0
source/kettle/eathread_pthread_stack_info.cpp

@@ -0,0 +1,61 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/eathread_callstack.h>
+#include <stdlib.h>
+
+namespace EA
+{
+namespace Thread
+{
+	// With some implementations of pthreads, the stack base is returned as NULL if it's the main thread,
+	// or possibly if it's a thread you created but didn't call pthread_attr_setstack manually to provide your 
+	// own stack. It's impossible for us to tell here whether there will be such a NULL return value, so we just do what
+	// we can and the user needs to beware that a NULL return value means that the system doesn't provide the 
+	// given information for the current thread. This function returns false and sets pBase and pLimit to NULL in 
+	// the case that the thread base and limit weren't returned by the system or were returned as NULL.    
+	bool GetPthreadStackInfo(void** pBase, void** pLimit)
+	{
+		bool  returnValue = false;
+		size_t stackSize;
+		void* pBaseTemp = NULL;
+		void* pLimitTemp  = NULL;
+
+		ScePthreadAttr attr;
+			
+		scePthreadAttrInit(&attr);
+
+		int result = scePthreadAttrGet(scePthreadSelf(), &attr);
+		if(result == 0)  // SCE_OK (=0)
+		{
+			result = scePthreadAttrGetstack(&attr, &pLimitTemp, &stackSize);
+			if((result == 0) && (pLimitTemp != NULL)) // If success...
+			{
+				pBaseTemp   = (void*)((uintptr_t)pLimitTemp + stackSize); // scePthreadAttrGetstack returns the lowest address in the stack, not the stack base.
+				returnValue = true;
+			}
+			else
+			{
+				pBaseTemp  = NULL;
+				pLimitTemp = NULL;
+			}
+		}
+
+		scePthreadAttrDestroy(&attr);
+
+		if(pBase)
+			*pBase = pBaseTemp;
+		if(pLimit)
+			*pLimit = pLimitTemp;
+
+		return returnValue;
+	}
+
+} // namespace Thread
+} // namespace EA
+
+
+
+
+

+ 177 - 0
source/kettle/eathread_semaphore_kettle.cpp

@@ -0,0 +1,177 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_semaphore.h>
+
+#if defined(EA_PLATFORM_SONY)
+
+	#include <kernel/semaphore.h>
+	#include <sceerror.h>
+
+	EASemaphoreData::EASemaphoreData()
+	: mSemaphore(NULL), mnMaxCount(INT_MAX), mnCount(0)
+	{
+	}
+
+	EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* pName)
+		: mInitialCount(initialCount), mMaxCount(INT_MAX), mbIntraProcess(bIntraProcess)
+	{
+		// The maximum length for the semaphore name on Kettle is 32 (including the NULL terminator).
+		EAT_COMPILETIME_ASSERT(sizeof(mName) <= 32);
+
+		if (pName)
+		{
+			strncpy(mName, pName, sizeof(mName)-1);
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+		{
+			mName[0] = 0;
+		}
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+	{
+		if (!pSemaphoreParameters && bDefaultParameters)
+		{
+			SemaphoreParameters parameters;
+			Init(&parameters);
+		}
+		else
+		{
+			Init(pSemaphoreParameters);
+		}
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(int initialCount)
+	{
+		SemaphoreParameters parameters(initialCount);
+		Init(&parameters);
+	}
+
+
+	EA::Thread::Semaphore::~Semaphore()
+	{
+		int result = sceKernelDeleteSema(mSemaphoreData.mSemaphore);
+		EAT_ASSERT(result == SCE_OK); EA_UNUSED(result);
+	}
+
+
+	bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+	{
+		if (pSemaphoreParameters 
+			&& pSemaphoreParameters->mInitialCount >= 0
+			&& pSemaphoreParameters->mMaxCount >= 0)
+		{
+			mSemaphoreData.mnMaxCount = pSemaphoreParameters->mMaxCount;
+			mSemaphoreData.mnCount = pSemaphoreParameters->mInitialCount;
+
+			int result = sceKernelCreateSema(
+				&mSemaphoreData.mSemaphore,
+				pSemaphoreParameters->mName,
+				SCE_KERNEL_SEMA_ATTR_TH_FIFO,
+				mSemaphoreData.mnCount,
+				mSemaphoreData.mnMaxCount,
+				NULL);
+
+			if (result == SCE_OK)
+				return true;
+		}
+
+		// Failure: could not create semaphore
+		return false;
+	}
+
+
+	int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		int result = 0;
+
+		// Convert the timeout from absolute to relative (possibly losing some precision).
+		SceKernelUseconds timeoutRelativeUs = static_cast<SceKernelUseconds>(RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
+		do
+		{
+			if (timeoutAbsolute == kTimeoutImmediate)
+			{
+				result = sceKernelPollSema(mSemaphoreData.mSemaphore, 1);
+			}
+			else
+			{
+				result = sceKernelWaitSema(mSemaphoreData.mSemaphore, 1, &timeoutRelativeUs);
+			}
+
+			if (result != SCE_OK)
+			{
+				// SCE_KERNEL_ERROR_ETIMEDOUT is the failure case for 'sceKernelWaitSema'.
+				// SCE_KERNEL_ERROR_EBUSY is the failure case for 'sceKernelPollSema'.
+				// We want to consume the SCE_KERNEL_ERROR_EBUSY error code from the polling interface
+				// so that users have a consistent error code to check against.
+				if (result == SCE_KERNEL_ERROR_ETIMEDOUT || result == SCE_KERNEL_ERROR_EBUSY) 
+				{
+					if (timeoutAbsolute != kTimeoutNone)
+						return kResultTimeout;
+				}
+				else
+				{
+					EAT_FAIL_MSG("Semaphore::Wait: sceKernelWaitSema failure.");
+					return kResultError;
+				}
+			}
+		} while (result != SCE_OK);
+
+		// Success
+		EAT_ASSERT(mSemaphoreData.mnCount.GetValue() > 0);
+		return static_cast<int>(mSemaphoreData.mnCount.Decrement());
+	}
+
+
+	int EA::Thread::Semaphore::Post(int count)
+	{
+		EAT_ASSERT(count >= 0);
+
+		const int currentCount = mSemaphoreData.mnCount;
+
+		if (count > 0)
+		{
+			// If count would cause an overflow exit early
+			if ((mSemaphoreData.mnMaxCount - count) < currentCount)
+				return kResultError;
+
+			// We increment the count before we signal the semaphore so that any awakened
+			// thread will see the right count immediately.
+			mSemaphoreData.mnCount.Add(count);
+
+			int result = sceKernelSignalSema(mSemaphoreData.mSemaphore, count);
+
+			if (result != SCE_OK)
+			{
+				// If not successful set the count back
+				mSemaphoreData.mnCount.Add(-count);
+				return kResultError;
+			}
+		}
+
+		return currentCount + count; // It's possible that another thread may have modified this value since we changed it, but that's not important.
+	}
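+
+	// Usage sketch (illustrative, not part of the original source; QueueJob and DoJob
+	// are hypothetical, and Wait is assumed to use its default untimed wait):
+	//
+	//     EA::Thread::Semaphore gJobsAvailable(0); // initial count of zero
+	//
+	//     void Producer() { QueueJob(); gJobsAvailable.Post(); }
+	//     void Consumer() { gJobsAvailable.Wait(); DoJob(); } // Wait returns once a job was posted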
+
+
+	int EA::Thread::Semaphore::GetCount() const
+	{
+		// There is no way to query the semaphore for the resource count on Kettle,
+		// so we need to rely on our external atomic counter.
+		return mSemaphoreData.mnCount.GetValue();
+	}
+
+#endif // EA_PLATFORM_SONY
+
+
+
+
+
+
+
+

+ 799 - 0
source/kettle/eathread_thread_kettle.cpp

@@ -0,0 +1,799 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_thread.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_sync.h>
+#include <eathread/eathread_callstack.h>
+#include <new>
+#include <kernel.h>
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sceerror.h>
+
+
+#define EA_ALLOW_POSIX_THREADS_PRIORITIES 1
+
+
+namespace
+{
+	// We convert an EAThread priority (higher value implies higher priority) to a native priority 
+	// value, as some implementations of pthreads use lower values to indicate higher priority.
+	void ConvertToNativePriority(int eathreadPriority, sched_param& param, int& policy)
+	{
+		using namespace EA::Thread;
+
+		policy = SCE_KERNEL_SCHED_RR;
+
+		const int nMin = SCE_KERNEL_PRIO_FIFO_HIGHEST;
+		const int nMax = SCE_KERNEL_PRIO_FIFO_LOWEST;
+
+		// Kettle pthreads uses a reversed interpretation of sched_get_priority_min and sched_get_priority_max.
+		param.sched_priority = (SCE_KERNEL_PRIO_FIFO_DEFAULT + (-1 * eathreadPriority));
+
+		if(param.sched_priority < nMin)
+			param.sched_priority = nMin;
+		else if(param.sched_priority > nMax)
+			param.sched_priority = nMax;
+	}
+
+
+	// We convert a native priority value to an EAThread priority (higher value implies higher 
+	// priority), as some implementations of pthreads use lower values to indicate higher priority.
+	int ConvertFromNativePriority(const sched_param& param, int policy)
+	{
+		using namespace EA::Thread;
+
+		// Some implementations of pthreads associate higher priorities with smaller
+		// integer values. We hide this. To the user, a higher value must always
+		// indicate higher priority.
+
+		// Kettle pthreads uses a reversed interpretation of sched_get_priority_min and sched_get_priority_max.
+		return -1 * (param.sched_priority - SCE_KERNEL_PRIO_FIFO_DEFAULT);
+	}
+
+
+	// Setup stack and/or priority of a new thread
+	void SetupThreadAttributes(ScePthreadAttr& creationAttribs, const EA::Thread::ThreadParameters* pTP)
+	{
+		int result = 0;
+		EA_UNUSED( result ); //only used for assertions
+
+		// We create the thread as joinable, and we'll call either pthread_join or pthread_detach, 
+		// depending on whether WaitForEnd (pthread_join) is called or not (pthread_detach).
+
+		if(pTP)
+		{
+			// Set thread stack address and/or size
+			if(pTP->mpStack)
+			{
+				EAT_ASSERT(pTP->mnStackSize != 0);
+				result = scePthreadAttrSetstack(&creationAttribs, (void*)pTP->mpStack, pTP->mnStackSize);
+				EAT_ASSERT(result == 0);
+			}
+			else if(pTP->mnStackSize)
+			{
+				result = scePthreadAttrSetstacksize(&creationAttribs, pTP->mnStackSize);
+				EAT_ASSERT(result == 0);
+			}
+
+			// Set initial non-zero priority
+			// Even if pTP->mnPriority == kThreadPriorityDefault, we need to run this on some platforms, as the thread priority for new threads on them isn't the same as the thread priority for the main thread.
+			int         policy = SCHED_OTHER;
+			sched_param param;
+
+			ConvertToNativePriority(pTP->mnPriority, param, policy);
+			result = scePthreadAttrSetschedpolicy(&creationAttribs, policy);
+			EAT_ASSERT(result == 0);
+			result = scePthreadAttrSetschedparam(&creationAttribs, &param);
+			EAT_ASSERT(result == 0);
+
+			// Unix doesn't let you specify thread CPU affinity via pthread attributes.
+			// Instead you need to call sched_setaffinity or pthread_setaffinity_np.
+		}
+		else
+		{
+			result = scePthreadAttrSetschedpolicy(&creationAttribs, SCE_KERNEL_SCHED_RR);
+			EAT_ASSERT(result == 0);
+		}
+	}
+
+// This function is only used when the thread name cannot be set from another thread.
+#if !EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+
+	void SetCurrentThreadName(const char8_t* pName)
+	{
+		EAT_COMPILETIME_ASSERT(EATHREAD_NAME_SIZE == 32);  // New name (up to 32 bytes including the NULL terminator), or NULL  
+		scePthreadRename(scePthreadSelf(), pName);
+	}
+
+#endif
+
+	static void SetPlatformThreadAffinity(EAThreadDynamicData* pTDD)
+	{
+		if(pTDD->mThreadId != EA::Thread::kThreadIdInvalid) // If the thread has been created...
+		{
+			SceKernelCpumask mask;
+			mask = (1 << pTDD->mStartupProcessor) & 0xFF;
+			int nResult = scePthreadSetaffinity(pTDD->mSysThreadId, mask);
+			EAT_ASSERT(nResult == SCE_OK); EA_UNUSED(nResult);
+		}
+		// Else the thread hasn't started yet, or has already exited. Let the thread set its own 
+		// affinity when it starts.
+	}
+
+} // namespace
+
+
+
+
+namespace EA
+{ 
+	namespace Thread
+	{
+		extern Allocator* gpAllocator;
+
+		const size_t kMaxThreadDynamicDataCount = 128;
+
+		struct EAThreadGlobalVars
+		{
+			EA_PREFIX_ALIGN(8)
+			char        gThreadDynamicData[kMaxThreadDynamicDataCount][sizeof(EAThreadDynamicData)] EA_POSTFIX_ALIGN(8);
+			AtomicInt32 gThreadDynamicDataAllocated[kMaxThreadDynamicDataCount];
+			Mutex gThreadDynamicMutex;
+		};
+		EATHREAD_GLOBALVARS_CREATE_INSTANCE;       
+
+		EAThreadDynamicData* AllocateThreadDynamicData()
+		{
+			for(size_t i(0); i < kMaxThreadDynamicDataCount; i++)
+			{
+				if(EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[i].SetValueConditional(1, 0))
+					return (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+			}
+
+			// This is a safety fallback mechanism. In practice it will almost never be used.
+			if(gpAllocator)
+				return (EAThreadDynamicData*)gpAllocator->Alloc(sizeof(EAThreadDynamicData));
+			else
+				return (EAThreadDynamicData*)new char[sizeof(EAThreadDynamicData)]; // We assume the returned alignment is sufficient.
+		}
+
+		void FreeThreadDynamicData(EAThreadDynamicData* pEAThreadDynamicData)
+		{
+			if((pEAThreadDynamicData >= (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData) && (pEAThreadDynamicData < ((EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData + kMaxThreadDynamicDataCount)))
+			{
+				pEAThreadDynamicData->~EAThreadDynamicData();
+				EATHREAD_GLOBALVARS.gThreadDynamicDataAllocated[pEAThreadDynamicData - (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData].SetValue(0);
+			}
+			else
+			{
+				// Assume the data was allocated via the fallback mechanism.
+				pEAThreadDynamicData->~EAThreadDynamicData();
+				if(gpAllocator)
+					gpAllocator->Free(pEAThreadDynamicData);
+				else
+					delete[] (char*)pEAThreadDynamicData;
+			}
+		}
+
+		// This is a public function.
+		EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId)
+		{
+			for(size_t i(0); i < kMaxThreadDynamicDataCount; i++)
+			{
+				EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+
+				if(pTDD->mThreadId == threadId)
+					return pTDD;
+			}
+
+			return NULL; // There is no practical way we can find the data unless thread-specific storage was involved.
+		}
+
+		EAThreadDynamicData* FindThreadDynamicData(SysThreadId sysThreadId)
+		{
+			for(size_t i(0); i < kMaxThreadDynamicDataCount; i++)
+			{
+				EAThreadDynamicData* const pTDD = (EAThreadDynamicData*)(void*)EATHREAD_GLOBALVARS.gThreadDynamicData[i];
+
+				if(pTDD->mSysThreadId == sysThreadId)
+					return pTDD;
+			}
+
+			return NULL; // There is no practical way we can find the data unless thread-specific storage was involved.
+		}
+	}
+}
+
+
+EAThreadDynamicData::EAThreadDynamicData()
+  : mThreadId(EA::Thread::kThreadIdInvalid),
+	mSysThreadId(0),
+	mThreadPid(0),
+	mnStatus(EA::Thread::Thread::kStatusNone),
+	mnReturnValue(0),
+  //mpStartContext[],
+	mpBeginThreadUserWrapper(NULL),
+	mnRefCount(0),
+  //mName[],
+	mStartupProcessor(EA::Thread::kProcessorDefault),
+	mRunMutex(),
+	mStartedSemaphore(),
+	mnThreadAffinityMask(EA::Thread::kThreadAffinityMaskAny)
+{
+	memset(mpStartContext, 0, sizeof(mpStartContext));
+	memset(mName, 0, sizeof(mName));
+}
+
+
+EAThreadDynamicData::~EAThreadDynamicData()
+{
+	if(mThreadId != EA::Thread::kThreadIdInvalid)
+		scePthreadDetach(mSysThreadId);
+
+	mThreadId = EA::Thread::kThreadIdInvalid;
+	mThreadPid = 0;
+	mSysThreadId = 0;
+}
+
+
+void EAThreadDynamicData::AddRef()
+{
+	mnRefCount.Increment();    // Note that mnRefCount is an AtomicInt32.
+}
+
+
+void EAThreadDynamicData::Release()
+{
+	if(mnRefCount.Decrement() == 0)   // Note that mnRefCount is an AtomicInt32.
+		EA::Thread::FreeThreadDynamicData(this);
+}
+
+
+EA::Thread::ThreadParameters::ThreadParameters()
+  : mpStack(NULL),
+	mnStackSize(0),
+	mnPriority(kThreadPriorityDefault),  
+	mnProcessor(kProcessorDefault),
+	mpName(""),
+	mnAffinityMask(kThreadAffinityMaskAny), 
+	mbDisablePriorityBoost(false)
+{
+	// Empty
+}
+
+
+EA::Thread::RunnableFunctionUserWrapper  EA::Thread::Thread::sGlobalRunnableFunctionUserWrapper = NULL;
+EA::Thread::RunnableClassUserWrapper     EA::Thread::Thread::sGlobalRunnableClassUserWrapper    = NULL;
+EA::Thread::AtomicInt32                  EA::Thread::Thread::sDefaultProcessor                  = kProcessorAny;
+EA::Thread::AtomicUint64                 EA::Thread::Thread::sDefaultProcessorMask              = UINT64_C(0xffffffffffffffff);
+
+
+EA::Thread::RunnableFunctionUserWrapper EA::Thread::Thread::GetGlobalRunnableFunctionUserWrapper()
+{
+	return sGlobalRunnableFunctionUserWrapper;
+}
+
+
+void EA::Thread::Thread::SetGlobalRunnableFunctionUserWrapper(EA::Thread::RunnableFunctionUserWrapper pUserWrapper)
+{
+	if(sGlobalRunnableFunctionUserWrapper)
+		EAT_FAIL_MSG("Thread::SetGlobalRunnableFunctionUserWrapper already set."); // Can only be set once for the application. 
+	else
+		sGlobalRunnableFunctionUserWrapper = pUserWrapper;
+}
+
+
+EA::Thread::RunnableClassUserWrapper EA::Thread::Thread::GetGlobalRunnableClassUserWrapper()
+{
+	return sGlobalRunnableClassUserWrapper;
+}
+
+
+void EA::Thread::Thread::SetGlobalRunnableClassUserWrapper(EA::Thread::RunnableClassUserWrapper pUserWrapper)
+{
+	if(sGlobalRunnableClassUserWrapper)
+		EAT_FAIL_MSG("Thread::SetGlobalRunnableClassUserWrapper already set."); // Can only be set once for the application. 
+	else
+		sGlobalRunnableClassUserWrapper = pUserWrapper;
+}
+
+
+EA::Thread::Thread::Thread()
+{
+	mThreadData.mpData = NULL;
+}
+
+
+EA::Thread::Thread::Thread(const Thread& t)
+  : mThreadData(t.mThreadData)
+{
+	if(mThreadData.mpData)
+		mThreadData.mpData->AddRef();
+}
+
+
+EA::Thread::Thread& EA::Thread::Thread::operator=(const Thread& t)
+{
+	// We don't synchronize access to mpData; we assume that the user 
+	// synchronizes it or this Thread instance is used from a single thread.
+	if(t.mThreadData.mpData)
+		t.mThreadData.mpData->AddRef();
+
+	if(mThreadData.mpData)
+		mThreadData.mpData->Release();
+
+	mThreadData = t.mThreadData;
+
+	return *this;
+}
+
+
+EA::Thread::Thread::~Thread()
+{
+	// We don't synchronize access to mpData; we assume that the user 
+	// synchronizes it or this Thread instance is used from a single thread.
+	if(mThreadData.mpData)
+		mThreadData.mpData->Release();
+}
+
+
+static void* RunnableFunctionInternal(void* pContext)
+{
+	// The parent thread is sharing memory with us and we need to 
+	// make sure our view of it is synchronized with the parent.
+	EAReadWriteBarrier();
+
+	EAThreadDynamicData* const pTDD        = (EAThreadDynamicData*)pContext; 
+	EA::Thread::RunnableFunction pFunction = (EA::Thread::RunnableFunction)pTDD->mpStartContext[0];
+	void* pCallContext                     = pTDD->mpStartContext[1];
+
+	pTDD->mThreadPid = 0;
+
+	// Lock the runtime mutex which is used to allow other threads to wait on this thread with a timeout.
+	pTDD->mRunMutex.Lock();         // Important that this be before the semaphore post.
+	pTDD->mStartedSemaphore.Post(); // Announce that the thread has started.
+	pTDD->mnStatus = EA::Thread::Thread::kStatusRunning;
+	pTDD->mpStackBase = EA::Thread::GetStackBase();
+
+#if !EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+	// Under Unix we need to set the thread name from the thread that is being named and not from an outside thread.
+	if(pTDD->mName[0])
+		SetCurrentThreadName(pTDD->mName);
+#endif
+
+	#ifdef EA_PLATFORM_ANDROID
+		AttachJavaThread();
+	#endif
+
+	if(pTDD->mpBeginThreadUserWrapper)
+	{
+		// If user wrapper is specified, call user wrapper and pass the pFunction and pContext.
+		EA::Thread::RunnableFunctionUserWrapper pWrapperFunction = (EA::Thread::RunnableFunctionUserWrapper)pTDD->mpBeginThreadUserWrapper;
+		pTDD->mnReturnValue = pWrapperFunction(pFunction, pCallContext);
+	}
+	else
+		pTDD->mnReturnValue = pFunction(pCallContext);
+
+	#ifdef EA_PLATFORM_ANDROID
+		DetachJavaThread();
+	#endif
+
+	void* pReturnValue = (void*)pTDD->mnReturnValue;
+	pTDD->mnStatus = EA::Thread::Thread::kStatusEnded;
+	pTDD->mRunMutex.Unlock();
+	pTDD->Release();
+
+	return pReturnValue;
+}
+
+static void* RunnableObjectInternal(void* pContext)
+{
+	EAThreadDynamicData* const pTDD  = (EAThreadDynamicData*)pContext; 
+	EA::Thread::IRunnable* pRunnable = (EA::Thread::IRunnable*)pTDD->mpStartContext[0];
+	void* pCallContext               = pTDD->mpStartContext[1];
+
+	pTDD->mThreadPid = 0;
+
+	pTDD->mRunMutex.Lock();         // Important that this be before the semaphore post.
+	pTDD->mStartedSemaphore.Post();
+
+	pTDD->mnStatus = EA::Thread::Thread::kStatusRunning;
+
+#if !EATHREAD_OTHER_THREAD_NAMING_SUPPORTED
+	// Under Unix we need to set the thread name from the thread that is being named and not from an outside thread.
+	if(pTDD->mName[0])
+		SetCurrentThreadName(pTDD->mName);
+#endif
+
+	#ifdef EA_PLATFORM_ANDROID
+		AttachJavaThread();
+	#endif
+
+	if(pTDD->mpBeginThreadUserWrapper)
+	{
+		// If user wrapper is specified, call user wrapper and pass the pFunction and pContext.
+		EA::Thread::RunnableClassUserWrapper pWrapperClass = (EA::Thread::RunnableClassUserWrapper)pTDD->mpBeginThreadUserWrapper;
+		pTDD->mnReturnValue = pWrapperClass(pRunnable, pCallContext);
+	}
+	else
+		pTDD->mnReturnValue = pRunnable->Run(pCallContext);
+
+	#ifdef EA_PLATFORM_ANDROID
+		DetachJavaThread();
+	#endif
+
+	void* const pReturnValue = (void*)pTDD->mnReturnValue;
+	pTDD->mnStatus = EA::Thread::Thread::kStatusEnded;
+	pTDD->mRunMutex.Unlock();
+	pTDD->Release();
+
+	return pReturnValue;
+}
+
+void EA::Thread::Thread::SetAffinityMask(EA::Thread::ThreadAffinityMask nAffinityMask)
+{
+	if(mThreadData.mpData && mThreadData.mpData->mThreadId)
+	{
+		EA::Thread::SetThreadAffinityMask(mThreadData.mpData->mThreadId, nAffinityMask);
+	}
+}
+
+EA::Thread::ThreadAffinityMask EA::Thread::Thread::GetAffinityMask()
+{
+	if(mThreadData.mpData && mThreadData.mpData->mThreadId)
+	{
+		return mThreadData.mpData->mnThreadAffinityMask;
+	}
+
+	return kThreadAffinityMaskAny;
+}
+
+/// BeginThreadInternal
+/// Extraction of both RunnableFunction and RunnableObject EA::Thread::Begin in order to have thread initialization
+/// in one place
+static EA::Thread::ThreadId BeginThreadInternal(EAThreadData& mThreadData, void* pRunnableOrFunction, void* pContext, const EA::Thread::ThreadParameters* pTP,
+												void* pUserWrapper, void* (*InternalThreadFunction)(void*))
+{
+	using namespace EA::Thread;
+
+	// The parent thread is sharing memory with us and we need to
+	// make sure our view of it is synchronized with the parent.
+	EAReadWriteBarrier();
+
+	// Check there is an entry for the current thread context in our ThreadDynamicData array.
+	EA::Thread::ThreadId thisThreadId = EA::Thread::GetThreadId();
+	if(!FindThreadDynamicData(thisThreadId))
+	{
+		EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData;
+		if(pData)
+		{
+			pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
+							 // Do not AddRef for thread execution because this is not an EAThread-managed thread.
+			pData->AddRef(); // AddRef for this function, to be released upon this function's exit.                
+			pData->mThreadId = thisThreadId;
+			pData->mSysThreadId = GetSysThreadId();
+			strncpy(pData->mName, "external", EATHREAD_NAME_SIZE);
+			pData->mName[EATHREAD_NAME_SIZE - 1] = 0;
+			pData->mpStackBase = EA::Thread::GetStackBase();
+		}
+	}
+	
+	if(mThreadData.mpData)
+		mThreadData.mpData->Release(); // Matches the "AddRef for ourselves" below.
+
+	// We use the pData temporary throughout this function because it's possible that mThreadData.mpData could be 
+	// modified as we are executing, in particular in the case that mThreadData.mpData is destroyed and changed 
+	// during execution.
+	EAThreadDynamicData* pData = new(AllocateThreadDynamicData()) EAThreadDynamicData; // Note that we use a special new here which doesn't use the heap.
+	EAT_ASSERT(pData);
+
+	if(pData)
+	{
+		mThreadData.mpData = pData;
+
+		pData->AddRef(); // AddRef for ourselves, to be released upon this Thread class being deleted or upon Begin being called again for a new thread.
+		pData->AddRef(); // AddRef for the thread, to be released upon the thread exiting.
+		pData->AddRef(); // AddRef for this function, to be released upon this function's exit.
+		pData->mThreadId = kThreadIdInvalid;
+		pData->mSysThreadId = kSysThreadIdInvalid;
+		pData->mThreadPid = 0;
+		pData->mnStatus = Thread::kStatusNone;
+		pData->mpStartContext[0] = pRunnableOrFunction;
+		pData->mpStartContext[1] = pContext;
+		pData->mpBeginThreadUserWrapper = pUserWrapper;
+		pData->mStartupProcessor = pTP ? pTP->mnProcessor % EA::Thread::GetProcessorCount() : kProcessorDefault;
+		pData->mnThreadAffinityMask = pTP ? pTP->mnAffinityMask : kThreadAffinityMaskAny;
+		strncpy(pData->mName, (pTP && pTP->mpName) ? pTP->mpName : "", EATHREAD_NAME_SIZE);
+		pData->mName[EATHREAD_NAME_SIZE - 1] = 0;
+
+		// Pass NULL attribute pointer if there are no special setup steps
+		ScePthreadAttr* pCreationAttribs = NULL;
+		int result(0);
+
+		ScePthreadAttr creationAttribs;
+
+		scePthreadAttrInit(&creationAttribs);
+
+		// Sony has stated that we should call scePthreadAttrSetinheritsched, otherwise the 
+		// thread priority set up in pthread_attr_t gets ignored by the newly created thread.
+		scePthreadAttrSetinheritsched(&creationAttribs, SCE_PTHREAD_EXPLICIT_SCHED);
+
+		if(pData->mStartupProcessor == EA::Thread::kProcessorAny)
+		{
+			// Unless you specifically set the thread affinity to SCE_KERNEL_CPUMASK_USER_ALL,
+			// Sony apparently assigns your thread to a single CPU.
+			if(pData->mnThreadAffinityMask == kThreadAffinityMaskAny)
+				scePthreadAttrSetaffinity(&creationAttribs, SCE_KERNEL_CPUMASK_USER_ALL);
+			else
+				scePthreadAttrSetaffinity(&creationAttribs, pData->mnThreadAffinityMask);
+		}
+		else if(pData->mStartupProcessor != kProcessorDefault)
+		{
+			SceKernelCpumask mask = (1 << pData->mStartupProcessor) & 0xFF;
+			scePthreadAttrSetaffinity(&creationAttribs, mask);
+		}
+
+		SetupThreadAttributes(creationAttribs, pTP);
+		pCreationAttribs = &creationAttribs;
+		
+		result = scePthreadCreate(&pData->mSysThreadId, pCreationAttribs, InternalThreadFunction, pData, mThreadData.mpData->mName);
+		
+		if(result == 0) // If success...
+		{
+			// NOTE: This cast must match the cast that is done in EA::Thread::GetThreadId.
+			pData->mThreadId = *reinterpret_cast<EA::Thread::ThreadId*>(pData->mSysThreadId);
+
+			ThreadId threadIdTemp = pData->mThreadId; // Temp value because Release below might delete pData.
+
+			// If additional attributes were used, free initialization data.
+			if(pCreationAttribs)
+			{
+				result = scePthreadAttrDestroy(pCreationAttribs);
+				EAT_ASSERT(result == 0);
+			}
+
+			pData->Release(); // Matches AddRef for this function.
+			return threadIdTemp;
+		}
+
+		// If additional attributes were used, free initialization data
+		if(pCreationAttribs)
+		{
+			result = scePthreadAttrDestroy(pCreationAttribs);
+			EAT_ASSERT(result == 0);
+		}
+
+		pData->Release(); // Matches the AddRef for this function above.
+		pData->Release(); // Matches the AddRef for ourselves (this Thread class) above.
+		pData->Release(); // Matches the AddRef for the thread above.
+		mThreadData.mpData = NULL; // mThreadData.mpData == pData
+	}
+
+	return (ThreadId)kThreadIdInvalid;
+}
+
+
+EA::Thread::ThreadId EA::Thread::Thread::Begin(RunnableFunction pFunction, void* pContext,
+											   const ThreadParameters* pTP, RunnableFunctionUserWrapper pUserWrapper)
+{
+	ThreadId threadId = BeginThreadInternal(mThreadData, reinterpret_cast<void*>((uintptr_t)pFunction), pContext, pTP,
+											reinterpret_cast<void*>((uintptr_t)pUserWrapper), RunnableFunctionInternal);
+	return threadId;
+}
+
+
+EA::Thread::ThreadId EA::Thread::Thread::Begin(IRunnable* pRunnable, void* pContext,
+											   const ThreadParameters* pTP, RunnableClassUserWrapper pUserWrapper)
+{
+	ThreadId threadId = BeginThreadInternal(mThreadData, reinterpret_cast<void*>((uintptr_t)pRunnable), pContext, pTP,
+											reinterpret_cast<void*>((uintptr_t)pUserWrapper), RunnableObjectInternal);
+	return threadId;
+}
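+
+
+// Illustrative usage sketch (not part of the library); names are hypothetical.
+// Begin takes a RunnableFunction (returning intptr_t), an optional context
+// pointer, and optional ThreadParameters.
+/*
+	static intptr_t ThreadEntry(void* pContext)
+	{
+		// ... thread work ...
+		return 0;
+	}
+
+	void Example()
+	{
+		using namespace EA::Thread;
+
+		ThreadParameters params;
+		params.mpName = "Worker"; // Truncated to EATHREAD_NAME_SIZE - 1 characters.
+
+		Thread thread;
+		ThreadId threadId = thread.Begin(ThreadEntry, NULL, &params, NULL);
+
+		if(threadId != kThreadIdInvalid)
+		{
+			// The thread was created; see WaitForEnd below for waiting on completion.
+		}
+	}
+*/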
+
+
+EA::Thread::Thread::Status EA::Thread::Thread::WaitForEnd(const ThreadTime& timeoutAbsolute, intptr_t* pThreadReturnValue)
+{
+	// In order to support timeoutAbsolute, we don't just call scePthreadJoin, as that's an infinitely blocking call.
+	// Instead we wait on a Mutex (with a timeout) which the running thread locked and will unlock as it is exiting.
+	// Only after the successful Mutex lock do we call scePthreadJoin, as we know that it won't block for an indeterminate
+	// amount of time (barring a thread priority inversion problem). If the user never calls WaitForEnd, then we
+	// will eventually call scePthreadDetach in the EAThreadDynamicData destructor.
+
+	// The mThreadData memory is shared between threads and when 
+	// reading it we must be synchronized.
+	EAReadWriteBarrier();
+
+	// A mutex lock around mpData is not needed below because mpData is never allowed to go from non-NULL to NULL. 
+	// However, there is an argument that can be made for placing a memory read barrier before reading it.
+
+	if(mThreadData.mpData) // If this is non-zero then we must have created the thread.
+	{
+		// We must not call WaitForEnd from the thread we are waiting to end. 
+		// That would result in a deadlock, at least if the timeout was infinite.
+		EAT_ASSERT(mThreadData.mpData->mThreadId != EA::Thread::GetThreadId());
+
+		Status currentStatus = GetStatus();
+
+		if(currentStatus == kStatusNone) // If the thread hasn't started yet...
+		{
+			// The thread has not been started yet. Wait on the semaphore (which is posted when the thread actually starts executing).
+			Semaphore::Result result = (Semaphore::Result)mThreadData.mpData->mStartedSemaphore.Wait(timeoutAbsolute);
+			EAT_ASSERT(result != Semaphore::kResultError);
+
+			if(result >= 0) // If the Wait succeeded, as opposed to timing out...
+			{
+				// We know for sure that the thread status is running now.
+				currentStatus = kStatusRunning;
+				mThreadData.mpData->mStartedSemaphore.Post(); // Re-post the semaphore so that any other callers of WaitForEnd don't block on the Wait above.
+			}
+		} // fall through.
+
+		if(currentStatus == kStatusRunning) // If the thread has started but not yet exited...
+		{
+			// Lock on the mutex (which is available when the thread is exiting)
+			Mutex::Result result = (Mutex::Result)mThreadData.mpData->mRunMutex.Lock(timeoutAbsolute);
+			EAT_ASSERT(result != Mutex::kResultError);
+
+			if(result > 0) // If the Lock succeeded, as opposed to timing out... then the thread has exited or is in the process of exiting.
+			{
+				// Do a join via scePthreadJoin. This is a blocking call, but we know that it will end very soon,
+				// as the thread unlocks the mutex right before it returns to the OS.
+				// The return value of scePthreadJoin contains information that isn't currently useful to us.
+				scePthreadJoin(mThreadData.mpData->mSysThreadId, NULL);
+				mThreadData.mpData->mThreadId = kThreadIdInvalid;
+
+				// We know for sure that the thread status is ended now.
+				currentStatus = kStatusEnded;
+				mThreadData.mpData->mRunMutex.Unlock();
+			}
+			// Else the Lock timed out, which means that the thread didn't exit before we ran out of time.
+			// In this case we need to return to the user that the status is kStatusRunning.
+		}
+		else
+		{
+			// Else currentStatus == kStatusEnded.
+			scePthreadJoin(mThreadData.mpData->mSysThreadId, NULL);
+			mThreadData.mpData->mThreadId = kThreadIdInvalid;
+		}
+
+		if(currentStatus == kStatusEnded)
+		{
+			// Call GetStatus again to get the thread return value.
+			currentStatus = GetStatus(pThreadReturnValue);
+		}
+
+		return currentStatus;
+	}
+	else
+	{
+		// Else the user hasn't started the thread yet, so we wait until the user starts it.
+		// Ideally, what we really want to do here is wait for some kind of signal. 
+		// Instead for the time being we do a polling loop. 
+		while((!mThreadData.mpData || (mThreadData.mpData->mThreadId == kThreadIdInvalid)) && (GetThreadTime() < timeoutAbsolute))
+		{
+			ThreadSleep(1);
+			EAReadWriteBarrier();
+			EACompilerMemoryBarrier();
+		}
+
+		if(mThreadData.mpData)
+			return WaitForEnd(timeoutAbsolute);
+	}
+
+	return kStatusNone; 
+}
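+
+
+// Illustrative usage sketch (not part of the library): waiting with a timeout.
+// Timeouts are absolute, so a relative wait is expressed by adding milliseconds
+// to GetThreadTime(), per the usual EAThread convention. 'thread' is a
+// hypothetical Thread that was started with Begin.
+/*
+	using namespace EA::Thread;
+
+	intptr_t returnValue = 0;
+	Thread::Status status = thread.WaitForEnd(GetThreadTime() + 5000, &returnValue);
+
+	if(status == Thread::kStatusEnded)
+	{
+		// returnValue holds the value returned by the thread function.
+	}
+	else
+	{
+		// kStatusRunning: the wait timed out; kStatusNone: the thread never started.
+	}
+*/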
+
+
+EA::Thread::Thread::Status EA::Thread::Thread::GetStatus(intptr_t* pThreadReturnValue) const
+{
+	if(mThreadData.mpData)
+	{
+		EAReadBarrier();
+		Status status = (Status)mThreadData.mpData->mnStatus;
+
+		if(pThreadReturnValue && (status == kStatusEnded))
+			*pThreadReturnValue = mThreadData.mpData->mnReturnValue;
+
+		return status;
+	}
+
+	return kStatusNone;
+}
+
+
+EA::Thread::ThreadId EA::Thread::Thread::GetId() const
+{
+	// A mutex lock around mpData is not needed below because 
+	// mpData is never allowed to go from non-NULL to NULL. 
+	if(mThreadData.mpData)
+		return mThreadData.mpData->mThreadId;
+
+	return kThreadIdInvalid;
+}
+
+
+int EA::Thread::Thread::GetPriority() const
+{
+	// A mutex lock around mpData is not needed below because 
+	// mpData is never allowed to go from non-NULL to NULL. 
+	if(mThreadData.mpData)
+	{
+		int         policy;
+		sched_param param;
+
+		int result = scePthreadGetschedparam(mThreadData.mpData->mSysThreadId, &policy, &param);
+
+		if(result == 0)
+			return ConvertFromNativePriority(param, policy);
+
+		return kThreadPriorityDefault;
+	}
+
+	return kThreadPriorityUnknown;
+}
+
+
+bool EA::Thread::Thread::SetPriority(int nPriority)
+{
+	// A mutex lock around mpData is not needed below because 
+	// mpData is never allowed to go from non-NULL to NULL. 
+	EAT_ASSERT(nPriority != kThreadPriorityUnknown);
+
+	if(mThreadData.mpData)
+	{
+		int         policy;
+		sched_param param;
+
+		int result = scePthreadGetschedparam(mThreadData.mpData->mSysThreadId, &policy, &param);
+
+		if(result == 0) // If success...
+		{
+			ConvertToNativePriority(nPriority, param, policy);
+
+			result = scePthreadSetschedparam(mThreadData.mpData->mSysThreadId, policy, &param);
+		}
+
+		return (result == 0);
+	}
+
+	return false;
+}
+
+
+// To consider: Make it so we return a value.
+void EA::Thread::Thread::SetProcessor(int nProcessor)
+{
+	if(mThreadData.mpData)
+	{
+		mThreadData.mpData->mStartupProcessor = nProcessor; // Assign this in case the thread hasn't started yet and thus we are leaving it a message to set it when it has started.
+		SetPlatformThreadAffinity(mThreadData.mpData);
+	}
+}
+
+
+void EA::Thread::Thread::Wake()
+{
+	// Todo: implement this. The solution is to use a signal to wake the sleeping thread via an EINTR.
+	// Possibly use the SIGCONT signal. Have to look into this to tell what the best approach is.
+}
+
+
+const char* EA::Thread::Thread::GetName() const 
+{ 
+	return mThreadData.mpData ? mThreadData.mpData->mName : ""; 
+}
+
+
+void EA::Thread::Thread::SetName(const char* pName)
+{
+	if(mThreadData.mpData && pName)
+		SetThreadName(mThreadData.mpData->mThreadId, pName);
+}
+
+

+ 687 - 0
source/libunwind/eathread_callstack_libunwind.cpp

@@ -0,0 +1,687 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread.h>
+#include <eathread/eathread_atomic.h>
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/eathread_storage.h>
+#include <string.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <unwind.h>
+
+#if defined(EA_PLATFORM_BSD)
+	#include <sys/signal.h>
+	#include <machine/signal.h>
+#elif defined(EA_PLATFORM_LINUX)
+	#include <signal.h>
+#endif
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+	// Nothing needed.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+	// Nothing needed.
+}
+
+
+EATHREADLIB_API void GetInstructionPointer(void*& p)
+{
+	// Currently all platforms that have <unwind.h> have __builtin_return_address().
+	p = __builtin_return_address(0);
+}
+
+
+
+// This is a callback function which libunwind calls, once per callstack entry.
+/*
+Linux for ARM:
+	enum _Unwind_Reason_Code {
+		_URC_OK = 0,                        // operation completed successfully
+		_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+		_URC_END_OF_STACK = 5,
+		_URC_HANDLER_FOUND = 6,
+		_URC_INSTALL_CONTEXT = 7,
+		_URC_CONTINUE_UNWIND = 8,
+		_URC_FAILURE = 9                    // unspecified failure of some kind
+	};
+
+	#define _URC_NO_REASON _URC_OK
+
+BSD (and I think also Linux for x86/x64): 
+	enum _Unwind_Reason_Code {
+		_URC_NO_REASON = 0,
+		_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
+		_URC_FATAL_PHASE2_ERROR = 2,
+		_URC_FATAL_PHASE1_ERROR = 3,
+		_URC_NORMAL_STOP = 4,
+		_URC_END_OF_STACK = 5,
+		_URC_HANDLER_FOUND = 6,
+		_URC_INSTALL_CONTEXT = 7,
+		_URC_CONTINUE_UNWIND = 8
+	};
+*/
+
+struct UnwindCallbackContext
+{
+	void** mpReturnAddressArray;
+	size_t mReturnAddressArrayCapacity;
+	size_t mReturnAddressArrayIndex;
+};
+
+static _Unwind_Reason_Code UnwindCallback(_Unwind_Context* pUnwindContext, void* pUnwindCallbackContextVoid)
+{
+	UnwindCallbackContext* pUnwindCallbackContext = (UnwindCallbackContext*)pUnwindCallbackContextVoid;
+
+	if(pUnwindCallbackContext->mReturnAddressArrayIndex < pUnwindCallbackContext->mReturnAddressArrayCapacity)
+	{
+		uintptr_t ip = _Unwind_GetIP(pUnwindContext);
+		pUnwindCallbackContext->mpReturnAddressArray[pUnwindCallbackContext->mReturnAddressArrayIndex++] = (void*)ip;
+		return _URC_NO_REASON;
+	}
+
+	#if defined(EA_PLATFORM_LINUX)
+		return _URC_NO_REASON; // Is there a way to tell the caller that we want to stop?
+	#else
+		return _URC_NORMAL_STOP;
+	#endif
+}
+
+
+
+
+
+/*
+
+The following commented-out code is for reading the callstack of a thread other than the current one.
+The code below is originally for BSD Unix, and probably needs to be tweaked to support Linux.
+
+namespace Local
+{
+	enum EAThreadBacktraceState
+	{
+		// Positive thread lwp ids are here implicitly.
+		EATHREAD_BACKTRACE_STATE_NONE    = -1,
+		EATHREAD_BACKTRACE_STATE_DUMPING = -2,
+		EATHREAD_BACKTRACE_STATE_DONE    = -3,
+		EATHREAD_BACKTRACE_STATE_CANCEL  = -4
+	};
+
+	struct ThreadBacktraceState
+	{
+		EA::Thread::AtomicInt32 mState;              // One of enum EAThreadBacktraceState or (initially) the thread id of the thread we are targeting.
+		void**                  mCallstack;          // Output param
+		size_t                  mCallstackCapacity;  // Input param, refers to array capacity of mCallstack.
+		size_t                  mCallstackCount;     // Output param
+		pthread_t               mPthread;            // Output param
+
+		ThreadBacktraceState() : mState(EATHREAD_BACKTRACE_STATE_NONE), mCallstackCapacity(0), mCallstackCount(0), mPthread(NULL){}
+	};
+
+
+	static pthread_mutex_t      gThreadBacktraceMutex = PTHREAD_MUTEX_INITIALIZER;
+	static ThreadBacktraceState gThreadBacktraceState; // Protected by gThreadBacktraceMutex.
+
+
+	static void gThreadBacktraceSignalHandler(int sigNum, siginfo_t* pSigInfo, void* pSigContextVoid)
+	{
+		int32_t lwpSelf = *(int32_t*)pthread_self();
+
+		if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_DUMPING, lwpSelf))
+		{
+			gThreadBacktraceState.mPthread = pthread_self();
+
+			if(gThreadBacktraceState.mCallstackCapacity)
+			{
+				gThreadBacktraceState.mCallstackCount = GetCallstack(gThreadBacktraceState.mCallstack, gThreadBacktraceState.mCallstackCapacity, (const CallstackContext*)NULL);
+
+				// At this point we need to remove the top 6 entries and insert an entry for where the thread's instruction pointer is.
+				if(gThreadBacktraceState.mCallstackCount >= 6) // This should always be true.
+				{
+					gThreadBacktraceState.mCallstackCount -= 5;
+					memmove(&gThreadBacktraceState.mCallstack[1], &gThreadBacktraceState.mCallstack[6], (gThreadBacktraceState.mCallstackCount - 1) * sizeof(void*));
+				}
+				else
+					gThreadBacktraceState.mCallstackCount = 1;
+
+				gThreadBacktraceState.mCallstack[0] = pSigContextVoid ? reinterpret_cast<void*>(reinterpret_cast<sigcontext*>((uintptr_t)pSigContextVoid + 48)->sc_rip) : NULL;
+			}
+			else
+				gThreadBacktraceState.mCallstackCount = 0;
+
+			gThreadBacktraceState.mState.SetValue(EATHREAD_BACKTRACE_STATE_DONE);
+		}
+		// else this thread received an unexpected SIGURG. This can happen if it was so delayed that 
+		// we timed out waiting for it to happen and moved on.
+	}
+}
+
+
+/// GetCallstack
+///
+/// This is a version of GetCallstack which gets the callstack of a thread based on its thread id as opposed to 
+/// its register state. It works by injecting a signal handler into the given thread and reading the self callstack
+/// then exiting from the signal handler. The GetCallstack function sets this up, generates the signal for the 
+/// other thread, then waits for it to complete. It uses the SIGURG signal for this.
+///
+/// Primary causes of failure:
+///     The target thread has SIGURG explicitly ignored.
+///     The target thread somehow is getting too little CPU time to respond to the signal.
+///
+/// To do: Change this function to take a ThreadInfo as a last parameter instead of pthread_t. And have the 
+/// ThreadInfo return additional basic thread information. Or maybe even change this function to be a 
+/// GetThreadInfo function instead of GetCallstack.
+///
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, pthread_t& pthread)
+{
+	using namespace Local;
+
+	size_t callstackCount = 0;
+
+	if(pthread)
+	{
+		pthread_t pthreadSelf    = pthread_self();
+		int32_t   lwp            = *(int32_t*)pthread;
+		int32_t   lwpSelf        = *(int32_t*)pthreadSelf;
+
+		if(lwp == lwpSelf) // This function can be called only for a thread other than self.
+			callstackCount = GetCallstack(pReturnAddressArray, nReturnAddressArrayCapacity, (const CallstackContext*)NULL);
+		else
+		{
+			struct sigaction act;   memset(&act, 0, sizeof(act));
+			struct sigaction oact;  memset(&oact, 0, sizeof(oact));
+	
+			act.sa_sigaction = gThreadBacktraceSignalHandler;
+			act.sa_flags     = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+
+			pthread_mutex_lock(&gThreadBacktraceMutex);
+
+			if(sigaction(SIGURG, &act, &oact) == 0)
+			{
+				gThreadBacktraceState.mCallstack         = pReturnAddressArray;
+				gThreadBacktraceState.mCallstackCapacity = nReturnAddressArrayCapacity;
+				gThreadBacktraceState.mState.SetValue(lwp);
+
+				// Signal the specific thread that we want to dump.
+				int32_t stateTemp = lwp;
+
+				if(pthread_kill(pthread, SIGURG) == 0)
+				{
+					// Wait for the other thread to start dumping the stack, or time out.
+					for(int waitMS = 200; waitMS; waitMS--)
+					{
+						stateTemp = gThreadBacktraceState.mState.GetValue();
+
+						if(stateTemp != lwp)
+							break;
+
+						usleep(1000); // This sleep gives the OS the opportunity to execute the target thread, even if it's of a lower priority than this thread.
+					}
+				} 
+				// else apparently failed to send SIGURG to the thread, or the thread was paused in a way that it couldn't receive it.
+
+				if(stateTemp == lwp) // If the operation timed out or seemingly never started...
+				{
+					if(gThreadBacktraceState.mState.SetValueConditional(EATHREAD_BACKTRACE_STATE_CANCEL, lwp)) // If the backtrace still didn't start, and we were able to stop it by setting the state to cancel...
+						stateTemp = EATHREAD_BACKTRACE_STATE_CANCEL;
+					else
+						stateTemp = gThreadBacktraceState.mState.GetValue();    // It looks like the backtrace thread did in fact get a late start and is now executing
+				}
+
+				// Wait indefinitely for the dump to finish or be canceled.
+				// We cannot apply a timeout here because the other thread is accessing state that
+				// is owned by this thread.
+				for(int waitMS = 100; (stateTemp == EATHREAD_BACKTRACE_STATE_DUMPING) && waitMS; waitMS--) // If the thread is (still) busy writing it out its callstack...
+				{
+					usleep(1000);
+					stateTemp = gThreadBacktraceState.mState.GetValue();
+				}
+
+				if(stateTemp == EATHREAD_BACKTRACE_STATE_DONE)
+					callstackCount = gThreadBacktraceState.mCallstackCount;
+				// Else give up on it. It's OK to just fall through.
+
+				// Restore the original SIGURG handler.
+				sigaction(SIGURG, &oact, NULL);
+			}
+
+			pthread_mutex_unlock(&gThreadBacktraceMutex);
+		}
+	}
+
+	return callstackCount;
+}
+*/
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	// libunwind can only read the stack from the current thread.
+	// However, we can accomplish this for another thread by injecting a signal handler into that thread.
+	// See the commented-out GetCallstack(pthread_t) source code above.
+
+	if(pContext == NULL) // If reading the current thread's context...
+	{
+		UnwindCallbackContext context = { pReturnAddressArray, nReturnAddressArrayCapacity, 0 };
+		_Unwind_Backtrace(&UnwindCallback, &context);
+
+		if (context.mReturnAddressArrayIndex > 0)
+		{
+			--context.mReturnAddressArrayIndex; // Remove the first entry, because it refers to this function and by design we don't include this function.
+			memmove(pReturnAddressArray, pReturnAddressArray + 1, context.mReturnAddressArrayIndex * sizeof(void*));
+		}
+
+		return context.mReturnAddressArrayIndex;
+	}
+
+	// We don't yet have a means to read another thread's callstack via only the CallstackContext.
+	return 0;
+}
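+
+
+// Illustrative usage sketch (not part of the library): capturing the calling
+// thread's callstack. A NULL context means "read the current thread".
+/*
+	void*  addresses[32];
+	size_t frameCount = EA::Thread::GetCallstack(addresses, 32, NULL);
+
+	for(size_t i = 0; i < frameCount; ++i)
+	{
+		// addresses[i] is a return address which can later be symbolized,
+		// e.g. by pairing it with GetModuleFromAddress below.
+	}
+*/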
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	// True Linux-based ARM platforms (usually tablets and phones) can use pthread_attr_getstack.
+	#if defined(EA_PLATFORM_ANDROID)
+		if((threadId == (intptr_t)kThreadIdInvalid) || 
+		   (threadId == (intptr_t)kThreadIdCurrent) || 
+		   (threadId == (intptr_t)EA::Thread::GetThreadId()))
+		{
+			// Note: the behavior below is inconsistent between platforms and needs to be made consistent.
+			#if defined(__ARMCC_VERSION) // If using the ARM Compiler...
+				context.mSP           = (uint32_t)__current_sp();
+				context.mLR           = (uint32_t)__return_address();
+				context.mPC           = (uint32_t)__current_pc();
+				context.mStackPointer = context.mSP;
+
+			#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)                
+				#if defined(EA_PROCESSOR_X86_64)
+					context.mRIP          = (uint64_t)__builtin_return_address(0);
+					context.mRSP          = (uint64_t)__builtin_frame_address(1);
+					context.mRBP          = 0;
+					context.mStackPointer = context.mRSP;
+
+				#elif defined(EA_PROCESSOR_X86)
+					context.mEIP          = (uint32_t)__builtin_return_address(0);
+					context.mESP          = (uint32_t)__builtin_frame_address(1);
+					context.mEBP          = 0;
+					context.mStackPointer = context.mESP;
+
+				#elif defined(EA_PROCESSOR_ARM32)
+					// register uintptr_t current_sp asm ("sp");
+					context.mSP = (uint32_t)__builtin_frame_address(0);
+					context.mLR = (uint32_t)__builtin_return_address(0);
+
+					void* pInstruction;
+					EAGetInstructionPointer(pInstruction);
+					context.mPC = reinterpret_cast<uintptr_t>(pInstruction);
+
+					context.mStackPointer = context.mSP;
+
+				#elif defined(EA_PROCESSOR_ARM64)
+					// register uintptr_t current_sp asm ("sp");
+					context.mSP = (uint64_t)__builtin_frame_address(0);
+					context.mLR = (uint64_t)__builtin_return_address(0);
+
+					void* pInstruction;
+					EAGetInstructionPointer(pInstruction);
+					context.mPC = reinterpret_cast<uintptr_t>(pInstruction);
+
+					context.mStackPointer = context.mSP;
+				#endif
+			#endif
+
+			context.mStackBase  = (uintptr_t)GetStackBase();
+			context.mStackLimit = (uintptr_t)GetStackLimit();
+
+			return true;
+		}
+		else
+		{
+			// Else haven't implemented getting the stack info for other threads
+			memset(&context, 0, sizeof(context));
+			return false;
+		}        
+
+	#else
+		pthread_t self      = pthread_self();
+		pthread_t pthreadId = (typeof(pthread_t))threadId; // Requires that pthread_t is a pointer or integral type.
+
+		if(pthread_equal(pthreadId, self))
+		{
+			void* pInstruction;
+
+			// This uses the GCC labels-as-values extension to capture the current instruction address:
+			pInstruction = ({ __label__ label; label: &&label; });
+
+			// Note: the behavior below is inconsistent between platforms and needs to be made consistent.
+			#if defined(EA_PROCESSOR_X86_64)
+				context.mRIP = (uint64_t)pInstruction;
+				context.mRSP = (uint64_t)__builtin_frame_address(1);
+				context.mRBP = 0;
+
+			#elif defined(EA_PROCESSOR_X86)
+				context.mEIP = (uint32_t)__builtin_return_address(0);
+				context.mESP = (uint32_t)__builtin_frame_address(1);
+				context.mEBP = 0;
+			#endif
+
+			return true;
+		}
+		else
+		{
+			// There is currently no way to do this.
+			memset(&context, 0, sizeof(context));
+			return false;
+		}
+	#endif
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	// Assuming we are using pthreads, sysThreadId == threadId.
+	return GetCallstackContext(context, sysThreadId);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	#if defined(EA_PROCESSOR_X86_64)
+		context.mRIP = pContext->Rip;
+		context.mRSP = pContext->Rsp;
+		context.mRBP = pContext->Rbp;
+	#elif defined(EA_PROCESSOR_X86)
+		context.mEIP = pContext->Eip;
+		context.mESP = pContext->Esp;
+		context.mEBP = pContext->Ebp;
+	#elif defined(EA_PROCESSOR_ARM32)
+		context.mSP  = pContext->mGpr[13];
+		context.mLR  = pContext->mGpr[14];
+		context.mPC  = pContext->mGpr[15];
+	#elif defined(EA_PROCESSOR_ARM64)
+		context.mSP  = pContext->mGpr[31];
+		context.mLR  = pContext->mGpr[30];
+		context.mPC  = pContext->mPC;
+	#else
+		// To do.
+	#endif
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+// Returns the required strlen of pModuleName.
+//
+EATHREADLIB_API size_t GetModuleFromAddress(const void* address, char* pModuleName, size_t moduleNameCapacity)
+{
+	#if 0 // Disabled until testable: defined(EA_PLATFORM_LINUX)
+		// The output of reading /proc/self/maps is like the following (there's no leading space on each line).
+		// We look for entries that have r-x as the first three flags, as they are executable modules.
+		// The format is (http://linux.die.net/man/5/proc):
+		// <begin address>-<end address> <flags> <offset> <device major>:<device minor> <inode> <path>
+		//
+		// 00400000-0040b000 r-xp 00000000 08:01 655382                             /bin/cat
+		// 0060a000-0060b000 r--p 0000a000 08:01 655382                             /bin/cat
+		// 0060b000-0060c000 rw-p 0000b000 08:01 655382                             /bin/cat
+		// 0060c000-0062d000 rw-p 00000000 00:00 0                                  [heap]
+		// 7ffff77b5000-7ffff7a59000 r--p 00000000 08:01 395618                     /usr/lib/locale/locale-archive
+		// 7ffff7a59000-7ffff7bd3000 r-xp 00000000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7bd3000-7ffff7dd2000 ---p 0017a000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd2000-7ffff7dd6000 r--p 00179000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd6000-7ffff7dd7000 rw-p 0017d000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd7000-7ffff7ddc000 rw-p 00000000 00:00 0 
+		// 7ffff7ddc000-7ffff7dfc000 r-xp 00000000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7fd9000-7ffff7fdc000 rw-p 00000000 00:00 0 
+		// 7ffff7ff9000-7ffff7ffb000 rw-p 00000000 00:00 0 
+		// 7ffff7ffb000-7ffff7ffc000 r-xp 00000000 00:00 0                          [vdso]
+		// 7ffff7ffc000-7ffff7ffd000 r--p 00020000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7ffd000-7ffff7ffe000 rw-p 00021000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7ffe000-7ffff7fff000 rw-p 00000000 00:00 0 
+		// 7ffffffde000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]
+		// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
+
+		FILE* file = fopen("/proc/self/maps", "rt"); 
+
+		if(file)
+		{
+			uint64_t address64 = (uint64_t)reinterpret_cast<uintptr_t>(address);
+			char     lineBuffer[1024]; 
+
+			while(fgets(lineBuffer, sizeof(lineBuffer), file) != NULL) 
+			{ 
+				size_t lineLength = strlen(lineBuffer); 
+
+				if((lineLength > 0) && (lineBuffer[lineLength - 1] == '\n'))
+					lineBuffer[--lineLength] = '\0'; 
+
+				uint64_t start, end, offset, devMajor, devMinor, inode;
+				char     flags[4]; 
+				char     path[512 + 1]; 
+
+				// 7ffff7ddc000-7ffff7dfc000 r-xp 00000000 08:01 1062651 /lib/ld-2.12.1.so
+				int fieldCount = EA::StdC::Sscanf(lineBuffer, "%I64x-%I64x %c%c%c%c %I64x %I64d:%I64d %I64x %512s", 
+									&start, &end, &flags[0], &flags[1], &flags[2], &flags[3], &offset,
+									&devMajor, &devMinor, &inode, path);
+				if(fieldCount == 11) 
+				{ 
+					if((flags[0] == 'r') && (flags[1] == '-') && (flags[2] == 'x')) // If this looks like an executable module...
+					{
+						if((address64 >= start) && (address64 < end)) // If this is the module that corresponds to the input address
+						{
+							// We can't strcpy path as-is because it might be truncated due to spaces in the file name.
+							// So we get the location path is in the original lineBuffer and strcpy everything till the end.
+							char* pPathBegin = EA::StdC::Strstr(lineBuffer, path);
+
+							return EA::StdC::Strlcpy(pModuleName, pPathBegin, moduleNameCapacity);
+						}
+					}
+				}
+			}
+
+			fclose(file);
+		}
+	#else
+		EA_UNUSED(address);
+
+		// Probably also doable for BSD.
+		// http://freebsd.1045724.n5.nabble.com/How-to-get-stack-bounds-of-current-process-td4053477.html
+
+	#endif
+
+	if(moduleNameCapacity > 0)
+		pModuleName[0] = 0;
+
+	return 0;
+}
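+
+
+// Illustrative usage sketch (not part of the library), for platforms where the
+// /proc/self/maps path above is enabled. 'address' is a hypothetical code
+// address, such as an entry returned by GetCallstack.
+/*
+	char   moduleName[256];
+	size_t length = EA::Thread::GetModuleFromAddress(address, moduleName, sizeof(moduleName));
+
+	if(length == 0)
+	{
+		// No module information was available; moduleName is an empty string.
+	}
+*/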
+
+
+/*
+	uint64_t GetLibraryAddressLinux(const char* pModuleName) 
+	{
+		// The output of reading /proc/self/maps is like the following (there's no leading space on each line).
+		// We look for entries that have r-x as the first three flags, as they are executable modules.
+		// The format is (http://linux.die.net/man/5/proc):
+		// <begin address>-<end address> <flags> <offset> <device major>:<device minor> <inode> <path>
+		//
+		// 00400000-0040b000 r-xp 00000000 08:01 655382                             /bin/cat
+		// 0060a000-0060b000 r--p 0000a000 08:01 655382                             /bin/cat
+		// 0060b000-0060c000 rw-p 0000b000 08:01 655382                             /bin/cat
+		// 0060c000-0062d000 rw-p 00000000 00:00 0                                  [heap]
+		// 7ffff77b5000-7ffff7a59000 r--p 00000000 08:01 395618                     /usr/lib/locale/locale-archive
+		// 7ffff7a59000-7ffff7bd3000 r-xp 00000000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7bd3000-7ffff7dd2000 ---p 0017a000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd2000-7ffff7dd6000 r--p 00179000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd6000-7ffff7dd7000 rw-p 0017d000 08:01 1062643                    /lib/libc-2.12.1.so
+		// 7ffff7dd7000-7ffff7ddc000 rw-p 00000000 00:00 0 
+		// 7ffff7ddc000-7ffff7dfc000 r-xp 00000000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7fd9000-7ffff7fdc000 rw-p 00000000 00:00 0 
+		// 7ffff7ff9000-7ffff7ffb000 rw-p 00000000 00:00 0 
+		// 7ffff7ffb000-7ffff7ffc000 r-xp 00000000 00:00 0                          [vdso]
+		// 7ffff7ffc000-7ffff7ffd000 r--p 00020000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7ffd000-7ffff7ffe000 rw-p 00021000 08:01 1062651                    /lib/ld-2.12.1.so
+		// 7ffff7ffe000-7ffff7fff000 rw-p 00000000 00:00 0 
+		// 7ffffffde000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]
+		// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
+
+		uint64_t baseAddress = 0;
+		FILE*    file = fopen("/proc/self/maps", "rt"); 
+
+		if(file)
+		{
+			size_t moduleNameLength = strlen(pModuleName); 
+			char   lineBuffer[512]; 
+
+			while(fgets(lineBuffer, sizeof lineBuffer, file) != NULL) 
+			{ 
+				size_t lineLength = strlen(lineBuffer); 
+
+				if((lineLength > 0) && (lineBuffer[lineLength - 1] == '\n'))
+					lineBuffer[--lineLength] = '\0'; 
+
+				if((lineLength >= moduleNameLength) && 
+					memcmp(lineBuffer + lineLength - moduleNameLength, pModuleName, moduleNameLength) == 0)
+				{
+					uint64_t start, end, offset; 
+					char     flags[4]; 
+
+					if(EA::StdC::Sscanf(lineBuffer, "%I64x-%I64x %c%c%c%c %I64x", &start, &end, 
+								 &flags[0], &flags[1], &flags[2], &flags[3], &offset) == 7) 
+					{ 
+						if((flags[0] == 'r') && (flags[1] == '-') && (flags[2] == 'x')) // If this looks like an executable module...
+						{ 
+							// Note: I don't understand from the Linux documentation what the 'offset' value really means
+							// and how we are supposed to use it. Example code shows it being subtracted from offset, though 
+							// offset is usually 0.
+							baseAddress = (start - offset); 
+							break; 
+						}
+					}
+				}
+			}
+
+			fclose(file);
+		}
+
+		return baseAddress; 
+   } 
+*/
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* /*pAddress*/)
+{
+	// This is doable for Linux-based platforms via fopen("/proc/self/maps")
+	// Probably also doable for BSD.
+	// http://freebsd.1045724.n5.nabble.com/How-to-get-stack-bounds-of-current-process-td4053477.html
+	// Not currently implemented for the given platform.
+	return 0;
+}
+
+
+
+EA::Thread::ThreadLocalStorage sStackBase;
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* pStackBase)
+{
+	if(pStackBase)
+		sStackBase.SetValue(pStackBase);
+	else
+	{
+		pStackBase = __builtin_frame_address(0);
+
+		if(pStackBase)
+			SetStackBase(pStackBase);
+		// Else failure; do nothing.
+	}
+}
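+
+
+// Illustrative usage sketch (not part of the library): when pthread stack info
+// is unavailable, each thread should record its stack base near the top of its
+// entry function, before any significant stack usage.
+/*
+	static intptr_t ThreadEntry(void* pContext)
+	{
+		EA::Thread::SetStackBase(NULL); // NULL means "use the current frame address".
+		// ... GetStackBase() / GetStackLimit() are now meaningful for this thread ...
+		return 0;
+	}
+*/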
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	void* pBase;
+
+	if(GetPthreadStackInfo(&pBase, NULL))
+		return pBase;
+
+	// Else we require the user to have set this previously, usually via a call 
+	// to SetStackBase() in the start function of this currently executing 
+	// thread (or main for the main thread).
+	pBase = sStackBase.GetValue();
+
+	if(pBase == NULL)
+		pBase = (void*)(((uintptr_t)&pBase + 4095) & ~4095); // Make a guess, round up to next 4096.
+
+	return pBase;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	void* pLimit;
+
+	if(GetPthreadStackInfo(NULL, &pLimit))
+		return pLimit;
+
+	pLimit = __builtin_frame_address(0);
+
+	return (void*)((uintptr_t)pLimit & ~4095); // Round down to nearest page, as the stack grows downward.
+}
+
+
+
+} // namespace Thread
+} // namespace EA
+
+
+
+
+
+
+

+ 122 - 0
source/null/eathread_callstack_null.cpp

@@ -0,0 +1,122 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/eathread_storage.h>
+#include <string.h>
+
+
+#if defined(_MSC_VER)
+	#pragma warning(disable: 4172) // returning address of local variable or temporary
+#endif
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+EATHREADLIB_API void InitCallstack()
+{
+}
+
+EATHREADLIB_API void ShutdownCallstack()
+{
+}
+
+EATHREADLIB_API size_t GetCallstack(void* /*callstack*/[], size_t /*maxDepth*/, const CallstackContext* /*pContext*/)
+{
+	return 0;
+}
+
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& /*context*/, intptr_t /*threadId*/)
+{
+	return false;
+}
+
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& /*context*/, intptr_t /*sysThreadId*/)
+{
+	return false;
+}
+
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* /*pContext*/)
+{
+	memset(&context, 0, sizeof(context));
+}
+
+EATHREADLIB_API size_t GetModuleFromAddress(const void* /*pAddress*/, char* /*pModuleFileName*/, size_t /*moduleNameCapacity*/)
+{
+	return 0;
+}
+
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* /*pAddress*/)
+{
+	return (ModuleHandle)0;
+}
+
+
+#if EA_THREADS_AVAILABLE
+	static EA::Thread::ThreadLocalStorage sStackBase;
+#else
+	static void* sStackBase;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* pStackBase)
+{
+	if(pStackBase)
+	{
+		#if EA_THREADS_AVAILABLE
+			sStackBase.SetValue(pStackBase);
+		#else
+			sStackBase = pStackBase;
+		#endif
+	}
+	else
+	{
+		pStackBase = GetStackBase();
+		SetStackBase(pStackBase);
+	}
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	#if EA_THREADS_AVAILABLE
+		void* pStackBase = sStackBase.GetValue();
+	#else
+		void* pStackBase = sStackBase;
+	#endif
+
+	if(!pStackBase)
+		pStackBase = (void*)(((uintptr_t)GetStackLimit() + 4095) & ~4095); // Align up to nearest page, as the stack grows downward.
+
+	return pStackBase;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	void* pStack = NULL;
+
+	pStack = &pStack;
+
+	return (void*)((uintptr_t)pStack & ~4095); // Round down to nearest page, as the stack grows downward.
+}
+
+} // namespace Thread
+} // namespace EA
+
+
+

+ 140 - 0
source/openkode/eathread_semaphore_openkode.cpp

@@ -0,0 +1,140 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_semaphore.h>
+
+
+#if EA_OPENKODE_THREADS_AVAILABLE
+	#include <time.h>
+	#include <errno.h>
+	#include <string.h>
+	#include <limits.h>
+	#include <KD/kd.h>
+
+
+	EASemaphoreData::EASemaphoreData()
+	  : mpSemaphore(NULL),
+		mnCount(0),
+		mnMaxCount(INT_MAX)
+	{
+	}
+
+
+	EA::Thread::SemaphoreParameters::SemaphoreParameters(int initialCount, bool bIntraProcess, const char* /*pName*/)
+	  : mInitialCount(initialCount),
+		mMaxCount(INT_MAX),
+		mbIntraProcess(bIntraProcess) // OpenKODE doesn't support inter-process semaphores.
+	{
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(const SemaphoreParameters* pSemaphoreParameters, bool bDefaultParameters)
+	{
+		if(!pSemaphoreParameters && bDefaultParameters)
+		{
+			SemaphoreParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pSemaphoreParameters);
+	}
+
+
+	EA::Thread::Semaphore::Semaphore(int initialCount)
+	{
+		SemaphoreParameters parameters(initialCount);
+		Init(&parameters);
+ 
+	}
+
+
+	EA::Thread::Semaphore::~Semaphore()
+	{
+		const KDint result = kdThreadSemFree(mSemaphoreData.mpSemaphore); (void)result;
+		EAT_ASSERT(result == 0);
+	}
+
+
+	bool EA::Thread::Semaphore::Init(const SemaphoreParameters* pSemaphoreParameters)
+	{
+		if(pSemaphoreParameters)
+		{
+			mSemaphoreData.mnCount        = pSemaphoreParameters->mInitialCount;
+			mSemaphoreData.mnMaxCount     = pSemaphoreParameters->mMaxCount;
+			mSemaphoreData.mpSemaphore    = kdThreadSemCreate((KDuint)mSemaphoreData.mnCount);
+
+			return (mSemaphoreData.mpSemaphore != NULL);
+		}
+
+		return false;
+	}
+
+
+	int EA::Thread::Semaphore::Wait(const ThreadTime& timeoutAbsolute)
+	{
+		KDint result = kdThreadSemWait(mSemaphoreData.mpSemaphore);
+
+		if(result != 0)
+		{
+			EAT_ASSERT(false); // This is an error condition.
+			return kResultError;
+		}
+
+		EAT_ASSERT(mSemaphoreData.mnCount > 0);
+		return (int)mSemaphoreData.mnCount.Decrement(); // AtomicInt32 operation. Note that the value of the semaphore count could change from the returned value by the time the caller reads it. This is fine but the user should understand this.
+	}
+
+
+	int EA::Thread::Semaphore::Post(int count)
+	{
+		// Some systems have a sem_post_multiple which we could take advantage 
+		// of here to atomically post multiple times.
+		EAT_ASSERT(mSemaphoreData.mnCount >= 0);
+
+		// It's hard to correctly implement mnMaxCount here, given that it 
+		// may be modified by multiple threads during this execution. So if you want
+		// to use max-count with an IntraProcess semaphore safely then you need to 
+		// post only from a single thread, or at least a single thread at a time.
+		
+		int currentCount = mSemaphoreData.mnCount;
+
+		// If the post count would cause an overflow, exit early.
+		if((mSemaphoreData.mnMaxCount - count) < currentCount)
+			return kResultError;
+
+		currentCount += count;
+
+		while(count-- > 0)
+		{
+			++mSemaphoreData.mnCount;     // AtomicInt32 operation.
+
+			if(kdThreadSemPost(mSemaphoreData.mpSemaphore) != 0)
+			{
+				--mSemaphoreData.mnCount; // AtomicInt32 operation.
+				EAT_ASSERT(false);
+				return kResultError;        
+			}
+		}
+
+		// If all count posts occurred...
+		return currentCount; // It's possible that another thread may have modified this value since we changed it, but that's not important.
+	}
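+
+
+	// Illustrative usage sketch (not part of the library): a minimal
+	// producer/consumer handoff, assuming the default Wait() timeout
+	// blocks indefinitely.
+	/*
+		EA::Thread::Semaphore semaphore(0); // Initial count of zero.
+
+		// Producer thread:
+		semaphore.Post(1); // Signal one unit of work.
+
+		// Consumer thread:
+		if(semaphore.Wait() >= 0) // Returns the post-Wait count on success.
+		{
+			// ... consume the unit of work ...
+		}
+	*/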
+
+
+	int EA::Thread::Semaphore::GetCount() const
+	{
+		return (int)mSemaphoreData.mnCount;
+	}
+
+
+#endif // EA_OPENKODE_THREADS_AVAILABLE
+
+
+
+
+
+
+
+

+ 536 - 0
source/pc/eathread_callstack_win32.cpp

@@ -0,0 +1,536 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <eathread/eathread_storage.h>
+
+#if defined(EA_PLATFORM_WIN32) && EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) // The following only works on Win32 and not Win64.
+
+#if defined(_MSC_VER)
+	#pragma warning(push, 0)
+#endif
+
+#include <Windows.h>
+#include <DbgHelp.h>
+#include <stdio.h>
+
+#if defined(_MSC_VER)
+	#pragma warning(pop)
+#endif
+
+
+#ifdef _MSC_VER
+	#pragma warning(disable: 4740)      // flow in or out of inline asm code suppresses global optimization
+	#pragma comment(lib, "dbghelp.lib")
+	#pragma comment(lib, "psapi.lib")
+#endif
+
+
+typedef BOOL   (__stdcall *SYMINITIALIZE)(HANDLE, LPSTR, BOOL);
+typedef BOOL   (__stdcall *SYMCLEANUP)(HANDLE);
+typedef BOOL   (__stdcall *STACKWALK)(DWORD, HANDLE, HANDLE, LPSTACKFRAME, LPVOID,PREAD_PROCESS_MEMORY_ROUTINE, PFUNCTION_TABLE_ACCESS_ROUTINE,PGET_MODULE_BASE_ROUTINE, PTRANSLATE_ADDRESS_ROUTINE);
+typedef LPVOID (__stdcall *SYMFUNCTIONTABLEACCESS)(HANDLE, DWORD);
+typedef DWORD  (__stdcall *SYMGETMODULEBASE)(HANDLE, DWORD);
+typedef BOOL   (__stdcall *SYMGETSYMFROMADDR)(HANDLE, DWORD, PDWORD, PIMAGEHLP_SYMBOL);
+typedef BOOL   (__stdcall *SYMGETLINEFROMADDR)(HANDLE, DWORD, PDWORD, PIMAGEHLP_LINE);
+
+
+namespace // We construct an anonymous namespace because doing so keeps the definitions within it local to this module.
+{  
+	struct Win32DbgHelp
+	{
+		HMODULE                mhDbgHelp;
+		bool                   mbSymInitialized;
+		SYMINITIALIZE          mpSymInitialize;
+		SYMCLEANUP             mpSymCleanup;
+		STACKWALK              mpStackWalk;
+		SYMFUNCTIONTABLEACCESS mpSymFunctionTableAccess;
+		SYMGETMODULEBASE       mpSymGetModuleBase;
+		SYMGETSYMFROMADDR      mpSymGetSymFromAddr;
+		SYMGETLINEFROMADDR     mpSymGetLineFromAddr;
+
+		Win32DbgHelp() : mhDbgHelp(0), mbSymInitialized(false), mpSymInitialize(0), 
+						 mpSymCleanup(0), mpStackWalk(0), mpSymFunctionTableAccess(0), 
+						 mpSymGetModuleBase(0), mpSymGetSymFromAddr(0), mpSymGetLineFromAddr(0)
+		{
+			// Empty. The initialization is done externally, due to tricky startup/shutdown ordering issues.
+		}
+
+		~Win32DbgHelp()
+		{
+			// Empty. The shutdown is done externally, due to tricky startup/shutdown ordering issues.
+		}
+
+		void Init()
+		{
+			if(!mhDbgHelp)
+			{
+				mhDbgHelp = ::LoadLibraryA("DbgHelp.dll");
+				if(mhDbgHelp)
+				{
+					mpSymInitialize          = (SYMINITIALIZE)(uintptr_t)         ::GetProcAddress(mhDbgHelp, "SymInitialize");
+					mpSymCleanup             = (SYMCLEANUP)(uintptr_t)            ::GetProcAddress(mhDbgHelp, "SymCleanup");
+					mpStackWalk              = (STACKWALK)(uintptr_t)             ::GetProcAddress(mhDbgHelp, "StackWalk");
+					mpSymFunctionTableAccess = (SYMFUNCTIONTABLEACCESS)(uintptr_t)::GetProcAddress(mhDbgHelp, "SymFunctionTableAccess");
+					mpSymGetModuleBase       = (SYMGETMODULEBASE)(uintptr_t)      ::GetProcAddress(mhDbgHelp, "SymGetModuleBase");
+					mpSymGetSymFromAddr      = (SYMGETSYMFROMADDR)(uintptr_t)     ::GetProcAddress(mhDbgHelp, "SymGetSymFromAddr");
+					mpSymGetLineFromAddr     = (SYMGETLINEFROMADDR)(uintptr_t)    ::GetProcAddress(mhDbgHelp, "SymGetLineFromAddr");
+				}
+			}
+		}
+
+		void Shutdown()
+		{
+			if(mhDbgHelp)
+			{
+				if(mbSymInitialized && mpSymCleanup)
+					mpSymCleanup(::GetCurrentProcess());
+				::FreeLibrary(mhDbgHelp);
+			}
+		}
+	};
+
+	static int          sInitCount = 0;
+	static Win32DbgHelp sWin32DbgHelp;
+}
+
+
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+/* To consider: Enable usage of this below.
+///////////////////////////////////////////////////////////////////////////////
+// IsAddressReadable
+//
+static bool IsAddressReadable(const void* pAddress)
+{
+	bool bPageReadable;
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
+	{
+		const DWORD flags = (PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_READONLY | PAGE_READWRITE);
+		bPageReadable = (mbi.State == MEM_COMMIT) && ((mbi.Protect & flags) != 0);
+	}
+	else
+		bPageReadable = false;
+
+	return bPageReadable;
+}
+*/
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+	if(++sInitCount == 1)
+		sWin32DbgHelp.Init();
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+	if(--sInitCount == 0)
+		sWin32DbgHelp.Shutdown();
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	size_t nEntryIndex(0);
+
+	if(!sWin32DbgHelp.mhDbgHelp)
+		sWin32DbgHelp.Init();
+
+	if(sWin32DbgHelp.mpStackWalk)
+	{
+		CONTEXT context;
+		memset(&context, 0, sizeof(context));
+		context.ContextFlags = CONTEXT_CONTROL;
+
+		if(pContext)
+		{
+			context.Eip = pContext->mEIP;
+			context.Esp = pContext->mESP;
+			context.Ebp = pContext->mEBP;
+		}
+		else
+		{
+		// RtlCaptureStackBackTrace can only generate stack traces on Win32 when the stack frame contains frame
+		// pointers. This is only a limitation on 32-bit Windows and is controlled by the following compiler switches.
+		//
+		// /Oy  : removes frame-pointers
+		// /Oy- : emits frame-pointers
+		//
+		// The wording is weird here because Microsoft refers to it as enabling/disabling a performance optimization.
+		// https://docs.microsoft.com/en-us/cpp/build/reference/oy-frame-pointer-omission?view=vs-2017
+		//
+		// EATHREAD_WIN32_FRAME_POINTER_OPTIMIZATION_DISABLED is defined based on whether the user has requested
+		// eaconfig to disable frame-pointer optimizations (i.e. emit frame-pointers).
+		// See property: 'eaconfig.disable_framepointer_optimization'.
+		#ifdef EATHREAD_WIN32_FRAME_POINTER_OPTIMIZATION_DISABLED
+			return RtlCaptureStackBackTrace(1, (ULONG)nReturnAddressArrayCapacity, pReturnAddressArray, NULL);
+		#else
+			// With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
+			// VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
+			__asm{
+				mov context.Ebp, EBP
+				mov context.Esp, ESP
+				call GetEIP
+				GetEIP:
+				pop context.Eip
+			}
+		#endif
+		}
+
+		// Initialize the STACKFRAME structure for the first call. This is only
+		// necessary for Intel CPUs, and isn't mentioned in the documentation.
+		STACKFRAME sf;
+		memset(&sf, 0, sizeof(sf));
+		sf.AddrPC.Offset     = context.Eip;
+		sf.AddrPC.Mode       = AddrModeFlat;
+		sf.AddrStack.Offset  = context.Esp;
+		sf.AddrStack.Mode    = AddrModeFlat;
+		sf.AddrFrame.Offset  = context.Ebp;
+		sf.AddrFrame.Mode    = AddrModeFlat;
+
+		const HANDLE hCurrentProcess = ::GetCurrentProcess();
+		const HANDLE hCurrentThread  = ::GetCurrentThread();
+
+		// To consider: We have had some other code which can read the stack with better success
+		// than the DbgHelp stack walk function that we use here. In particular, the DbgHelp 
+		// stack walking function doesn't do well unless x86 stack frames are used.
+		for(int nStackIndex = 0; (nEntryIndex + 1) < nReturnAddressArrayCapacity; ++nStackIndex) // The +1 form avoids unsigned underflow when the capacity is 0.
+		{
+			if(!sWin32DbgHelp.mpStackWalk(IMAGE_FILE_MACHINE_I386, hCurrentProcess, hCurrentThread, 
+											&sf, &context, NULL, sWin32DbgHelp.mpSymFunctionTableAccess, 
+											sWin32DbgHelp.mpSymGetModuleBase, NULL))
+			{
+				break;
+			}
+
+			if(sf.AddrFrame.Offset == 0)  // Basic sanity check to make sure the frame is OK. Bail if not.
+				break;
+
+			// If using the current execution context, then we ignore the first 
+			// one because it is the one that is our stack walk function itself.
+			if(pContext || (nStackIndex > 0)) 
+				pReturnAddressArray[nEntryIndex++] = ((void*)(uintptr_t)sf.AddrPC.Offset);
+		}
+	}
+
+	if(nEntryIndex < nReturnAddressArrayCapacity)
+		pReturnAddressArray[nEntryIndex] = 0; // Null-terminate; the loop condition above reserves a slot for this when the capacity is nonzero.
+	return nEntryIndex;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+EATHREADLIB_API void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	#if defined(EA_PLATFORM_WIN32)
+		EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::Context, Eip) == offsetof(CONTEXT, Eip));
+		EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::Context, SegSs) == offsetof(CONTEXT, SegSs));
+	#endif
+
+	context.mEIP = pContext->Eip;
+	context.mESP = pContext->Esp;
+	context.mEBP = pContext->Ebp;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+EATHREADLIB_API size_t GetModuleFromAddress(const void* address, char* pModuleName, size_t moduleNameCapacity)
+{
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(address, &mbi, sizeof(mbi)))
+	{
+		HMODULE hModule = (HMODULE)mbi.AllocationBase;
+
+		if(hModule)
+			return GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);
+	}
+
+	pModuleName[0] = 0;
+	return 0;
+}
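+
+// Usage sketch (a hypothetical example; SomeFunction and the MAX_PATH-sized
+// buffer are illustrative, not part of this API):
+//
+//     char moduleName[MAX_PATH];
+//     size_t len = EA::Thread::GetModuleFromAddress((const void*)&SomeFunction, moduleName, sizeof(moduleName));
+//     // On success (len > 0), moduleName holds the full path of the module containing SomeFunction.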
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress)
+{
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
+		return (ModuleHandle)mbi.AllocationBase;
+
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetThreadIdFromThreadHandle
+//
+// This implementation is the same as the one in EAThread.
+//
+EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId)
+{
+	struct THREAD_BASIC_INFORMATION_WIN32
+	{
+		BOOL  ExitStatus;
+		PVOID TebBaseAddress;
+		DWORD UniqueProcessId;
+		DWORD UniqueThreadId;
+		DWORD AffinityMask;
+		DWORD Priority;
+		DWORD BasePriority;
+	};
+
+	static HMODULE hKernel32 = NULL;
+	if(!hKernel32)
+		hKernel32 = LoadLibraryA("kernel32.dll");
+
+	if(hKernel32)
+	{
+		typedef DWORD (WINAPI *GetThreadIdFunc)(HANDLE);
+
+		static GetThreadIdFunc pGetThreadIdFunc = NULL;
+		if(!pGetThreadIdFunc)
+		   pGetThreadIdFunc = (GetThreadIdFunc)(uintptr_t)GetProcAddress(hKernel32, "GetThreadId");
+
+		if(pGetThreadIdFunc)
+			return pGetThreadIdFunc((HANDLE)threadId);
+	}
+
+
+	static HMODULE hNTDLL = NULL; 
+	if(!hNTDLL)
+		hNTDLL = LoadLibraryA("ntdll.dll");
+
+	if(hNTDLL)
+	{
+		typedef LONG (WINAPI *NtQueryInformationThreadFunc)(HANDLE, int, PVOID, ULONG, PULONG);
+
+		static NtQueryInformationThreadFunc pNtQueryInformationThread = NULL;
+		if(!pNtQueryInformationThread)
+		   pNtQueryInformationThread = (NtQueryInformationThreadFunc)(uintptr_t)GetProcAddress(hNTDLL, "NtQueryInformationThread");
+
+		if(pNtQueryInformationThread)
+		{
+			THREAD_BASIC_INFORMATION_WIN32 tbi;
+
+			if(pNtQueryInformationThread((HANDLE)threadId, 0, &tbi, sizeof(tbi), NULL) == 0)
+				return tbi.UniqueThreadId;
+		}
+	}
+
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+// The threadId parameter is a Windows thread HANDLE, as returned by the Windows
+// GetCurrentThread function, and not a thread id DWORD, as returned by the Windows
+// GetCurrentThreadId function. See GetCallstackContextSysThreadId for the latter.
+// 
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	if((threadId == (intptr_t)kThreadIdInvalid) || (threadId == (intptr_t)kThreadIdCurrent))
+		threadId = (intptr_t)::GetCurrentThread(); // GetCurrentThread returns a thread 'pseudohandle' and not a real thread handle.
+
+	const DWORD sysThreadId        = EA::Thread::GetThreadIdFromThreadHandle(threadId);
+	const DWORD sysThreadIdCurrent = ::GetCurrentThreadId();
+	CONTEXT     win32CONTEXT;
+	NT_TIB*     pTib;
+
+	if(sysThreadIdCurrent == sysThreadId)
+	{
+		// With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
+		// VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
+		__asm{
+			mov win32CONTEXT.Ebp, EBP
+			mov win32CONTEXT.Esp, ESP
+			call GetEIP
+			GetEIP:
+			pop win32CONTEXT.Eip
+		}
+
+		// Offset 0x18 from the FS segment register gives a pointer to
+		// the thread information block for the current thread
+		__asm {
+			mov eax, fs:[18h]
+			mov pTib, eax
+		}
+	}
+	else
+	{
+		// In this case we are working with a separate thread, so we suspend it
+		// and read information about it and then resume it.
+		::SuspendThread((HANDLE)threadId);
+		win32CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS;
+		::GetThreadContext((HANDLE)threadId, &win32CONTEXT);
+		// TODO: This has not been tested!
+		pTib = *((NT_TIB**)(win32CONTEXT.SegFs * 16 + 0x18)); // 0x18 (not decimal 18) is the TIB self-pointer offset described in the comment above.
+		::ResumeThread((HANDLE)threadId);
+	}
+
+	context.mEBP = (uint32_t)win32CONTEXT.Ebp;
+	context.mESP = (uint32_t)win32CONTEXT.Esp;
+	context.mEIP = (uint32_t)win32CONTEXT.Eip;
+	context.mStackBase    = (uintptr_t)pTib->StackBase;
+	context.mStackLimit   = (uintptr_t)pTib->StackLimit;
+	context.mStackPointer = (uintptr_t)win32CONTEXT.Esp;
+
+	return true;
+}
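+
+// Usage sketch (a hypothetical example; the array size of 32 is an arbitrary
+// illustration value): capture the current thread's context and then walk it
+// with GetCallstack.
+//
+//     EA::Thread::CallstackContext ctx;
+//     if(EA::Thread::GetCallstackContext(ctx, (intptr_t)EA::Thread::kThreadIdCurrent))
+//     {
+//         void*  addresses[32];
+//         size_t count = EA::Thread::GetCallstack(addresses, 32, &ctx);
+//         // addresses[0..count-1] now holds return addresses, deepest call first.
+//     }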
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+// A sysThreadId is a Microsoft DWORD thread id, which can be obtained from 
+// the currently running thread via GetCurrentThreadId. It can be obtained from
+// a Microsoft thread HANDLE via EA::Thread::GetThreadIdFromThreadHandle();
+// A DWORD thread id can be converted to a thread HANDLE via the Microsoft OpenThread
+// system function.
+//
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	bool        bReturnValue       = true;
+	const DWORD sysThreadIdCurrent = ::GetCurrentThreadId();
+	CONTEXT     win32CONTEXT;
+
+	if(sysThreadIdCurrent == (DWORD)sysThreadId)
+	{
+		// With VC++, EIP is not accessible directly, but we can use an assembly trick to get it.
+		// VC++ and Intel C++ compile this fine, but Metrowerks 7 has a bug and fails.
+		__asm{
+			mov win32CONTEXT.Ebp, EBP
+			mov win32CONTEXT.Esp, ESP
+			call GetEIP
+			GetEIP:
+			pop win32CONTEXT.Eip
+		}
+	}
+	else
+	{
+		// In this case we are working with a separate thread, so we suspend it
+		// and read information about it and then resume it.
+		HANDLE threadId = ::OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT, TRUE, (DWORD)sysThreadId);
+
+		if(threadId)
+		{
+			::SuspendThread(threadId);
+			win32CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+			::GetThreadContext(threadId, &win32CONTEXT);
+			::ResumeThread(threadId);
+
+			::CloseHandle(threadId);
+		}
+		else
+		{
+			memset(&win32CONTEXT, 0, sizeof(win32CONTEXT));
+			bReturnValue = false;
+		}
+	}
+
+	context.mEBP = (uint32_t)win32CONTEXT.Ebp;
+	context.mESP = (uint32_t)win32CONTEXT.Esp;
+	context.mEIP = (uint32_t)win32CONTEXT.Eip;
+  //context.mStackBase    = (uintptr_t)pTib->StackBase;     // To do. (Whoever added mStackBase to CallstackContext forgot to add this code)
+  //context.mStackLimit   = (uintptr_t)pTib->StackLimit;
+  //context.mStackPointer = (uintptr_t)win32CONTEXT.Esp;
+
+	return bReturnValue;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* /*pStackBase*/)
+{
+	// Nothing to do, as GetStackBase always works on its own.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	CallstackContext context;
+
+	GetCallstackContext(context, 0);
+	return (void*)context.mStackBase;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	CallstackContext context;
+
+	GetCallstackContext(context, 0);
+	return (void*)context.mStackLimit;
+
+	// Alternative which returns a slightly different answer:
+	// We return our stack pointer, which is a good approximation of the stack limit of the caller.
+	// void* pStack = NULL;
+	// __asm { mov pStack, ESP};
+	// return pStack;
+}
+
+
+} // namespace Thread
+} // namespace EA
+
+#else // Stub out function for WinRT / Windows Phone 8
+
+namespace EA
+{
+namespace Thread
+{
+
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	EA_UNUSED(pContext);
+	EA_UNUSED(pReturnAddressArray);
+	EA_UNUSED(nReturnAddressArrayCapacity);
+
+	return 0;
+}
+
+} // namespace Thread
+} // namespace EA
+
+#endif // defined(EA_PLATFORM_WIN32)
+

+ 622 - 0
source/pc/eathread_callstack_win64.cpp

@@ -0,0 +1,622 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <eathread/eathread_callstack.h>
+#include <eathread/eathread_callstack_context.h>
+#include <stdio.h>
+#include <string.h>
+#include <eathread/eathread_storage.h>
+
+#if defined(_WIN32_WINNT) && (_WIN32_WINNT < 0x0500)
+	#undef  _WIN32_WINNT
+	#define _WIN32_WINNT 0x0500
+#endif
+
+#ifdef _MSC_VER
+	#pragma warning(push, 0)
+	#include <Windows.h>
+	#include <math.h>       // VS2008 has an acknowledged bug that requires math.h (and possibly also string.h) to be #included before intrin.h.
+	#include <intrin.h>
+	#pragma intrinsic(_ReturnAddress)
+	#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+		#include <winternl.h>
+	#else
+		// Temporary while waiting for formal support:
+		extern "C" NTSYSAPI PEXCEPTION_ROUTINE NTAPI RtlVirtualUnwind(DWORD, DWORD64, DWORD64, PRUNTIME_FUNCTION, PCONTEXT, PVOID*, PDWORD64, PKNONVOLATILE_CONTEXT_POINTERS);
+		extern "C" WINBASEAPI DWORD WINAPI GetModuleFileNameA(HMODULE, LPSTR, DWORD);
+	#endif
+	#pragma warning(pop)
+#else
+	#include <Windows.h>
+	#include <winternl.h>
+#endif
+
+
+// Disable optimization of this code under VC++ for x64.
+// This is due to some as-yet undetermined crash that happens  
+// when compiler optimizations are enabled for this code.
+// This function is not performance-sensitive and so disabling 
+// optimizations shouldn't matter.
+#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_WIN64))
+	#pragma optimize("", off) 
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Stuff that is supposed to be in windows.h and/or winternl.h but isn't
+// consistently present in all versions.
+//
+#ifndef UNW_FLAG_NHANDLER
+	#define UNW_FLAG_NHANDLER 0
+#endif
+
+#ifndef UNWIND_HISTORY_TABLE_SIZE
+	extern "C"
+	{
+		#define UNWIND_HISTORY_TABLE_SIZE    12
+		#define UNWIND_HISTORY_TABLE_NONE     0
+		#define UNWIND_HISTORY_TABLE_GLOBAL   1
+		#define UNWIND_HISTORY_TABLE_LOCAL    2
+
+		typedef struct _UNWIND_HISTORY_TABLE_ENTRY
+		{
+			ULONG64 ImageBase;
+			PRUNTIME_FUNCTION FunctionEntry;
+		} UNWIND_HISTORY_TABLE_ENTRY, *PUNWIND_HISTORY_TABLE_ENTRY;
+
+
+		typedef struct _UNWIND_HISTORY_TABLE
+		{
+			ULONG Count;
+			UCHAR Search;
+			ULONG64 LowAddress;
+			ULONG64 HighAddress;
+			UNWIND_HISTORY_TABLE_ENTRY Entry[UNWIND_HISTORY_TABLE_SIZE];
+		} UNWIND_HISTORY_TABLE, *PUNWIND_HISTORY_TABLE;
+
+
+		PVOID WINAPI RtlLookupFunctionEntry(ULONG64 ControlPC, PULONG64 ImageBase, PUNWIND_HISTORY_TABLE HistoryTable);
+
+		#if !defined(_MSC_VER) || (_MSC_VER < 1500) // if earlier than VS2008...
+			typedef struct _KNONVOLATILE_CONTEXT_POINTERS
+			{ 
+				PULONGLONG dummy; 
+			} KNONVOLATILE_CONTEXT_POINTERS, *PKNONVOLATILE_CONTEXT_POINTERS; 
+
+			typedef struct _FRAME_POINTERS
+			{
+				ULONGLONG MemoryStackFp;
+				ULONGLONG BackingStoreFp;
+			} FRAME_POINTERS, *PFRAME_POINTERS;
+
+			ULONGLONG WINAPI RtlVirtualUnwind(ULONG HandlerType, ULONGLONG ImageBase, ULONGLONG ControlPC, 
+													  PRUNTIME_FUNCTION FunctionEntry, PCONTEXT ContextRecord, PBOOLEAN InFunction, 
+													  PFRAME_POINTERS EstablisherFrame, PKNONVOLATILE_CONTEXT_POINTERS ContextPointers);
+		#endif
+	}
+#endif
+
+extern "C" WINBASEAPI DWORD WINAPI GetThreadId(_In_ HANDLE hThread);
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+namespace EA
+{
+namespace Thread
+{
+
+
+/* To consider: Enable usage of this below.
+///////////////////////////////////////////////////////////////////////////////
+// IsAddressReadable
+//
+static bool IsAddressReadable(const void* pAddress)
+{
+	bool bPageReadable;
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
+	{
+		const DWORD flags = (PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_READONLY | PAGE_READWRITE);
+		bPageReadable = (mbi.State == MEM_COMMIT) && ((mbi.Protect & flags) != 0);
+	}
+	else
+		bPageReadable = false;
+
+	return bPageReadable;
+}
+*/
+
+
+#if !EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+	// GetRSP
+	//
+	// Returns the RSP of the caller.
+	// 
+	// We could also solve this with the following asm function.
+	// .CODE
+	//      GetRSP PROC
+	//      mov rax, rsp
+	//      add rax, 8
+	//      ret
+	//      GetRSP ENDP
+	//      END
+	//
+	static EA_NO_INLINE void* GetRSP()
+	{
+		#if defined(_MSC_VER)
+			uintptr_t ara = (uintptr_t)_AddressOfReturnAddress();
+		#else
+			uintptr_t ara = (uintptr_t)__builtin_frame_address(0); // __builtin_frame_address requires a level argument; 0 means the current frame.
+		#endif
+		return (void*)(ara + 8);
+	}
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetInstructionPointer
+//
+EATHREADLIB_API EA_NO_INLINE void GetInstructionPointer(void*& pInstruction)
+{
+	#if defined(_MSC_VER)
+		pInstruction = _ReturnAddress();
+	#elif defined(__GNUC__) || defined(EA_COMPILER_CLANG)
+		pInstruction = __builtin_return_address(0);
+	#else
+		void* pReturnAddressArray[2] = { 0, 0 };
+
+		GetCallstack(pReturnAddressArray, 2, NULL);
+		pInstruction = pReturnAddressArray[1]; // This is the address of the caller.
+	#endif
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// InitCallstack
+//
+EATHREADLIB_API void InitCallstack()
+{
+	// Nothing needed.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// ShutdownCallstack
+//
+EATHREADLIB_API void ShutdownCallstack()
+{
+	// Nothing needed.
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstack
+//
+// With the x64 (a.k.a. x86-64) platform, the CPU supports call stack tracing
+// natively, by design. This is as opposed to the x86 platform, in which call
+// stack tracing (a.k.a. unwinding) is a crap-shoot. The Win64 OS provides
+// two functions in particular that take care of the primary work of stack
+// tracing: RtlLookupFunctionEntry and RtlVirtualUnwind/RtlUnwindEx.
+//
+// On x64 each non-leaf function must have an info struct (unwind metadata) in 
+// static memory associated with it. That info struct describes the prologue and  
+// epilogue of the function in such a way as to identify where its return address
+// is stored and how to restore non-volatile registers of the caller so that
+// an unwind can happen during an exception and C++ object destructors can 
+// be called, etc. In order to implement a stack unwinding function for 
+// Microsoft x64, you can go the old x86 route of requiring the compiler to
+// emit stack frames and reading the stack frame values. But that would work 
+// only where the frames were enabled (maybe just debug builds) and wouldn't
+// work with third party code that didn't use the frames. But the Microsoft
+// x64 ABI -requires- that all non-leaf functions have the info struct 
+// described above. And Microsoft provides the Rtl functions mentioned above 
+// to read the info struct (RtlLookupFunctionEntry) and use it to unwind a 
+// frame (RtlVirtualUnwind/RtlUnwindEx), whether you are in an exception or not. 
+// 
+// RtlVirtualUnwind implements a virtual (pretend) unwind of a stack and is 
+// useful for reading a call stack and its unwind info without necessarily 
+// executing an unwind (like in an exception handler). RtlVirtualUnwind provides 
+// the infrastructure upon which higher level exception and unwind handling 
+// support is implemented. It doesn't exist on x86, as x86 exception unwinding
+// is entirely done by generated C++ code and isn't in the ABI. The Virtual in 
+// RtlVirtualUnwind has nothing to do with virtual memory, virtual functions, 
+// or virtual disks.
+//
+// RtlUnwindEx (replaces RtlUnwind) implements an actual unwind and thus is 
+// mostly useful only in the implementation of an exception handler and not 
+// for doing an ad-hoc stack trace.
+//
+// You can't use RtlLookupFunctionEntry on the IP (instruction pointer) of a 
+// leaf function, as the compiler isn't guaranteed to generate this info for 
+// such functions. But if a leaf function calls RtlLookupFunctionEntry on its
+// own IP then it's no longer a leaf function and by virtue of calling RtlLookupFunctionEntry
+// the info will necessarily be generated by the compiler. If you want to read
+// the info associated with an IP of another function which may be a leaf 
+// function, it's best to read the return address associated with that
+// function's callstack context, which is that function's rsp register
+// value dereferenced as a uintptr_t* (i.e. rsp holds the address of the 
+// return address).
+//
+// UNWIND_HISTORY_TABLE "is used as a cache to speed up repeated exception handling lookups, 
+// and is typically optional as far as usage with RtlUnwindEx goes – though certainly 
+// recommended from a performance perspective." This may be useful to us, though we'd need
+// to make it a thread-safe static variable or similar and not a local variable.
+// History table declaration and preparation for use, which needs to be done per-thread:
+//     UNWIND_HISTORY_TABLE unwindHistoryTable;
+//     RtlZeroMemory(&unwindHistoryTable, sizeof(UNWIND_HISTORY_TABLE));
+//     unwindHistoryTable.Unwind = TRUE;
+// To do: Implement usage of the history table for faster callstack tracing.
+//
+// Reading for anybody wanting to understand this:
+//     http://www.nynaeve.net/?p=105
+//     http://www.nynaeve.net/?p=106
+//     http://blogs.msdn.com/b/freik/archive/2005/03/17/398200.aspx
+//     http://www.codemachine.com/article_x64deepdive.html
+//     http://blogs.msdn.com/b/ntdebugging/archive/2010/05/12/x64-manual-stack-reconstruction-and-stack-walking.aspx
+//     http://eli.thegreenplace.net/2011/09/06/stack-frame-layout-on-x86-64/
+//
+EATHREADLIB_API size_t GetCallstack(void* pReturnAddressArray[], size_t nReturnAddressArrayCapacity, const CallstackContext* pContext)
+{
+	CONTEXT           context;
+	PRUNTIME_FUNCTION pRuntimeFunction;
+	ULONG64           nImageBase = 0;
+	ULONG64           nPrevImageBase = 0;
+	size_t            nFrameIndex = 0;
+
+	if(pContext)
+	{
+		RtlZeroMemory(&context, sizeof(context));
+		context.Rip          = pContext->mRIP;
+		context.Rsp          = pContext->mRSP;
+		context.Rbp          = pContext->mRBP;
+		context.ContextFlags = CONTEXT_CONTROL; // CONTEXT_CONTROL actually specifies SegSs, Rsp, SegCs, Rip, and EFlags. But for callstack tracing and unwinding, all that matters is Rip and Rsp.
+
+		// In the case where we are calling 0, we might be able to unwind one frame and see if we are now in a valid stack frame for 
+		// callstack generation. If not, we abort; otherwise we continue one frame past where the exception (the call to 0) occurred.
+		if (context.Rip == 0 && context.Rsp != 0)
+		{ 
+			context.Rip = (ULONG64)(*(PULONG64)context.Rsp); // To consider: Use IsAddressReadable(pFrame) before dereferencing this pointer.
+			context.Rsp += 8; // reset the stack pointer (+8 since we know there has been no prologue run requiring a larger number since RIP == 0)
+		} 
+
+		if(context.Rip && (nFrameIndex < nReturnAddressArrayCapacity))
+			pReturnAddressArray[nFrameIndex++] = (void*)(uintptr_t)context.Rip;
+	}
+	else // Else we are reading the current thread's callstack.
+	{
+		// To consider: Don't call the RtlCaptureContext function for EA_WINAPI_PARTITION_DESKTOP and 
+		// instead use the simpler version below it which writes Rip/Rsp/Rbp. RtlCaptureContext is much 
+		// slower. We need to verify that the 'quality' and extent of returned callstacks is good for 
+		// the simpler version before using it exclusively.
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+			// Apparently there is no need to memset the context struct.
+			context.ContextFlags = CONTEXT_ALL; // Actually we should need only CONTEXT_INTEGER, so let's test that next chance we get.
+			RtlCaptureContext(&context);
+
+		#elif defined(EA_PLATFORM_CAPILANO) // This probably isn't limited to just this platform, but until we can test any other platforms we'll leave it at just this.
+			return RtlCaptureStackBackTrace(1, (ULONG)nReturnAddressArrayCapacity, pReturnAddressArray, NULL);
+
+		#else
+			void* ip = NULL;
+			EAGetInstructionPointer(ip);
+			context.Rip          = (uintptr_t)ip;
+			context.Rsp          = (uintptr_t)GetRSP();
+			context.Rbp          = 0; // RBP isn't actually needed for stack unwinding on x64, and generated code doesn't typically need to use it, as the instruction set provides an efficient way to read/write via rsp offsets. Also, when frame pointers are omitted in the compiler settings, rbp won't be used.
+			context.ContextFlags = CONTEXT_CONTROL;
+		#endif
+	}
+
+	// The following loop intentionally skips the first call stack frame because 
+	// that frame corresponds to this function (GetCallstack).
+	while(context.Rip && (nFrameIndex < nReturnAddressArrayCapacity))
+	{
+		// Try to look up unwind metadata for the current function.
+		nPrevImageBase = nImageBase;
+		__try
+		{
+			pRuntimeFunction = (PRUNTIME_FUNCTION)RtlLookupFunctionEntry(context.Rip, &nImageBase, NULL /*&unwindHistoryTable*/);
+		}
+		__except (EXCEPTION_EXECUTE_HANDLER)
+		{
+			// Something went wrong in RtlLookupFunctionEntry, and it is unknown
+			// if it is recoverable; so just get out.
+			return nFrameIndex;
+		}
+
+		if(pRuntimeFunction)
+		{
+			// RtlVirtualUnwind is not declared in the SDK headers for non-desktop apps, 
+			// but for 64 bit targets it's always present and appears to be needed by the
+			// existing RtlUnwindEx function. If in the end we can't use RtlVirtualUnwind
+			// and Microsoft doesn't provide an alternative, we can implement RtlVirtualUnwind
+			// ourselves manually (not trivial, but has the best results) or we can use
+			// the old style stack frame following, which works only when stack frames are 
+			// enabled in the build, which usually isn't so for optimized builds and for
+			// third party code. 
+
+			__try // Under at least the XBox One platform, RtlVirtualUnwind can crash here. It may possibly be due to the context being incomplete.
+			{
+				VOID*          handlerData = NULL;
+				ULONG64        establisherFramePointers[2] = { 0, 0 };
+				RtlVirtualUnwind(UNW_FLAG_NHANDLER, nImageBase, context.Rip, pRuntimeFunction, &context, &handlerData,  establisherFramePointers, NULL);                        
+			}
+			__except (EXCEPTION_EXECUTE_HANDLER)
+			{
+				context.Rip          = NULL;
+				context.ContextFlags = 0;
+			}
+		}
+		else
+		{
+			// If we don't have a RUNTIME_FUNCTION, then we've encountered an error of some sort (most likely only for cases of corruption) or a leaf function (which doesn't make sense, given that we are moving up in the call sequence). Adjust the stack appropriately.
+			context.Rip  = (ULONG64)(*(PULONG64)context.Rsp); // To consider: Use IsAddressReadable(pFrame) before dereferencing this pointer.
+			context.Rsp += 8;
+		}
+
+		if(context.Rip)
+		{
+			if(nFrameIndex < nReturnAddressArrayCapacity)
+				pReturnAddressArray[nFrameIndex++] = (void*)(uintptr_t)context.Rip;
+		}
+	}
+
+	return nFrameIndex;
+}
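+
+// Usage sketch (a hypothetical example; the capacity of 64 is arbitrary):
+// passing NULL for the context reads the calling thread's own callstack.
+//
+//     void*  addresses[64];
+//     size_t count = EA::Thread::GetCallstack(addresses, 64, NULL);
+//     for(size_t i = 0; i < count; ++i)
+//         printf("frame %u: %p\n", (unsigned)i, addresses[i]);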
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetThreadIdFromThreadHandle
+//
+// This implementation is the same as the one in EAThread.
+// Converts a thread HANDLE (threadId) to a thread id DWORD (sysThreadId).
+// Recall that Windows has two independent thread identifier types.
+//
+EATHREADLIB_API uint32_t GetThreadIdFromThreadHandle(intptr_t threadId)
+{
+	// Win64 has this function natively, unlike earlier versions of 32 bit Windows.
+	return (uint32_t)::GetThreadId((HANDLE)threadId);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+// The threadId parameter is a Windows thread HANDLE, as returned by the Windows
+// GetCurrentThread function, and not a thread id DWORD, as returned by the Windows
+// GetCurrentThreadId function. See GetCallstackContextSysThreadId for the latter.
+// 
+#if EA_USE_CPP11_CONCURRENCY
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, EA::Thread::ThreadId threadId)
+{
+	// Retrieve the Windows thread identifier from the std::thread::id structure.
+	// This is unavoidable because GetCallstackContextSysThreadId compares the value of 'sysThreadId' 
+	// against data from the Windows API function 'GetCurrentThreadId' which returns a Windows thread identifier.
+	// http://msdn.microsoft.com/en-us/library/windows/desktop/ms683183(v=vs.85).aspx
+	static_assert(sizeof(_Thrd_t) == sizeof(threadId), "We expect the 'std::thread::id' to have a single member of type '_Thrd_t'.");
+
+	_Thrd_t wThrd;
+	memcpy(&wThrd, &threadId, sizeof(wThrd));  // we use memcpy to avoid strict aliasing issues caused by casting to access internal members.
+	return GetCallstackContextSysThreadId(context, _Thr_val(wThrd));
+}
+#else
+EATHREADLIB_API bool GetCallstackContext(CallstackContext& context, intptr_t threadId)
+{
+	if((threadId == (intptr_t)kThreadIdInvalid) || (threadId == (intptr_t)kThreadIdCurrent))
+		threadId = (intptr_t)::GetCurrentThread(); // GetCurrentThread returns a thread 'pseudohandle' and not a real thread handle.
+
+	const DWORD sysThreadId = EA::Thread::GetThreadIdFromThreadHandle(threadId);
+
+	return GetCallstackContextSysThreadId(context, sysThreadId);
+}
+#endif
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContextSysThreadId
+//
+// A sysThreadId is a Microsoft DWORD thread id, which can be obtained from 
+// the currently running thread via GetCurrentThreadId. It can be obtained from
+// a Microsoft thread HANDLE via EA::Thread::GetThreadIdFromThreadHandle();
+// A DWORD thread id can be converted to a thread HANDLE via the Microsoft OpenThread
+// system function.
+//
+
+EA_DISABLE_VC_WARNING(4701) // potentially uninitialized local variable 'win64CONTEXT' used
+
+EATHREADLIB_API bool GetCallstackContextSysThreadId(CallstackContext& context, intptr_t sysThreadId)
+{
+	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, Rip)                  == offsetof(CONTEXT, Rip));
+	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, VectorRegister)       == offsetof(CONTEXT, VectorRegister));
+	EAT_COMPILETIME_ASSERT(offsetof(EA::Thread::ContextX86_64, LastExceptionFromRip) == offsetof(CONTEXT, LastExceptionFromRip));
+
+	const DWORD sysThreadIdCurrent = GetCurrentThreadId();
+	CONTEXT     win64CONTEXT;
+
+	if(sysThreadIdCurrent == (DWORD)sysThreadId) // If getting the context of the current thread...
+	{
+		#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+			RtlCaptureContext(&win64CONTEXT); // This function has no return value.
+		#else
+			void* ip = NULL;
+			EAGetInstructionPointer(ip);
+			win64CONTEXT.Rip          = (uintptr_t)ip;
+			win64CONTEXT.Rsp          = (uintptr_t)GetRSP();
+			win64CONTEXT.Rbp          = 0; // RBP isn't actually needed for stack unwinding on x64, and generated code doesn't typically need to use it, as the instruction set provides an efficient way to read/write via rsp offsets. Also, when frame pointers are omitted in the compiler settings, rbp won't be used.
+			win64CONTEXT.ContextFlags = CONTEXT_CONTROL; // CONTEXT_CONTROL actually specifies SegSs, Rsp, SegCs, Rip, and EFlags. But for callstack tracing and unwinding, all that matters is Rip and Rsp.
+		#endif
+	}
+	else
+	{
+		#if !defined(EA_PLATFORM_WINDOWS) || EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP)
+			// In this case we are working with a separate thread, so we suspend it
+			// and read information about it and then resume it. We cannot use this
+			// technique to get the context of the current thread unless we do it by
+			// spawning a new thread which suspends this thread and calls GetThreadContext.
+
+			HANDLE threadId = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT, TRUE, (DWORD)sysThreadId);
+			BOOL   result = false;
+
+			EAT_ASSERT(threadId != 0); // If this fails then maybe there's a process security restriction we are running into.
+			if(threadId)
+			{
+				DWORD suspendResult = SuspendThread(threadId);
+
+				if(suspendResult != (DWORD)-1)
+				{
+					win64CONTEXT.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
+					result = GetThreadContext(threadId, &win64CONTEXT);
+					suspendResult = ResumeThread(threadId);
+					EAT_ASSERT(suspendResult != (DWORD)-1); EA_UNUSED(suspendResult);
+				}
+
+				CloseHandle(threadId);
+			}
+
+			if(!result)
+			{
+				win64CONTEXT.Rip          = 0;
+				win64CONTEXT.Rsp          = 0;
+				win64CONTEXT.Rbp          = 0;
+				win64CONTEXT.ContextFlags = 0;
+			}
+		#endif
+	}
+
+	context.mRIP = win64CONTEXT.Rip;
+	context.mRSP = win64CONTEXT.Rsp;
+	context.mRBP = win64CONTEXT.Rbp;
+
+	return (context.mRIP != 0);
+}
+
+EA_RESTORE_VC_WARNING()
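+
+// Usage sketch (a hypothetical example; someOtherThreadIdDword is an illustrative
+// variable, and the target thread must still be alive): capture another thread's
+// context by its DWORD thread id, then walk its stack.
+//
+//     EA::Thread::CallstackContext ctx;
+//     if(EA::Thread::GetCallstackContextSysThreadId(ctx, (intptr_t)someOtherThreadIdDword))
+//     {
+//         void*  addresses[32];
+//         size_t count = EA::Thread::GetCallstack(addresses, 32, &ctx);
+//     }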
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetCallstackContext
+//
+void GetCallstackContext(CallstackContext& context, const Context* pContext)
+{
+	context.mRIP = pContext->Rip;
+	context.mRSP = pContext->Rsp;
+	context.mRBP = pContext->Rbp;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleFromAddress
+//
+size_t GetModuleFromAddress(const void* address, char* pModuleName, size_t moduleNameCapacity)
+{
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(address, &mbi, sizeof(mbi)))
+	{
+		HMODULE hModule = (HMODULE)mbi.AllocationBase;
+
+		if(hModule)
+		{
+			#if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_DESKTOP) // GetModuleFileName is desktop API-only.
+				// As of the early Windows 8 SDKs, GetModuleFileName is not exposed to non-desktop 
+				// apps, though it's apparently nevertheless present in the libraries.
+				return GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);
+			#else
+				// If it turns out in the end that we really can't do this, then for non-shipping builds
+				// we can likely implement a manual version of this via information found through the 
+				// TEB structure for the process. 
+				return GetModuleFileNameA(hModule, pModuleName, (DWORD)moduleNameCapacity);
+			#endif
+		}
+	}
+
+	pModuleName[0] = 0;
+	return 0;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetModuleHandleFromAddress
+//
+// The input pAddress must be an address of code and not data or stack space.
+//
+EATHREADLIB_API ModuleHandle GetModuleHandleFromAddress(const void* pAddress)
+{
+	MEMORY_BASIC_INFORMATION mbi;
+
+	if(VirtualQuery(pAddress, &mbi, sizeof(mbi)))
+	{
+		// In Microsoft platforms, the module handle is really just a pointer
+		// to the code for the module. It corresponds directly to the information
+		// in the map file, though the actual address may have been changed
+		// from the value in the map file on loading into memory.
+		return (ModuleHandle)mbi.AllocationBase;
+	}
+
+	return 0;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// SetStackBase
+//
+EATHREADLIB_API void SetStackBase(void* /*pStackBase*/)
+{
+	// Nothing to do, as GetStackBase always works on its own.
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackBase
+//
+EATHREADLIB_API void* GetStackBase()
+{
+	NT_TIB64* pTIB = (NT_TIB64*)NtCurrentTeb(); // NtCurrentTeb is defined in <WinNT.h> as an inline call to __readgsqword
+	return (void*)pTIB->StackBase;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// GetStackLimit
+//
+EATHREADLIB_API void* GetStackLimit()
+{
+	NT_TIB64* pTIB = (NT_TIB64*)NtCurrentTeb(); // NtCurrentTeb is defined in <WinNT.h> as an inline call to __readgsqword
+	return (void*)pTIB->StackLimit;
+
+	// The following is an alternative implementation that returns the extent 
+	// of the current stack usage as opposed to the stack limit as seen by the OS. 
+	// This value will be a higher address than Tib.StackLimit (recall that the 
+	// stack grows downward). It's debatable which of these two approaches is
+	// better, as one returns the thread's -usable- stack space while the
+	// other returns how much the thread is -currently- using. The determination
+	// of the usable stack space is complicated by the fact that Microsoft 
+	// platforms auto-extend the stack if the process pushes beyond the current limit.
+	// In the end the Tib.StackLimit solution is actually the most portable across
+	// Microsoft OSs and compilers for those OSs (Microsoft or not).
+
+	// Alternative implementation:
+	// We return our stack pointer, which is a good approximation of the stack limit of the caller.
+	// void* rsp = GetRSP();
+	// return rsp;
+}
+
+
+} // namespace Thread
+} // namespace EA
+
+
+#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_WIN64))
+	#pragma optimize("", on) // See comments above regarding this optimization change.
+#endif
+
+
+
+

+ 221 - 0
source/pc/eathread_mutex_pc.cpp

@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include "EABase/eabase.h"
+#include "eathread/eathread_mutex.h"
+#include "eathread/eathread.h"
+
+#if defined(EA_PLATFORM_MICROSOFT)
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <Windows.h>
+	EA_RESTORE_ALL_VC_WARNINGS()
+#endif
+#ifdef CreateMutex
+	#undef CreateMutex // Windows #defines CreateMutex to CreateMutexA or CreateMutexW.
+#endif
+
+
+#ifdef _MSC_VER
+	#pragma warning(disable: 4996) // This function or variable may be unsafe / deprecated.
+#endif
+
+
+
+#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+	#if defined(EA_PLATFORM_WINDOWS)
+		extern "C" WINBASEAPI BOOL WINAPI TryEnterCriticalSection(_Inout_ LPCRITICAL_SECTION lpCriticalSection);
+	#endif
+
+	EAMutexData::EAMutexData()
+		: mnLockCount(0), mbIntraProcess(true)
+	{
+		#if EAT_ASSERT_ENABLED
+			mThreadId = EA::Thread::kThreadIdInvalid; 
+			mSysThreadId = EA::Thread::kSysThreadIdInvalid;
+		#endif
+
+		::memset(&mData, 0, sizeof(mData));
+	}
+
+
+	EA::Thread::MutexParameters::MutexParameters(bool bIntraProcess, const char* pName)
+		: mbIntraProcess(bIntraProcess)
+	{
+		if(pName)
+		{
+			strncpy(mName, pName, sizeof(mName)-1);
+			mName[sizeof(mName)-1] = 0;
+		}
+		else
+			mName[0] = 0;
+	}
+
+
+	EA::Thread::Mutex::Mutex(const MutexParameters* pMutexParameters, bool bDefaultParameters)
+	{
+		if(!pMutexParameters && bDefaultParameters)
+		{
+			MutexParameters parameters;
+			Init(&parameters);
+		}
+		else
+			Init(pMutexParameters);
+	}
+
+
+	EA::Thread::Mutex::~Mutex()
+	{
+		EAT_ASSERT(mMutexData.mnLockCount == 0);
+
+		// Consider doing something to verify the mutex object has been initialized.
+		#if defined(EA_PLATFORM_WINDOWS)
+			if(mMutexData.mbIntraProcess)
+				DeleteCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
+			else
+				CloseHandle(*(HANDLE*)mMutexData.mData);
+		#else
+			DeleteCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
+		#endif
+	}
+
+
+	bool EA::Thread::Mutex::Init(const MutexParameters* pMutexParameters)
+	{
+		// Make sure that internal structure is big enough to hold critical section data.
+		// If this assert fires, please adjust MUTEX_PLATFORM_DATA_SIZE in eathread_mutex.h accordingly.
+		EAT_COMPILETIME_ASSERT(sizeof(CRITICAL_SECTION) <= (MUTEX_PLATFORM_DATA_SIZE / sizeof(uint64_t) * sizeof(uint64_t)));
+		EAT_COMPILETIME_ASSERT(sizeof(HANDLE) <= MUTEX_PLATFORM_DATA_SIZE);
+
+		if(pMutexParameters)
+		{
+			mMutexData.mnLockCount = 0;
+
+			#if defined(EA_PLATFORM_WINDOWS) 
+				mMutexData.mbIntraProcess = pMutexParameters->mbIntraProcess;
+
+				if(mMutexData.mbIntraProcess)
+				{
+					// We use InitializeCriticalSectionAndSpinCount, as that has resulted in improved performance in practice on multiprocessors systems.
+					int rv = InitializeCriticalSectionAndSpinCount((CRITICAL_SECTION*)mMutexData.mData, 256);
+					EAT_ASSERT(rv != 0);
+					EA_UNUSED(rv);
+
+					return true;
+				}
+				else
+				{
+					EAT_COMPILETIME_ASSERT(sizeof(pMutexParameters->mName) <= MAX_PATH);
+					*(HANDLE*)mMutexData.mData = ::CreateMutexA(NULL, false, pMutexParameters->mName[0] ? pMutexParameters->mName : NULL);
+					EAT_ASSERT(*(HANDLE*)mMutexData.mData != 0);
+					return *(HANDLE*)mMutexData.mData != 0;
+				}
+			#else
+				// We use InitializeCriticalSectionAndSpinCount, as that has resulted in improved performance in practice on multiprocessors systems.
+				InitializeCriticalSectionAndSpinCount((CRITICAL_SECTION*)mMutexData.mData, 256);
+				return true;
+			#endif
+		}
+		return false;
+	}
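+
+	// Usage sketch (a hypothetical example; the name "ExampleIPCMutex" is
+	// illustrative): on Windows, a named non-intra-process mutex uses a kernel
+	// mutex HANDLE and can be shared across processes, whereas the default
+	// intra-process mutex uses a faster CRITICAL_SECTION.
+	//
+	//     EA::Thread::MutexParameters params(false /*inter-process*/, "ExampleIPCMutex");
+	//     EA::Thread::Mutex ipcMutex(&params, false);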
+
+
+
+	#pragma warning(push)
+	#pragma warning(disable: 4706) // disable warning about assignment within a conditional expression
+
+	int EA::Thread::Mutex::Lock(const ThreadTime& timeoutAbsolute)
+	{
+		EAT_ASSERT(mMutexData.mnLockCount < 100000);
+
+		#if defined(EA_PLATFORM_WINDOWS) // Non-Windows is always assumed to be intra-process.
+			if(mMutexData.mbIntraProcess)
+			{
+		#endif
+				if(timeoutAbsolute == kTimeoutNone)
+					EnterCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
+				else
+				{
+					// To consider: Have a pathway for kTimeoutImmediate which doesn't check the current time.
+					while(!TryEnterCriticalSection((CRITICAL_SECTION*)mMutexData.mData))
+					{
+						if(GetThreadTime() >= timeoutAbsolute)
+							return kResultTimeout;
+						Sleep(1);
+					}
+				}
+		#if defined(EA_PLATFORM_WINDOWS)
+			}
+			else
+			{
+				EAT_ASSERT(*(HANDLE*)mMutexData.mData != 0);
+
+				const DWORD dw = ::WaitForSingleObject(*(HANDLE*)mMutexData.mData, RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute));
+
+				if(dw == WAIT_TIMEOUT)
+					return kResultTimeout;
+
+				if(dw != WAIT_OBJECT_0)
+				{
+					EAT_ASSERT(false);
+					return kResultError;
+				}
+			}
+		#endif
+
+		EAT_ASSERT((mMutexData.mSysThreadId = EA::Thread::GetSysThreadId()) != kSysThreadIdInvalid);
+		EAT_ASSERT(mMutexData.mnLockCount >= 0);
+		return ++mMutexData.mnLockCount; // This is safe to do because we have the lock.
+	}
+
+	#pragma warning(pop)
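+
+	// Usage sketch (a hypothetical example; the 500ms value is arbitrary):
+	// Lock takes an absolute timeout, so a relative wait is expressed by adding
+	// to the current thread time. A positive return value means the lock was acquired.
+	//
+	//     EA::Thread::Mutex mutex;
+	//     if(mutex.Lock(EA::Thread::GetThreadTime() + 500) > 0)
+	//     {
+	//         // ... critical section work ...
+	//         mutex.Unlock();
+	//     }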
+
+
+
+	int EA::Thread::Mutex::Unlock()
+	{
+		EAT_ASSERT(mMutexData.mSysThreadId == EA::Thread::GetSysThreadId());
+		EAT_ASSERT(mMutexData.mnLockCount > 0);
+
+		const int nReturnValue(--mMutexData.mnLockCount); // This is safe to do because we have the lock.
+
+		#if defined(EA_PLATFORM_WINDOWS)
+			if(mMutexData.mbIntraProcess)
+				LeaveCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
+			else
+			{
+				EAT_ASSERT(*(HANDLE*)mMutexData.mData != 0);
+				ReleaseMutex(*(HANDLE*)mMutexData.mData);
+			}
+		#else
+			LeaveCriticalSection((CRITICAL_SECTION*)mMutexData.mData);
+		#endif
+
+		return nReturnValue;
+	}
+
+
+	int EA::Thread::Mutex::GetLockCount() const
+	{
+		return mMutexData.mnLockCount;
+	}
+
+
+	bool EA::Thread::Mutex::HasLock() const
+	{
+		#if EAT_ASSERT_ENABLED
+			return (mMutexData.mnLockCount > 0) && (mMutexData.mSysThreadId == EA::Thread::GetSysThreadId());
+		#else
+			return (mMutexData.mnLockCount > 0); // This is the best we can do, though it is of limited use, since it doesn't tell you if you are the thread with the lock.
+		#endif
+	}
+
+
+#endif // EA_PLATFORM_XXX
+
+
+
+
+
+
+

+ 911 - 0
source/pc/eathread_pc.cpp

@@ -0,0 +1,911 @@
+///////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////
+
+#include <EABase/eabase.h>
+#include "eathread/eathread.h"
+#include "eathread/eathread_thread.h"
+#include "eathread/eathread_storage.h"
+
+#if defined(EA_PLATFORM_MICROSOFT) && !EA_POSIX_THREADS_AVAILABLE
+	#include <process.h>
+
+	EA_DISABLE_ALL_VC_WARNINGS()
+	#include <Windows.h>
+	#include <stdlib.h> // for mbstowcs
+	#include <setjmp.h>
+	EA_RESTORE_ALL_VC_WARNINGS()
+
+	#include "eathread/eathread_futex.h"
+
+	extern "C" WINBASEAPI DWORD WINAPI SetThreadIdealProcessor(_In_ HANDLE hThread, _In_ DWORD dwIdealProcessor);
+	#if defined(EA_PLATFORM_WIN64)
+		extern "C" WINBASEAPI DWORD WINAPI GetThreadId(_In_ HANDLE hThread);
+		extern "C" WINBASEAPI ULONGLONG GetTickCount64(VOID); // Will not run on pre-Vista OS so 64 bit XP not supported
+	#endif
+
+	// We set this module to initialize early. We want to do this because it 
+	// allows statically initialized objects to call these functions safely.
+	EA_DISABLE_VC_WARNING(4074)  // warning C4074: initializers put in compiler reserved initialization area
+	#pragma init_seg(compiler)
+	EA_RESTORE_VC_WARNING()
+	#ifndef EATHREAD_INIT_SEG_DEFINED 
+		#define EATHREAD_INIT_SEG_DEFINED
+	#endif 
+
+	namespace EA
+	{
+		namespace Thread
+		{
+			// Note by Paul Pedriana:
+			// There is a bit of code here which implements "dynamic thread array maintenance". 
+			// The reason for this is that we are trying to present to the user a consistently
+			// behaving GetThreadId function. The Windows threading API has a number of design
+			// characteristics that make it less than ideal for applications. One of these 
+			// designs is that an application cannot ask the system what its thread id is and 
+			// get a consistent answer; in fact you always get a different answer. 
+
+			// To consider: Use the VC++ undocumented __tlregdtor function to detect thread exits.
+			// __tlregdtor is a VC++ CRT function which detects the exiting of any threads created 
+			// with the CRT beginthread family of functions. It cannot detect the exit of any threads 
+			// that are begun via direct OS thread creation functions, nor can it detect the exit of 
+			// threads that are exited by direct OS thread exit functions. This may not be a major 
+			// problem, because C/C++ programs should virtually always be calling the CRT thread begin 
+			// and end functions so that the CRT can be maintained properly for the thread.
+			//
+			//    typedef void (*_PVFV)();
+			//    void __tlregdtor(_PVFV func);
+			//    void ThreadExit(){ Do something. May need to be careful about what APIs are called. }
+
+			// Assertion variables.
+			EA::Thread::AssertionFailureFunction gpAssertionFailureFunction = NULL;
+			void*                                gpAssertionFailureContext  = NULL;
+
+			// Dynamic thread array maintenance.
+			// If the user calls GetThreadId from a thread that was created by some third 
+			// party, then we don't have a thread handle for it. The only current way to get 
+			// such a thread handle is to call OpenThread(GetCurrentThreadId()) or 
+			// DuplicateHandle(GetCurrentThread()). In either case the return value is a 
+			// handle which must be disposed of via CloseHandle. Additionally, since the 
+			// thread was created by a third party, it's entirely possible that the thread 
+			// will be exited without us ever finding about it. But we still need to call 
+			// CloseHandle on the handle. So we maintain an array of handles and check their
+			// status periodically and upon process exit.
+
+			const size_t kMaxThreadDynamicArrayCount = 128;
+
+			struct DynamicThreadArray
+			{
+				static HANDLE            mhDynamicThreadArray[kMaxThreadDynamicArrayCount];
+				static CRITICAL_SECTION  mCriticalSection;
+				static bool              mbDynamicThreadArrayInitialized;
+
+				static void Initialize();
+				static void CheckDynamicThreadArray(bool bCloseAll);
+				static void AddDynamicThreadHandle(HANDLE hThread, bool bAdd);
+			};
+
+			HANDLE DynamicThreadArray::mhDynamicThreadArray[kMaxThreadDynamicArrayCount];
+			CRITICAL_SECTION DynamicThreadArray::mCriticalSection;
+			bool DynamicThreadArray::mbDynamicThreadArrayInitialized;
+
+			// DynamicThreadArray ctor/dtor were removed to accommodate memory tracking systems that are required to run 
+			// pre-main and post-main.  In order to support memory tracking of allocations that occur post-main we
+			// intentionally "leak" an operating system critical section and leave it to be cleaned up by the operating
+			// system at process shutdown.
+			//
+			// DynamicThreadArray::DynamicThreadArray()
+			// {
+			//     Initialize();
+			// }
+
+			// DynamicThreadArray::~DynamicThreadArray()
+			// {
+			//     CheckDynamicThreadArray(true);
+			//     DeleteCriticalSection(&mCriticalSection);
+			// }
+
+			void DynamicThreadArray::Initialize()
+			{
+				static EA::Thread::Futex m;
+
+				const bool done = mbDynamicThreadArrayInitialized;
+
+				// ensure that if we've seen previous writes to mbDynamicThreadArrayInitialized, we also
+				// see the writes to mCriticalSection, to avoid the case where another thread sees the flag
+				// before it sees the initialization
+				EAReadBarrier();
+
+				if(!done)
+				{
+					EA::Thread::AutoFutex _(m);
+
+					if (!mbDynamicThreadArrayInitialized)
+					{
+						memset(mhDynamicThreadArray, 0, sizeof(mhDynamicThreadArray));
+						InitializeCriticalSection(&mCriticalSection);
+
+						// ensure writes to mCriticalSection and mhDynamicThreadArray are visible before writes
+						// to mbDynamicThreadArrayInitialized, to avoid the case where another thread sees the
+						// flag before it sees the initialization
+						EAWriteBarrier();
+
+						mbDynamicThreadArrayInitialized = true;
+					}
+				}
+			}
+
+			// This function looks at the existing set of thread ids and see if any of them 
+			// were quit. If so then this function removes their entry from our array of 
+			// thread handles, and most importantly, calls CloseHandle on the thread handle.
+			void DynamicThreadArray::CheckDynamicThreadArray(bool bCloseAll)
+			{
+				Initialize();
+
+				EnterCriticalSection(&mCriticalSection);
+
+				for(size_t i(0); i < sizeof(mhDynamicThreadArray)/sizeof(mhDynamicThreadArray[0]); i++)
+				{
+					if(mhDynamicThreadArray[i])
+					{
+						DWORD dwExitCode(0);
+
+						// Note that GetExitCodeThread is a hazard if the user of a thread exits 
+						// with a return value that is equal to the value of STILL_ACTIVE (i.e. 259).
+						// We can document that users shouldn't do this, or we can change the code 
+						// here to use WaitForSingleObject(hThread, 0) and assume the thread is 
+						// still active if the return value is WAIT_TIMEOUT.
+						if(bCloseAll || !GetExitCodeThread(mhDynamicThreadArray[i], &dwExitCode) || (dwExitCode != STILL_ACTIVE)) // If the thread id is invalid or it has exited...
+						{
+							CloseHandle(mhDynamicThreadArray[i]); // This matches the DuplicateHandle call we made below.
+							mhDynamicThreadArray[i] = 0;
+						}
+					}
+				}
+
+				LeaveCriticalSection(&mCriticalSection);
+			}
+
+			void DynamicThreadArray::AddDynamicThreadHandle(HANDLE hThread, bool bAdd)
+			{
+				Initialize();
+
+				if(hThread)
+				{
+					EnterCriticalSection(&mCriticalSection);
+
+					if(bAdd)
+					{
+						for(size_t i(0); i < sizeof(mhDynamicThreadArray)/sizeof(mhDynamicThreadArray[0]); i++)
+						{
+							if(mhDynamicThreadArray[i] == kThreadIdInvalid)
+							{
+								mhDynamicThreadArray[i] = hThread;
+								hThread = kThreadIdInvalid;         // This tells us that we succeeded, and we'll use this result below.
+								break;
+							}
+						}
+
+						EAT_ASSERT(hThread == kThreadIdInvalid);    // Assert that there was enough room (that the above loop found a spot).
+						if(hThread != kThreadIdInvalid)             // If not, then we need to free the handle.
+							CloseHandle(hThread);                   // This matches the DuplicateHandle call we made below.
+					}
+					else
+					{
+						for(size_t i(0); i < sizeof(mhDynamicThreadArray)/sizeof(mhDynamicThreadArray[0]); i++)
+						{
+							if(mhDynamicThreadArray[i] == hThread)
+							{
+								CloseHandle(hThread);           // This matches the DuplicateHandle call we made below.
+								mhDynamicThreadArray[i] = kThreadIdInvalid;
+								break;
+							}
+						}
+						// By design, we don't consider a non-found handle an error. It may simply be the 
+						// case that the given handle was not a dynamic thread handle. Due to the way 
+						// Windows works, there's just no way for us to tell.
+					}
+
+					LeaveCriticalSection(&mCriticalSection);
+				}
+			}
+
+			// Thread handle local storage.
+			// We have this code here in order to cache the thread handles for 
+			// threads, so that the user gets a consistent return value from the 
+			// GetThreadId function for each unique thread.
+
+			static DWORD dwThreadHandleTLS = TLS_OUT_OF_INDEXES; // We intentionally make this an independent variable so that it is initialized unilaterally on segment load.
+
+			struct TLSAlloc
+			{
+				TLSAlloc()
+				{
+					if(dwThreadHandleTLS == TLS_OUT_OF_INDEXES) // It turns out that the user might have set this to a 
+						dwThreadHandleTLS = TlsAlloc();         // value before this constructor has run. So we check.
+				}
+
+				#if EATHREAD_TLSALLOC_DTOR_ENABLED
+				// Since this class is used only as a static variable, this destructor would
+				// only get called during module destruction: app quit or DLL unload. 
+				// In the case of DLL unload, we may have a problem if the DLL was unloaded 
+				// before threads created by it were destroyed. Whether the problem is significant
+				// depends on the application. In most cases it won't be significant.
+				//
+				// We want to call TlsFree because not doing so results in a memory leak and eventual
+				// exhaustion of TLS ids by the system.
+				~TLSAlloc()
+				{
+					if(dwThreadHandleTLS != TLS_OUT_OF_INDEXES)
+					{
+						// We don't read the hThread stored at dwThreadHandleTLS and call CloseHandle
+						// on it, as the DynamicThreadArray destructor will deal with closing any
+						// thread handles this module knows about. 
+
+						TlsFree(dwThreadHandleTLS);
+						dwThreadHandleTLS = TLS_OUT_OF_INDEXES;
+					}
+				}
+				#endif
+			};
+			static TLSAlloc sTLSAlloc;
+
+			void SetCurrentThreadHandle(HANDLE hThread, bool bDynamic)
+			{
+				// EAT_ASSERT(hThread != kThreadIdInvalid); We can't do this, as we can be intentionally called with an hThread of kThreadIdInvalid.
+				if(dwThreadHandleTLS == TLS_OUT_OF_INDEXES) // This should generally always evaluate to true because we init dwThreadHandleTLS on startup.
+					dwThreadHandleTLS = TlsAlloc();
+				EAT_ASSERT(dwThreadHandleTLS != TLS_OUT_OF_INDEXES);
+				if(dwThreadHandleTLS != TLS_OUT_OF_INDEXES)
+				{
+					DynamicThreadArray::CheckDynamicThreadArray(false);
+					if(bDynamic)
+					{
+						if(hThread != kThreadIdInvalid) // If adding the hThread...
+							DynamicThreadArray::AddDynamicThreadHandle(hThread, true);
+						else // Else removing the existing current thread handle...
+						{
+							HANDLE hThreadOld = TlsGetValue(dwThreadHandleTLS);
+							if(hThreadOld != kThreadIdInvalid) // This should always evaluate to true in practice.
+								DynamicThreadArray::AddDynamicThreadHandle(hThreadOld, false); // Will Close the dynamic thread handle if it is one.
+						}
+					}
+					TlsSetValue(dwThreadHandleTLS, hThread);
+				}
+			}
+		} // namespace Thread
+	} // namespace EA
+
+
+EATHREADLIB_API EA::Thread::ThreadId EA::Thread::GetThreadId()
+{
+	// We have some non-trivial code here because Windows doesn't provide a means for a
+	// thread to read its own thread id (thread handle) in a consistent way. 
+
+	// If we have allocated thread-local storage for this module...
+	if(dwThreadHandleTLS != TLS_OUT_OF_INDEXES)
+	{
+		void* const pValue = TlsGetValue(dwThreadHandleTLS);
+
+		if(pValue)          // If the current thread's ThreadId has been previously saved...
+			return pValue;  // Under Win32, type ThreadId should be the same as HANDLE which should be the same as void*.
+
+		// Else fall through and get the current thread handle and cache it so that next time the above code will succeed.
+	}
+
+	// In this case the thread was not created by EAThread. So we give 
+	// the thread a new Id, based on GetCurrentThread and DuplicateHandle.
+	// GetCurrentThread returns a "pseudo handle" which isn't actually the 
+	// thread handle but is a hard-coded constant which means "current thread".
+	// If you want to get a real thread handle then you need to call DuplicateHandle
+	// on the pseudo handle. Every time you call DuplicateHandle you get a different
+	// result, yet we want this GetThreadId function to return a consistent value
+	// to the user, as that's what a rational user would expect. So after calling
+	// DuplicateHandle we save the value for the next time the user calls this 
+	// function. We save the value in thread-local storage, so each unique thread
+	// sees a unique view of GetThreadId.
+	HANDLE hThread = kThreadIdInvalid, hThreadPseudo = GetCurrentThread(); // Initialize hThread so a failed DuplicateHandle doesn't leave it unwritten.
+	BOOL bResult = DuplicateHandle(GetCurrentProcess(), hThreadPseudo, GetCurrentProcess(), &hThread, 0, true, DUPLICATE_SAME_ACCESS);
+	EAT_ASSERT(bResult && (hThread != kThreadIdInvalid));
+	if(bResult)
+		EA::Thread::SetCurrentThreadHandle(hThread, true); // Need to eventually call CloseHandle on hThread, so we store it.
+
+	return hThread;        
+}
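+
+// Illustration (a minimal sketch of the behavior described above): GetCurrentThread
+// returns the same pseudo-handle sentinel for every thread, while GetThreadId
+// returns a real, duplicated handle that is stable for a given thread.
+//
+//     HANDLE hPseudo = GetCurrentThread();                   // pseudo-handle constant, not a real handle
+//     EA::Thread::ThreadId id1 = EA::Thread::GetThreadId();
+//     EA::Thread::ThreadId id2 = EA::Thread::GetThreadId();
+//     // id1 == id2: the duplicated handle is cached in TLS and reused.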
+
+EATHREADLIB_API EA::Thread::ThreadId EA::Thread::GetThreadId(EA::Thread::SysThreadId id)
+{
+	EAThreadDynamicData* const pTDD = EA::Thread::FindThreadDynamicData(id);
+	if(pTDD)
+	{   
+		return pTDD->mhThread;
+	}
+
+	return EA::Thread::kThreadIdInvalid;
+}
+
+EATHREADLIB_API EA::Thread::SysThreadId EA::Thread::GetSysThreadId(ThreadId id)
+{
+	#if defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+		// Win64 has this function natively.
+		return ::GetThreadId(id);
+
+		// Fast implementation of this, which has been verified:
+		// uintptr_t pTIB = __readgsqword(0x30);
+		// uint32_t threadId = *((uint32_t*)(((uint8_t*)pTIB) + 0x48));
+		// return (EA::Thread::SysThreadId)threadId;
+
+	#elif defined(EA_PLATFORM_WIN32)
+
+		// What we do here is first try to use the GetThreadId function, which is
+		// available on some later versions of Windows XP and on later OSs. If that
+		// doesn't work then we are on an earlier OS, and we use the
+		// NtQueryInformationThread kernel function to read the thread info.
+
+		typedef DWORD (WINAPI *GetThreadIdFunc)(HANDLE);
+		typedef BOOL (WINAPI *NtQueryInformationThreadFunc)(HANDLE, int, PVOID, ULONG, PULONG);
+
+		// We implement our own manual version of static variables here. We do this because 
+		// the static variable mechanism the compiler provides wouldn't provide thread 
+		// safety for us. 
+		static volatile bool                sInitialized = false;
+		static GetThreadIdFunc              spGetThreadIdFunc = NULL;
+		static NtQueryInformationThreadFunc spNtQueryInformationThread = NULL;
+
+		if(!sInitialized)
+		{
+			HMODULE hKernel32 = GetModuleHandleA("kernel32.dll");
+			if(hKernel32)
+				spGetThreadIdFunc = (GetThreadIdFunc)(uintptr_t)GetProcAddress(hKernel32, "GetThreadId");
+
+			if(!spGetThreadIdFunc)
+			{
+				HMODULE hNTDLL = GetModuleHandleA("ntdll.dll");
+
+				if(hNTDLL)
+					spNtQueryInformationThread = (NtQueryInformationThreadFunc)(uintptr_t)GetProcAddress(hNTDLL, "NtQueryInformationThread");
+			}
+
+			sInitialized = true;
+		}
+
+		if(spGetThreadIdFunc)
+			return (SysThreadId)spGetThreadIdFunc(id);
+
+		if(spNtQueryInformationThread)
+		{
+			struct THREAD_BASIC_INFORMATION_WIN32
+			{
+				BOOL  ExitStatus;
+				PVOID TebBaseAddress;
+				DWORD UniqueProcessId;
+				DWORD UniqueThreadId;
+				DWORD AffinityMask;
+				DWORD Priority;
+				DWORD BasePriority;
+			};
+
+			THREAD_BASIC_INFORMATION_WIN32 tbi;
+
+			if(spNtQueryInformationThread(id, 0, &tbi, sizeof(tbi), NULL) == 0) // 0 is the ThreadBasicInformation info class; a 0 (STATUS_SUCCESS) result means the call succeeded.
+			   return (SysThreadId)tbi.UniqueThreadId;
+		}
+
+		return kSysThreadIdInvalid;
+
+	#endif
+}
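+
+// Conversion sketch (illustrative): ThreadId (a HANDLE under Win32) and SysThreadId
+// (a DWORD) round-trip through these helpers, but only for threads that EAThread
+// tracks via FindThreadDynamicData:
+//
+//     EA::Thread::ThreadId    tid = EA::Thread::GetThreadId();
+//     EA::Thread::SysThreadId sid = EA::Thread::GetSysThreadId(tid);
+//     EAT_ASSERT(EA::Thread::GetThreadId(sid) == tid); // Holds for EAThread-created threads.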
+
+
+EATHREADLIB_API EA::Thread::SysThreadId EA::Thread::GetSysThreadId()
+{
+	return ::GetCurrentThreadId();
+}
+
+
+EATHREADLIB_API int EA::Thread::GetThreadPriority()
+{
+	const int nPriority = ::GetThreadPriority(GetCurrentThread());
+	return kThreadPriorityDefault + (nPriority - THREAD_PRIORITY_NORMAL);
+}
+
+
+EATHREADLIB_API bool EA::Thread::SetThreadPriority(int nPriority)
+{
+	EAT_ASSERT(nPriority != kThreadPriorityUnknown);
+	int nNewPriority = THREAD_PRIORITY_NORMAL + (nPriority - kThreadPriorityDefault);
+	bool result = ::SetThreadPriority(GetCurrentThread(), nNewPriority) != 0;
+
+	// A Windows process running in NORMAL_PRIORITY_CLASS is picky about the priority passed in,
+	// so we keep raising the priority until we reach one the scheduler supports.
+	#if defined(EA_PLATFORM_WINDOWS) || defined(EA_PLATFORM_CAPILANO)
+		HANDLE thread = GetCurrentThread();
+
+		while(!result)
+		{
+			if (nNewPriority >= THREAD_PRIORITY_TIME_CRITICAL) 
+				return ::SetThreadPriority(thread, THREAD_PRIORITY_TIME_CRITICAL) != 0;
+
+			if (nNewPriority <= THREAD_PRIORITY_IDLE) 
+				return ::SetThreadPriority(thread, THREAD_PRIORITY_IDLE) != 0;
+
+			result = ::SetThreadPriority(thread, nNewPriority) != 0;
+			nNewPriority++;
+		}
+	#endif
+
+	return result;
+}
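+
+// Mapping sketch (illustrative, following the arithmetic above in which
+// kThreadPriorityDefault corresponds to THREAD_PRIORITY_NORMAL):
+//
+//     EA::Thread::SetThreadPriority(EA::Thread::kThreadPriorityDefault + 1);
+//     // Requests THREAD_PRIORITY_NORMAL + 1; if the process priority class
+//     // rejects it, the loop above walks upward to the next supported value.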
+
+
+EATHREADLIB_API void EA::Thread::SetThreadProcessor(int nProcessor)
+{
+	#if   defined(EA_PLATFORM_CAPILANO)
+
+		DWORD mask = 0xFF; // Default to all cores.
+		if (nProcessor >= 0)
+			mask = (DWORD)(1 << nProcessor);
+
+		SetThreadAffinityMask(GetCurrentThread(), mask);
+
+	#else
+		static const int nProcessorCount = GetProcessorCount();
+
+		if(nProcessor < 0)
+			nProcessor = MAXIMUM_PROCESSORS; // This causes SetThreadIdealProcessor to reset to 'no ideal processor'.
+		else
+		{
+			if(nProcessor >= nProcessorCount)
+				nProcessor %= nProcessorCount; 
+		}
+
+		// SetThreadIdealProcessor differs from SetThreadAffinityMask in that SetThreadIdealProcessor is not 
+		// a strict assignment, and it allows the OS to move the thread if the ideal processor is busy. 
+		// SetThreadAffinityMask is a more rigid assignment, but it can result in slower performance and 
+		// possibly hangs due to processor contention between threads. For Windows we use SetThreadIdealProcessor
+		// in the name of safety and likely better overall performance.
+		SetThreadIdealProcessor(GetCurrentThread(), (DWORD)nProcessor);
+
+	#endif
+}
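+
+// Usage sketch (illustrative): suggest that the current thread prefer processor 2,
+// then clear the preference; negative values map to 'no ideal processor' above.
+//
+//     EA::Thread::SetThreadProcessor(2);
+//     EA::Thread::SetThreadProcessor(-1);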
+
+
+void* EA::Thread::GetThreadStackBase()
+{
+	#if   defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_MSVC)
+		// Offset 0x18 from the FS segment register gives a pointer to
+		// the thread information block for the current thread
+		// VC++ also offers the __readfsdword() intrinsic, which would be better to use here.
+		NT_TIB* pTib;
+
+		__asm {
+			mov eax, fs:[18h]
+			mov pTib, eax
+		}
+
+		return (void*)pTib->StackBase;
+
+	#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64) && defined(EA_COMPILER_MSVC)
+		// VC++ also offers the __readgsqword() intrinsic, which is an alternative that could
+		// retrieve the current thread TEB if the following proves unreliable.
+		PNT_TIB64 pTib = reinterpret_cast<PNT_TIB64>(NtCurrentTeb());
+
+		return (void*)pTib->StackBase;
+
+	#elif defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_GCC)
+		NT_TIB* pTib;
+
+		asm ( "movl %%fs:0x18, %0\n"
+			  : "=r" (pTib)
+			);
+
+		return (void*)pTib->StackBase;
+	#endif
+}
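+
+// Usage sketch (illustrative): the stack grows downward on these platforms and
+// StackBase is the high end, so the caller's approximate stack usage is:
+//
+//     char   marker;
+//     void*  base = EA::Thread::GetThreadStackBase();
+//     size_t used = (size_t)((char*)base - &marker);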
+
+
+#if defined(EA_PLATFORM_WIN32) && defined(EA_PROCESSOR_X86) && defined(_MSC_VER) && (_MSC_VER >= 1400)
+	// People report on the Internet that this function tells you which CPU the current
+	// thread is running on. But that's false, as this function has been seen to return
+	// values greater than the number of physical or real CPUs present. For example, this
+	// function returns 6 on a single dual-hyperthreaded CPU.
+	static int GetCurrentProcessorNumberCPUID()
+	{
+		// CPUID leaf 1 reports the initial APIC id in bits 24-31 of EBX.
+		// The shifted value is left in EAX, which MSVC treats as the return
+		// value, so the missing explicit return statement is intentional.
+		_asm { mov eax, 1   }
+		_asm { cpuid        }
+		_asm { shr ebx, 24  }
+		_asm { mov eax, ebx }
+	}
+
+	int GetCurrentProcessorNumberXP()
+	{
+		int cpuNumber = GetCurrentProcessorNumberCPUID();
+		int cpuCount  = EA::Thread::GetProcessorCount();
+
+		return (cpuNumber % cpuCount); // It's unclear whether this is the right thing to do, but it's better than returning an impossible number, and Windows XP is a fading OS as it is.
+	}
+
+#endif
+
+
+EATHREADLIB_API int EA::Thread::GetThreadProcessor()
+{
+	#if defined(EA_PLATFORM_WIN32)
+		// Only Windows Vista and later provides GetCurrentProcessorNumber.
+		// So we must dynamically link to this function.
+		static EA_THREAD_LOCAL bool           bInitialized = false;
+		static EA_THREAD_LOCAL DWORD (WINAPI *pfnGetCurrentProcessorNumber)() = NULL;
+
+		if(!bInitialized)
+		{
+			HMODULE hKernel32 = GetModuleHandleA("KERNEL32.DLL");
+			if(hKernel32)
+				pfnGetCurrentProcessorNumber = (DWORD (WINAPI*)())(uintptr_t)GetProcAddress(hKernel32, "GetCurrentProcessorNumber");
+			bInitialized = true;
+		}
+
+		if(pfnGetCurrentProcessorNumber)
+			return (int)(unsigned)pfnGetCurrentProcessorNumber();
+
+		#if defined(EA_PLATFORM_WINDOWS) && defined(EA_PROCESSOR_X86) && defined(_MSC_VER) && (_MSC_VER >= 1400)
+			return GetCurrentProcessorNumberXP();
+		#else
+			return 0;
+		#endif
+
+	#elif defined(EA_PLATFORM_WIN64)
+		static EA_THREAD_LOCAL bool           bInitialized = false;
+		static EA_THREAD_LOCAL DWORD (WINAPI *pfnGetCurrentProcessorNumber)() = NULL;
+
+		if(!bInitialized)
+		{
+			HMODULE hKernel32 = GetModuleHandleA("KERNEL32.DLL"); // Yes, we want to use Kernel32.dll. There is no Kernel64.dll on Win64. 
+			if(hKernel32)
+				pfnGetCurrentProcessorNumber = (DWORD (WINAPI*)())(uintptr_t)GetProcAddress(hKernel32, "GetCurrentProcessorNumber");
+			bInitialized = true;
+		}
+
+		if(pfnGetCurrentProcessorNumber)
+			return (int)(unsigned)pfnGetCurrentProcessorNumber();
+
+		return 0;
+
+	#else
+		return (int)(unsigned)GetCurrentProcessorNumber();
+
+	#endif
+}
+
+ 
+EATHREADLIB_API void EA::Thread::SetThreadAffinityMask(const EA::Thread::ThreadId& id, ThreadAffinityMask nAffinityMask)
+{
+	// Update the affinity mask in the thread dynamic data cache.
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if(pTDD)
+	{
+		pTDD->mnThreadAffinityMask = nAffinityMask;
+	}
+
+#if EATHREAD_THREAD_AFFINITY_MASK_SUPPORTED
+	#if defined(EA_PLATFORM_CAPILANO)
+		DWORD_PTR nProcessorCountMask = 0x7F;  // Default to all 7 available cores.
+	#else
+		DWORD_PTR nProcessorCountMask = ((DWORD_PTR)1 << GetProcessorCount()) - 1; // One bit set per available processor.
+	#endif
+
+	// Call the Windows library function.
+	DWORD_PTR nProcessAffinityMask, nSystemAffinityMask;
+	if(EA_LIKELY(GetProcessAffinityMask(GetCurrentProcess(), &nProcessAffinityMask, &nSystemAffinityMask)))
+		nProcessorCountMask = nProcessAffinityMask;
+	
+	nAffinityMask &= nProcessorCountMask;
+
+	auto opResult = ::SetThreadAffinityMask(id, static_cast<DWORD_PTR>(nAffinityMask));
+	EA_UNUSED(opResult);
+	EAT_ASSERT_FORMATTED(opResult != 0, "The Windows platform SetThreadAffinityMask failed. GetLastError %x", GetLastError());
+#endif
+}
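+
+// Usage sketch (illustrative): pin a thread to cores 0 and 1. Bits outside the
+// process affinity mask are silently clamped by the code above.
+//
+//     EA::Thread::SetThreadAffinityMask(EA::Thread::GetThreadId(), 0x3);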
+
+EATHREADLIB_API EA::Thread::ThreadAffinityMask EA::Thread::GetThreadAffinityMask(const EA::Thread::ThreadId& id)
+{ 
+	// Update the affinity mask in the thread dynamic data cache.
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if(pTDD)
+	{
+		return pTDD->mnThreadAffinityMask;
+	}
+
+	return kThreadAffinityMaskAny;
+}
+
+
+// Internal SetThreadName APIs so we don't repeat the implementations.
+namespace EA {
+namespace Thread {
+namespace Internal {
+	bool PixSetThreadName(EA::Thread::ThreadId threadId, const char* pName)
+	{
+		EA_UNUSED(threadId); EA_UNUSED(pName);
+
+		bool result = true;
+
+	#if (defined(EA_PLATFORM_CAPILANO) && EA_CAPILANO_DBG_ENABLED == 1)
+		wchar_t wName[EATHREAD_NAME_SIZE];
+		mbstowcs(wName, pName, EATHREAD_NAME_SIZE);
+		wName[EATHREAD_NAME_SIZE - 1] = 0; // mbstowcs doesn't terminate the string when the source fills the buffer.
+		result = (::SetThreadName(threadId, wName) == TRUE); // requires toolhelpx.lib
+		EAT_ASSERT(result);
+	#endif
+
+		return result; 
+	}
+
+	bool WinSetThreadName(EA::Thread::ThreadId threadId, const char* pName)
+	{
+		bool result = true;
+
+		typedef HRESULT(WINAPI *SetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
+
+		// Check if the Windows operating system has the 'SetThreadDescription' API.
+		auto pSetThreadDescription = (SetThreadDescription)GetProcAddress(GetModuleHandleA("kernel32.dll"), "SetThreadDescription");
+		if (pSetThreadDescription)
+		{
+			wchar_t wName[EATHREAD_NAME_SIZE];
+			mbstowcs(wName, pName, EATHREAD_NAME_SIZE);
+			wName[EATHREAD_NAME_SIZE - 1] = 0; // mbstowcs doesn't terminate the string when the source fills the buffer.
+
+			result = SUCCEEDED(pSetThreadDescription(threadId, wName));
+			EAT_ASSERT(result);
+		}
+
+		return result;
+	}
+	
+	void WinSetThreadNameByException(EA::Thread::SysThreadId threadId, const char* pName)
+	{
+		struct ThreadNameInfo
+		{
+			DWORD dwType;
+			LPCSTR lpName;
+			DWORD dwThreadId;
+			DWORD dwFlags;
+		};
+
+		// This setjmp/longjmp weirdness is here to work around an apparent bug in the VS2013 debugger,
+		// whereby EBX will be trashed on return from RaiseException, causing bad things to happen in code
+		// which runs later. This only seems to happen when a debugger is attached and there's some managed
+		// code in the process.
+
+		jmp_buf jmpbuf;
+
+		__pragma(warning(push))
+		__pragma(warning(disable : 4611))
+		if (!setjmp(jmpbuf))
+		{
+			ThreadNameInfo threadNameInfo = {0x1000, pName, threadId, 0};
+			__try { RaiseException(0x406D1388, 0, sizeof(threadNameInfo) / sizeof(ULONG_PTR), (CONST ULONG_PTR*)(uintptr_t)&threadNameInfo); }
+			__except (GetExceptionCode() == 0x406D1388 ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { }
+			longjmp(jmpbuf, 1);
+		}
+		__pragma(warning(pop))
+	}
+
+	void SetThreadName(EAThreadDynamicData* pTDD, const char* pName)
+	{
+		strncpy(pTDD->mName, pName, EATHREAD_NAME_SIZE);
+		pTDD->mName[EATHREAD_NAME_SIZE - 1] = 0;
+
+	#if (defined(EA_PLATFORM_WINDOWS) && defined(_MSC_VER)) || defined(EA_PLATFORM_CAPILANO)
+		if(pTDD->mName[0] && (pTDD->mhThread != EA::Thread::kThreadIdInvalid))
+		{
+			#if EATHREAD_NAMING == EATHREAD_NAMING_DISABLED
+				bool namingEnabled = false;
+			#elif EATHREAD_NAMING == EATHREAD_NAMING_ENABLED
+				bool namingEnabled = true;
+			#else
+				bool namingEnabled = IsDebuggerPresent();
+			#endif
+
+			if(namingEnabled)
+			{
+				PixSetThreadName(pTDD->mhThread, pTDD->mName);  
+				WinSetThreadName(pTDD->mhThread, pTDD->mName);  
+				WinSetThreadNameByException(pTDD->mnThreadId, pTDD->mName);  
+			}
+		}
+	#endif
+	}
+} // namespace Internal
+} // namespace Thread
+} // namespace EA
+
+EATHREADLIB_API void EA::Thread::SetThreadName(const char* pName) { SetThreadName(GetThreadId(), pName); }
+EATHREADLIB_API const char* EA::Thread::GetThreadName() { return GetThreadName(GetThreadId()); }
+
+EATHREADLIB_API void EA::Thread::SetThreadName(const EA::Thread::ThreadId& id, const char* pName)
+{
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	if(pTDD)
+	{
+		Internal::SetThreadName(pTDD, pName);
+	}
+}
+
+EATHREADLIB_API const char* EA::Thread::GetThreadName(const EA::Thread::ThreadId& id)
+{ 
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(id);
+	return pTDD ?  pTDD->mName : "";
+}
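+
+// Usage sketch (illustrative): name the current thread. The name is stored in the
+// thread's dynamic data and, when EATHREAD_NAMING resolves to enabled, is also
+// pushed to the debugger via the Internal helpers above. The round trip below
+// holds only for threads that EAThread tracks:
+//
+//     EA::Thread::SetThreadName("Worker");
+//     EAT_ASSERT(strcmp(EA::Thread::GetThreadName(), "Worker") == 0);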
+
+EATHREADLIB_API int EA::Thread::GetProcessorCount()
+{
+	#if defined(EA_PLATFORM_CAPILANO)
+		// Capilano makes 7 physical cores available to titles, and titles can use 50-90% of the 7th core.
+		// Check the platform documentation for details.
+		DWORD_PTR ProcessAffinityMask;
+		DWORD_PTR SystemAffinityMask;
+		unsigned long nCoreCount = 6;
+
+		if(EA_LIKELY(GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, &SystemAffinityMask)))
+		{
+			_BitScanForward(&nCoreCount, (unsigned long)~ProcessAffinityMask);
+		}
+
+		return (int) nCoreCount; 
+
+	#elif defined(EA_PLATFORM_WINDOWS)
+		static int nProcessorCount = 0; // This doesn't really need to be an atomic integer.
+
+		if(nProcessorCount == 0)
+		{
+			// A better function to use would possibly be KeQueryActiveProcessorCount 
+			// (NTKERNELAPI ULONG KeQueryActiveProcessorCount(PKAFFINITY ActiveProcessors))
+
+			SYSTEM_INFO systemInfo;
+			memset(&systemInfo, 0, sizeof(systemInfo));
+			GetSystemInfo(&systemInfo);
+			nProcessorCount = (int)systemInfo.dwNumberOfProcessors;
+		}
+
+		return nProcessorCount;
+
+	#else
+		static int nProcessorCount = 0; // This doesn't really need to be an atomic integer.
+
+		if(nProcessorCount == 0)
+		{
+			// A better function to use would possibly be KeQueryActiveProcessorCount 
+			// (NTKERNELAPI ULONG KeQueryActiveProcessorCount(PKAFFINITY ActiveProcessors))
+
+			SYSTEM_INFO systemInfo;
+			memset(&systemInfo, 0, sizeof(systemInfo));
+			GetNativeSystemInfo(&systemInfo);
+			nProcessorCount = (int)systemInfo.dwNumberOfProcessors;
+		}
+
+		return nProcessorCount;
+
+	#endif
+}
+
+
+EATHREADLIB_API void EA::Thread::ThreadSleep(const ThreadTime& timeRelative)
+{
+	// Sleep(0) sleeps the current thread if any other thread of equal priority is ready to run.
+	// Sleep(n) sleeps the current thread for up to n milliseconds if there is any other thread of any priority ready to run.
+	// SwitchToThread() sleeps the current thread for one time slice if there is any other thread of any priority ready to run.
+
+	if(timeRelative == 0)
+		SwitchToThread();  // It's debatable whether we should do a SwitchToThread or a Sleep(0) here.
+	else                   // The only difference is that the former allows threads of lower priority to execute.
+		SleepEx((unsigned)timeRelative, TRUE);
+}
+
+
+namespace EA { 
+	namespace Thread {
+		extern EAThreadDynamicData* FindThreadDynamicData(ThreadId threadId);
+		extern EAThreadDynamicData* FindThreadDynamicData(SysThreadId sysThreadId);
+	}
+}
+
+void EA::Thread::ThreadEnd(intptr_t threadReturnValue)
+{
+	EAThreadDynamicData* const pTDD = FindThreadDynamicData(GetThreadId());
+	if(pTDD)
+	{
+		pTDD->mnStatus = Thread::kStatusEnded;
+		pTDD->mnReturnValue = threadReturnValue;
+		pTDD->Release();
+	}
+
+	EA::Thread::SetCurrentThreadHandle(kThreadIdInvalid, true); // We use 'true' here just to be safe, as we don't know who is calling this function.
+
+	#if defined(EA_PLATFORM_CAPILANO)
+		// _endthreadex is not supported on Capilano because it's not compatible with C++/CX and /ZW.  Use of ExitThread could result in memory leaks
+		// as ExitThread does not clean up memory allocated by the C runtime library.
+		// https://forums.xboxlive.com/AnswerPage.aspx?qid=47c1607c-bb18-4bc4-a79a-a40c59444ff3&tgt=1        
+		ExitThread(static_cast<DWORD>(threadReturnValue));
+	#elif defined(EA_PLATFORM_MICROSOFT) && defined(EA_PLATFORM_CONSOLE) && !defined(EA_PLATFORM_CAPILANO)
+		EAT_FAIL_MSG("EA::Thread::ThreadEnd: Not supported by this platform.");
+	#else
+		_endthreadex((unsigned int)threadReturnValue);
+	#endif
+}
+
+
+EATHREADLIB_API EA::Thread::ThreadTime EA::Thread::GetThreadTime()
+{
+	// We choose to use GetTickCount because it has low overhead and
+	// still yields values that have a precision in the same range
+	// as the Win32 thread time slice time. In particular: 
+	//     rdtsc takes ~5 cycles and has nanosecond resolution, but it is unreliable.
+	//     GetTickCount() takes ~80 cycles and has ~15ms resolution.
+	//     timeGetTime() takes ~350 cpu cycles and has 1ms resolution.
+	//     QueryPerformanceCounter() takes ~3000 cpu cycles on most machines and has ~1us resolution.
+	//     We add EATHREAD_MIN_ABSOLUTE_TIME so the returned absolute time is never less than our minimum.
+	//     (This fix was required because GetTickCount64 starts at 0x0 for titles on Capilano.)
+	#if   defined(EA_PLATFORM_MICROSOFT) && defined(EA_PROCESSOR_X86_64)
+		return (ThreadTime)(GetTickCount64() + EATHREAD_MIN_ABSOLUTE_TIME);
+	#else
+		// Note that this value (EATHREAD_MIN_ABSOLUTE_TIME) matches the value used by some runtime
+		// assertion code within EA::Thread. It would be best to define it as a shared constant between modules.
+		return (ThreadTime)(GetTickCount() + EATHREAD_MIN_ABSOLUTE_TIME);
+	#endif
+}
+
+
+EATHREADLIB_API void EA::Thread::SetAssertionFailureFunction(EA::Thread::AssertionFailureFunction pAssertionFailureFunction, void* pContext)
+{
+	gpAssertionFailureFunction = pAssertionFailureFunction;
+	gpAssertionFailureContext  = pContext;
+}
+
+
+EATHREADLIB_API void EA::Thread::AssertionFailure(const char* pExpression)
+{
+	if(gpAssertionFailureFunction)
+		gpAssertionFailureFunction(pExpression, gpAssertionFailureContext);
+	else
+	{
+		#if EAT_ASSERT_ENABLED
+			OutputDebugStringA("EA::Thread::AssertionFailure: ");
+			OutputDebugStringA(pExpression);
+			OutputDebugStringA("\n");
+			#ifdef _MSC_VER
+				__debugbreak();
+			#endif
+		#endif
+	}
+}
+
+uint32_t EA::Thread::RelativeTimeoutFromAbsoluteTimeout(ThreadTime timeoutAbsolute)
+{
+	EAT_ASSERT((timeoutAbsolute == kTimeoutImmediate) || (timeoutAbsolute > EATHREAD_MIN_ABSOLUTE_TIME)); // Assert that the user didn't make the mistake of treating time as relative instead of absolute.
+
+	DWORD timeoutRelative = 0;
+
+	if (timeoutAbsolute == kTimeoutNone)
+	{
+		timeoutRelative = INFINITE;
+	}
+	else if (timeoutAbsolute == kTimeoutImmediate)
+	{
+		timeoutRelative = 0;
+	}
+	else
+	{
+		ThreadTime timeCurrent(GetThreadTime());
+		timeoutRelative = (timeoutAbsolute > timeCurrent) ? static_cast<DWORD>(timeoutAbsolute - timeCurrent) : 0;
+	}
+
+	EAT_ASSERT((timeoutRelative == INFINITE) || (timeoutRelative < 100000000)); // Assert that the timeout is a sane value and didn't wrap around.
+
+	return timeoutRelative;
+}
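+
+// Timeout convention sketch (illustrative): EAThread wait APIs take absolute times
+// built on GetThreadTime(); this helper converts back to the relative milliseconds
+// expected by the Windows wait functions.
+//
+//     EA::Thread::ThreadTime timeoutAbsolute = EA::Thread::GetThreadTime() + 500; // 500ms from now.
+//     uint32_t timeoutRelative = EA::Thread::RelativeTimeoutFromAbsoluteTimeout(timeoutAbsolute); // ~500, or INFINITE for kTimeoutNone.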
+
+#endif // EA_PLATFORM_XXX
+
+
+
+
+
+
+
+

Some files were not shown because too many files changed in this diff