Fluids_kernels.cu 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. // Texture reference for reading velocity field
  4. texture<float2, 2> texref;
  5. // Note that these kernels are designed to work with arbitrary
  6. // domain sizes, not just domains that are multiples of the tile
  7. // size. Therefore, we have extra code that checks to make sure
  8. // a given thread location falls within the domain boundaries in
  9. // both X and Y. Also, the domain is covered by looping over
  10. // multiple elements in the Y direction, while there is a one-to-one
  11. // mapping between threads in X and the tile size in X.
  12. // Nolan Goodnight 9/22/06
  13. // This method adds constant force vectors to the velocity field
  14. // stored in 'v' according to v(x,t+1) = v(x,t) + dt * f.
  15. extern "C" __global__ void
  16. addForces_k(
  17. float2 *v,
  18. int dx,
  19. int dy,
  20. int spx,
  21. int spy,
  22. float fx,
  23. float fy,
  24. int r,
  25. size_t pitch) {
  26. int tx = threadIdx.x;
  27. int ty = threadIdx.y;
  28. float2 *fj = (float2*)((char*)v + (ty + spy) * pitch) + tx + spx;
  29. float2 vterm = *fj;
  30. tx -= r; ty -= r;
  31. float s = 1.f / (1.f + tx*tx*tx*tx + ty*ty*ty*ty);
  32. vterm.x += s * fx;
  33. vterm.y += s * fy;
  34. *fj = vterm;
  35. }
  36. // This method performs the velocity advection step, where we
  37. // trace velocity vectors back in time to update each grid cell.
  38. // That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear
  39. // interpolation in the velocity space.
  40. extern "C" __global__ void
  41. advectVelocity_k(float *vx, float *vy,
  42. int dx, int pdx, int dy, float dt, int lb) {
  43. int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
  44. int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
  45. int p;
  46. float2 vterm, ploc;
  47. float vxterm, vyterm;
  48. // gtidx is the domain location in x for this thread
  49. if (gtidx < dx) {
  50. for (p = 0; p < lb; p++) {
  51. // fi is the domain location in y for this thread
  52. int fi = gtidy + p;
  53. if (fi < dy) {
  54. int fj = fi * pdx + gtidx;
  55. vterm = tex2D(texref, (float)gtidx, (float)fi);
  56. ploc.x = (gtidx + 0.5f) - (dt * vterm.x * dx);
  57. ploc.y = (fi + 0.5f) - (dt * vterm.y * dy);
  58. vterm = tex2D(texref, ploc.x, ploc.y);
  59. vxterm = vterm.x; vyterm = vterm.y;
  60. vx[fj] = vxterm;
  61. vy[fj] = vyterm;
  62. }
  63. }
  64. }
  65. }
  66. // This method performs velocity diffusion and forces mass conservation
  67. // in the frequency domain. The inputs 'vx' and 'vy' are complex-valued
  68. // arrays holding the Fourier coefficients of the velocity field in
  69. // X and Y. Diffusion in this space takes a simple form described as:
  70. // v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity,
  71. // and k is the wavenumber. The projection step forces the Fourier
  72. // velocity vectors to be orthogonal to the vectors for each
  73. // wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t) * k) / k^2.
  74. extern "C" __global__ void
  75. diffuseProject_k(
  76. float2 *vx,
  77. float2 *vy,
  78. int dx,
  79. int dy,
  80. float dt,
  81. float visc,
  82. int lb) {
  83. int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
  84. int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
  85. int p;
  86. float2 xterm, yterm;
  87. // gtidx is the domain location in x for this thread
  88. if (gtidx < dx) {
  89. for (p = 0; p < lb; p++) {
  90. // fi is the domain location in y for this thread
  91. int fi = gtidy + p;
  92. if (fi < dy) {
  93. int fj = fi * dx + gtidx;
  94. xterm = vx[fj];
  95. yterm = vy[fj];
  96. // Compute the index of the wavenumber based on the
  97. // data order produced by a standard NN FFT.
  98. int iix = gtidx;
  99. int iiy = (fi>dy/2)?(fi-(dy)):fi;
  100. // Velocity diffusion
  101. float kk = (float)(iix * iix + iiy * iiy); // k^2
  102. float diff = 1.f / (1.f + visc * dt * kk);
  103. xterm.x *= diff; xterm.y *= diff;
  104. yterm.x *= diff; yterm.y *= diff;
  105. // Velocity projection
  106. if (kk > 0.f) {
  107. float rkk = 1.f / kk;
  108. // Real portion of velocity projection
  109. float rkp = (iix * xterm.x + iiy * yterm.x);
  110. // Imaginary portion of velocity projection
  111. float ikp = (iix * xterm.y + iiy * yterm.y);
  112. xterm.x -= rkk * rkp * iix;
  113. xterm.y -= rkk * ikp * iix;
  114. yterm.x -= rkk * rkp * iiy;
  115. yterm.y -= rkk * ikp * iiy;
  116. }
  117. vx[fj] = xterm;
  118. vy[fj] = yterm;
  119. }
  120. }
  121. }
  122. }
  123. // This method updates the velocity field 'v' using the two complex
  124. // arrays from the previous step: 'vx' and 'vy'. Here we scale the
  125. // real components by 1/(dx*dy) to account for an unnormalized FFT.
  126. extern "C" __global__ void
  127. updateVelocity_k(
  128. float2 *v,
  129. float *vx,
  130. float *vy,
  131. int dx,
  132. int pdx,
  133. int dy,
  134. int lb,
  135. size_t pitch,
  136. float scale) {
  137. int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
  138. int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
  139. int p;
  140. float vxterm, vyterm;
  141. float2 nvterm;
  142. // gtidx is the domain location in x for this thread
  143. if (gtidx < dx) {
  144. for (p = 0; p < lb; p++) {
  145. // fi is the domain location in y for this thread
  146. int fi = gtidy + p;
  147. if (fi < dy) {
  148. int fjr = fi * pdx + gtidx;
  149. vxterm = vx[fjr];
  150. vyterm = vy[fjr];
  151. // Normalize the result of the inverse FFT
  152. nvterm.x = vxterm * scale;
  153. nvterm.y = vyterm * scale;
  154. float2 *fj = (float2*)((char*)v + fi * pitch) + gtidx;
  155. *fj = nvterm;
  156. }
  157. } // If this thread is inside the domain in Y
  158. } // If this thread is inside the domain in X
  159. }
  160. // This method updates the particles by moving particle positions
  161. // according to the velocity field and time step. That is, for each
  162. // particle: p(t+1) = p(t) + dt * v(p(t)).
  163. extern "C" __global__ void
  164. advectParticles_k(
  165. float2 *part,
  166. float2 *v,
  167. int dx,
  168. int dy,
  169. float dt,
  170. int lb,
  171. size_t pitch) {
  172. int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
  173. int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
  174. int p;
  175. // gtidx is the domain location in x for this thread
  176. float2 pterm, vterm;
  177. if (gtidx < dx) {
  178. for (p = 0; p < lb; p++) {
  179. // fi is the domain location in y for this thread
  180. int fi = gtidy + p;
  181. if (fi < dy) {
  182. int fj = fi * dx + gtidx;
  183. pterm = part[fj];
  184. int xvi = ((int)(pterm.x * dx));
  185. int yvi = ((int)(pterm.y * dy));
  186. vterm = *((float2*)((char*)v + yvi * pitch) + xvi);
  187. pterm.x += dt * vterm.x;
  188. pterm.x = pterm.x - (int)pterm.x;
  189. pterm.x += 1.f;
  190. pterm.x = pterm.x - (int)pterm.x;
  191. pterm.y += dt * vterm.y;
  192. pterm.y = pterm.y - (int)pterm.y;
  193. pterm.y += 1.f;
  194. pterm.y = pterm.y - (int)pterm.y;
  195. part[fj] = pterm;
  196. }
  197. } // If this thread is inside the domain in Y
  198. } // If this thread is inside the domain in X
  199. }