fScalarProductD.dfm 14 KB


  1. object FormSP: TFormSP
  2. Left = 0
  3. Top = 0
  4. BorderStyle = bsDialog
  5. Caption =
  6. 'Calculation of scalar products of a given set of input vector pa' +
  7. 'irs'
  8. ClientHeight = 365
  9. ClientWidth = 583
  10. Color = clBtnFace
  11. Font.Charset = DEFAULT_CHARSET
  12. Font.Color = clWindowText
  13. Font.Height = -14
  14. Font.Name = 'Tahoma'
  15. Font.Style = []
  16. Position = poScreenCenter
  17. PixelsPerInch = 120
  18. TextHeight = 17
  19. object Memo1: TMemo
  20. Left = 10
  21. Top = 10
  22. Width = 563
  23. Height = 291
  24. Margins.Left = 4
  25. Margins.Top = 4
  26. Margins.Right = 4
  27. Margins.Bottom = 4
  28. TabOrder = 0
  29. end
  30. object Button1: TButton
  31. Left = 479
  32. Top = 324
  33. Width = 94
  34. Height = 31
  35. Margins.Left = 4
  36. Margins.Top = 4
  37. Margins.Right = 4
  38. Margins.Bottom = 4
  39. Caption = 'Run'
  40. TabOrder = 1
  41. OnClick = Button1Click
  42. end
  43. object GLCUDA1: TGLCUDA
  44. ComputingDevice = GLCUDADevice1
  45. Left = 80
  46. Top = 128
  47. object MainModule: TCUDAModule
  48. Code.Strings = (
  49. #9'.version 1.4'
  50. #9'.target sm_13'
  51. #9'// compiled with C:\Program Files\NVIDIA GPU Computing Toolkit\' +
  52. 'CUDA\v3.2\\bin/../open64/lib//be.exe'
  53. #9'// nvopencc 3.2 built on 2010-11-06'
  54. ''
  55. #9'//-----------------------------------------------------------'
  56. #9'// Compiling C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_000009' +
  57. '88_00000000-11_temp.cpp3.i (C:/Users/YARUNA~1/AppData/Local/Temp' +
  58. '/ccBI#.a01408)'
  59. #9'//-----------------------------------------------------------'
  60. ''
  61. #9'//-----------------------------------------------------------'
  62. #9'// Options:'
  63. #9'//-----------------------------------------------------------'
  64. #9'// Target:ptx, ISA:sm_13, Endian:little, Pointer Size:32'
  65. #9'// -O3'#9'(Optimization level)'
  66. #9'// -g0'#9'(Debug level)'
  67. #9'// -m2'#9'(Report advisories)'
  68. #9'//-----------------------------------------------------------'
  69. ''
  70. #9'.file'#9'1'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_00000988_0' +
  71. '0000000-10_temp.cudafe2.gpu"'
  72. #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
  73. 'E\crtdefs.h"'
  74. #9'.file'#9'3'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
  75. '2\include\crt/device_runtime.h"'
  76. #9'.file'#9'4'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
  77. '2\include\host_defines.h"'
  78. #9'.file'#9'5'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
  79. '2\include\builtin_types.h"'
  80. #9'.file'#9'6'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
  81. '2\include\device_types.h"'
  82. #9'.file'#9'7'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
  83. '2\include\driver_types.h"'
  84. #9'.file'#9'8'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
  85. '2\include\surface_types.h"'
  86. #9'.file'#9'9'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
  87. '2\include\texture_types.h"'
  88. #9'.file'#9'10'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  89. '.2\include\vector_types.h"'
  90. #9'.file'#9'11'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  91. '.2\include\builtin_types.h"'
  92. #9'.file'#9'12'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  93. '.2\include\host_defines.h"'
  94. #9'.file'#9'13'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
  95. '.2\include\device_launch_parameters.h"'
  96. #9'.file'#9'14'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  97. '.2\include\crt\storage_class.h"'
  98. #9'.file'#9'15'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
  99. 'DE\time.h"'
  100. #9'.file'#9'16'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  101. '.2\include\texture_fetch_functions.h"'
  102. #9'.file'#9'17'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
  103. '.2\include\common_functions.h"'
  104. #9'.file'#9'18'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  105. '.2\include\math_functions.h"'
  106. #9'.file'#9'19'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  107. '.2\include\math_constants.h"'
  108. #9'.file'#9'20'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  109. '.2\include\device_functions.h"'
  110. #9'.file'#9'21'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  111. '.2\include\sm_11_atomic_functions.h"'
  112. #9'.file'#9'22'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  113. '.2\include\sm_12_atomic_functions.h"'
  114. #9'.file'#9'23'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  115. '.2\include\sm_13_double_functions.h"'
  116. #9'.file'#9'24'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  117. '.2\include\sm_20_atomic_functions.h"'
  118. #9'.file'#9'25'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  119. '.2\include\sm_20_intrinsics.h"'
  120. #9'.file'#9'26'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  121. '.2\include\surface_functions.h"'
  122. #9'.file'#9'27'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
  123. '.2\include\math_functions_dbl_ptx3.h"'
  124. #9'.file'#9'28'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/temp.cu"'
  125. ''
  126. ''
  127. #9'.entry _Z13scalarProdGPUPfS_S_ii ('
  128. #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_C,'
  129. #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_A,'
  130. #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_B,'
  131. #9#9'.param .s32 __cudaparm__Z13scalarProdGPUPfS_S_ii_vectorN,'
  132. #9#9'.param .s32 __cudaparm__Z13scalarProdGPUPfS_S_ii_elementN)'
  133. #9'{'
  134. #9'.reg .u16 %rh<3>;'
  135. #9'.reg .u32 %r<46>;'
  136. #9'.reg .f32 %f<9>;'
  137. #9'.reg .pred %p<12>;'
  138. #9'.shared .align 4 .b8 __cuda___cuda_local_var_83325_34_non_const' +
  139. '_accumResult20[4096];'
  140. #9'.loc'#9'28'#9'41'#9'0'
  141. '$LDWbegin__Z13scalarProdGPUPfS_S_ii:'
  142. #9'.loc'#9'28'#9'51'#9'0'
  143. #9'cvt.s32.u16 '#9'%r1, %ctaid.x;'
  144. #9'mov.s32 '#9'%r2, %r1;'
  145. #9'ld.param.s32 '#9'%r3, [__cudaparm__Z13scalarProdGPUPfS_S_ii_vector' +
  146. 'N];'
  147. #9'setp.le.s32 '#9'%p1, %r3, %r1;'
  148. #9'@%p1 bra '#9'$Lt_0_6146;'
  149. #9'cvt.s32.u16 '#9'%r4, %tid.x;'
  150. #9'mov.s32 '#9'%r5, 1023;'
  151. #9'setp.le.s32 '#9'%p2, %r4, %r5;'
  152. #9'mov.u32 '#9'%r6, 0;'
  153. #9'setp.eq.u32 '#9'%p3, %r4, %r6;'
  154. #9'cvt.u32.u16 '#9'%r7, %nctaid.x;'
  155. #9'mov.u32 '#9'%r8, __cuda___cuda_local_var_83325_34_non_const_accumR' +
  156. 'esult20;'
  157. '$Lt_0_6658:'
  158. ' //<loop> Loop body line 51, nesting depth: 1, estimated iterati' +
  159. 'ons: unknown'
  160. #9'@!%p2 bra '#9'$Lt_0_6914;'
  161. ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
  162. #9'ld.param.s32 '#9'%r9, [__cudaparm__Z13scalarProdGPUPfS_S_ii_elemen' +
  163. 'tN];'
  164. #9'mul24.lo.s32 '#9'%r10, %r9, %r2;'
  165. #9'add.s32 '#9'%r11, %r10, %r9;'
  166. #9'mul24.lo.u32 '#9'%r12, %r4, 4;'
  167. #9'cvt.u32.u16 '#9'%r13, %ntid.x;'
  168. #9'mul24.lo.u32 '#9'%r14, %r13, 4;'
  169. #9'add.s32 '#9'%r15, %r10, %r4;'
  170. #9'add.u32 '#9'%r16, %r12, %r8;'
  171. #9'add.u32 '#9'%r17, %r8, 4092;'
  172. '$Lt_0_7426:'
  173. ' //<loop> Loop body line 51, nesting depth: 2, estimated iterati' +
  174. 'ons: unknown'
  175. #9'.loc'#9'28'#9'64'#9'0'
  176. #9'mov.s32 '#9'%r18, %r15;'
  177. #9'setp.le.s32 '#9'%p4, %r11, %r18;'
  178. #9'@%p4 bra '#9'$Lt_0_12290;'
  179. ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
  180. #9'sub.s32 '#9'%r19, %r11, %r18;'
  181. #9'add.s32 '#9'%r20, %r19, 1023;'
  182. #9'shr.s32 '#9'%r21, %r20, 31;'
  183. #9'mov.s32 '#9'%r22, 1023;'
  184. #9'and.b32 '#9'%r23, %r21, %r22;'
  185. #9'add.s32 '#9'%r24, %r23, %r20;'
  186. #9'shr.s32 '#9'%r25, %r24, 10;'
  187. #9'mul.lo.u32 '#9'%r26, %r18, 4;'
  188. #9'ld.param.u32 '#9'%r27, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_A];'
  189. #9'add.u32 '#9'%r28, %r26, %r27;'
  190. #9'ld.param.u32 '#9'%r29, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_B];'
  191. #9'add.u32 '#9'%r30, %r29, %r26;'
  192. #9'mul.lo.u32 '#9'%r31, %r11, 4;'
  193. #9'add.u32 '#9'%r32, %r31, %r27;'
  194. #9'mov.f32 '#9'%f1, 0f00000000; '#9'// 0'
  195. ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
  196. #9'mov.s32 '#9'%r33, %r25;'
  197. '$Lt_0_8194:'
  198. ' //<loop> Loop body line 64, nesting depth: 3, estimated iterati' +
  199. 'ons: unknown'
  200. #9'.loc'#9'28'#9'65'#9'0'
  201. #9'ld.global.f32 '#9'%f2, [%r28+0];'
  202. #9'ld.global.f32 '#9'%f3, [%r30+0];'
  203. #9'mad.f32 '#9'%f1, %f2, %f3, %f1;'
  204. #9'add.u32 '#9'%r30, %r30, 4096;'
  205. #9'add.u32 '#9'%r28, %r28, 4096;'
  206. #9'setp.lt.u32 '#9'%p5, %r28, %r32;'
  207. #9'@%p5 bra '#9'$Lt_0_8194;'
  208. ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
  209. #9'bra.uni '#9'$Lt_0_7682;'
  210. '$Lt_0_12290:'
  211. ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
  212. #9'mov.f32 '#9'%f1, 0f00000000; '#9'// 0'
  213. '$Lt_0_7682:'
  214. ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
  215. #9'.loc'#9'28'#9'67'#9'0'
  216. #9'st.shared.f32 '#9'[%r16+0], %f1;'
  217. #9'add.s32 '#9'%r15, %r18, %r13;'
  218. #9'add.u32 '#9'%r16, %r16, %r14;'
  219. #9'setp.le.u32 '#9'%p6, %r16, %r17;'
  220. #9'@%p6 bra '#9'$Lt_0_7426;'
  221. '$Lt_0_6914:'
  222. ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
  223. #9'mov.s32 '#9'%r34, 512;'
  224. '$Lt_0_9474:'
  225. ' //<loop> Loop body line 67, nesting depth: 2, estimated iterati' +
  226. 'ons: unknown'
  227. #9'.loc'#9'28'#9'75'#9'0'
  228. #9'bar.sync '#9'0;'
  229. #9'setp.ge.s32 '#9'%p7, %r4, %r34;'
  230. #9'@%p7 bra '#9'$Lt_0_9730;'
  231. ' //<loop> Part of loop body line 67, head labeled $Lt_0_9474'
  232. #9'mul24.lo.u32 '#9'%r35, %r4, 4;'
  233. #9'mov.u16 '#9'%rh1, %ntid.x;'
  234. #9'mul.wide.u16 '#9'%r14, %rh1, 4;'
  235. #9'mul.lo.u32 '#9'%r36, %r34, 4;'
  236. #9'add.u32 '#9'%r37, %r35, %r8;'
  237. #9'add.u32 '#9'%r38, %r36, %r8;'
  238. #9'add.s32 '#9'%r39, %r36, %r35;'
  239. #9'add.u32 '#9'%r40, %r39, %r8;'
  240. '$Lt_0_10242:'
  241. ' //<loop> Loop body line 75, nesting depth: 2, estimated iterati' +
  242. 'ons: unknown'
  243. #9'.loc'#9'28'#9'77'#9'0'
  244. #9'ld.shared.f32 '#9'%f4, [%r37+0];'
  245. #9'ld.shared.f32 '#9'%f5, [%r40+0];'
  246. #9'add.f32 '#9'%f6, %f4, %f5;'
  247. #9'st.shared.f32 '#9'[%r37+0], %f6;'
  248. #9'add.u32 '#9'%r40, %r40, %r14;'
  249. #9'add.u32 '#9'%r37, %r37, %r14;'
  250. #9'setp.lt.u32 '#9'%p8, %r37, %r38;'
  251. #9'@%p8 bra '#9'$Lt_0_10242;'
  252. '$Lt_0_9730:'
  253. ' //<loop> Part of loop body line 67, head labeled $Lt_0_9474'
  254. #9'.loc'#9'28'#9'74'#9'0'
  255. #9'shr.s32 '#9'%r34, %r34, 1;'
  256. #9'mov.u32 '#9'%r41, 0;'
  257. #9'setp.gt.s32 '#9'%p9, %r34, %r41;'
  258. #9'@%p9 bra '#9'$Lt_0_9474;'
  259. ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
  260. #9'@!%p3 bra '#9'$Lt_0_11010;'
  261. ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
  262. #9'.loc'#9'28'#9'80'#9'0'
  263. #9'ld.shared.f32 '#9'%f7, [__cuda___cuda_local_var_83325_34_non_const' +
  264. '_accumResult20+0];'
  265. #9'ld.param.u32 '#9'%r42, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_C];'
  266. #9'mul.lo.u32 '#9'%r43, %r2, 4;'
  267. #9'add.u32 '#9'%r44, %r42, %r43;'
  268. #9'st.global.f32 '#9'[%r44+0], %f7;'
  269. '$Lt_0_11010:'
  270. ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
  271. #9'add.u32 '#9'%r2, %r2, %r7;'
  272. #9'setp.lt.s32 '#9'%p10, %r2, %r3;'
  273. #9'@%p10 bra '#9'$Lt_0_6658;'
  274. '$Lt_0_6146:'
  275. #9'.loc'#9'28'#9'82'#9'0'
  276. #9'exit;'
  277. '$LDWend__Z13scalarProdGPUPfS_S_ii:'
  278. #9'} // _Z13scalarProdGPUPfS_S_ii'
  279. '')
  280. Compiler = GLCUDACompiler1
  281. object scalarProdGPU: TCUDAFunction
  282. KernelName = '_Z13scalarProdGPUPfS_S_ii'
  283. BlockShape.SizeX = 128
  284. Grid.SizeX = 256
  285. OnParameterSetup = scalarProdGPUParameterSetup
  286. object _Z13scalarProdGPUPfS_S_ii_d_C: TCUDAFuncParam
  287. KernelName = 'd_C'
  288. DataType = float1
  289. Size = 0
  290. Reference = True
  291. end
  292. object _Z13scalarProdGPUPfS_S_ii_d_A: TCUDAFuncParam
  293. KernelName = 'd_A'
  294. DataType = float1
  295. Size = 0
  296. Reference = True
  297. end
  298. object _Z13scalarProdGPUPfS_S_ii_d_B: TCUDAFuncParam
  299. KernelName = 'd_B'
  300. DataType = float1
  301. Size = 0
  302. Reference = True
  303. end
  304. object _Z13scalarProdGPUPfS_S_ii_vectorN: TCUDAFuncParam
  305. KernelName = 'vectorN'
  306. DataType = int1
  307. Size = 0
  308. Reference = False
  309. end
  310. object _Z13scalarProdGPUPfS_S_ii_elementN: TCUDAFuncParam
  311. KernelName = 'elementN'
  312. DataType = int1
  313. Size = 0
  314. Reference = False
  315. end
  316. end
  317. end
  318. object deviceA: TCUDAMemData
  319. MemoryType = mtDevice
  320. ChannelsType = ctFloat
  321. end
  322. object deviceB: TCUDAMemData
  323. MemoryType = mtDevice
  324. ChannelsType = ctFloat
  325. end
  326. object deviceC: TCUDAMemData
  327. MemoryType = mtDevice
  328. ChannelsType = ctFloat
  329. end
  330. object hostC_GPU: TCUDAMemData
  331. ChannelsType = ctFloat
  332. end
  333. object hostB: TCUDAMemData
  334. ChannelsType = ctFloat
  335. end
  336. object hostC_CPU: TCUDAMemData
  337. ChannelsType = ctFloat
  338. end
  339. object hostA: TCUDAMemData
  340. ChannelsType = ctFloat
  341. end
  342. end
  343. object GLCUDADevice1: TGLCUDADevice
  344. SelectDevice = 'GeForce GTX 1050 Ti (1)'
  345. Left = 408
  346. Top = 128
  347. end
  348. object GLCUDACompiler1: TGLCUDACompiler
  349. ProjectModule = 'scalarProd_kernel.cu'
  350. Left = 240
  351. Top = 128
  352. end
  353. end