CUDA.Import.pas 108 KB


  1. //
  2. // The graphics engine GLScene
  3. //
  4. unit CUDA.Import;
  5. (*
  6. * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
  7. *
  8. * NOTICE TO USER:
  9. *
  10. * This source code is subject to NVIDIA ownership rights under U.S. and
  11. * international Copyright laws. Users and possessors of this source code
  12. * are hereby granted a nonexclusive, royalty-free license to use this code
  13. * in individual and commercial software.
  14. *
  15. * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
  16. * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
  17. * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
  18. * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
  19. * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
  20. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
  21. * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
  22. * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  23. * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
  24. * OR PERFORMANCE OF THIS SOURCE CODE.
  25. *
  26. * U.S. Government End Users. This source code is a "commercial item" as
  27. * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
  28. * "commercial computer software" and "commercial computer software
  29. * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
  30. * and is provided to the U.S. Government only as a commercial end item.
  31. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
  32. * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
  33. * source code with only those rights set forth herein.
  34. *
  35. * Any use of this source code in individual and commercial software must
  36. * include, in the user documentation and internal comments to the code,
  37. * the above Disclaimer and U.S. Government End Users Notice.
  38. *)
  39. interface
  40. uses
  41. {$IFDEF MSWINDOWS}
  42. Winapi.Windows;
  43. {$ELSE}
  44. Windows;
  45. {$ENDIF}
  46. const
  // File name of the library that provides the CUDA driver API.
  CUDAAPIDLL = 'nvcuda.dll';
  48. type
  // CUDA device pointer (declared as an untyped pointer).
  TCUdeviceptr = Pointer;

  // CUDA device (an integer handle).
  TCUdevice = Integer;

  // Every handle below is a pointer to an empty placeholder record; the
  // records declare no fields, so only the pointers are ever exchanged.

  // CUDA context
  TCUcontext = record end;
  PCUcontext = ^TCUcontext;

  // CUDA module
  TCUmodule = record end;
  PCUmodule = ^TCUmodule;

  // CUDA function
  TCUfunction = record end;
  PCUfunction = ^TCUfunction;

  // CUDA array
  TCUarray = record end;
  PCUarray = ^TCUarray;

  // CUDA texture reference
  TCUtexref = record end;
  PCUtexref = ^TCUtexref;

  // CUDA event
  TCUevent = record end;
  PCUevent = ^TCUevent;

  // CUDA stream
  TCUstream = record end;
  PCUstream = ^TCUstream;

  // CUDA graphics interop resource (plus a pointer-to-handle type)
  TCUgraphicsResource = record end;
  PCUgraphicsResource = ^TCUgraphicsResource;
  PPCUgraphicsResource = ^PCUgraphicsResource;
  // Context creation flags
  TCUctx_flags = (
    CU_CTX_SCHED_AUTO    = 0,  // automatic scheduling
    CU_CTX_SCHED_SPIN    = 1,  // set spin as default scheduling
    CU_CTX_SCHED_YIELD   = 2,  // set yield as default scheduling
    CU_CTX_SCHED_MASK    = 3,
    CU_CTX_BLOCKING_SYNC = 4,  // use blocking synchronization
    CU_CTX_MAP_HOST      = 8,  // support mapped pinned allocations
    CU_CTX_FLAGS_MASK    = 15
  );
  // Event creation flags
  TCUevent_flags = (
    CU_EVENT_DEFAULT       = 0,  // default event flag
    CU_EVENT_BLOCKING_SYNC = 1   // event uses blocking synchronization
  );
  // Array formats.
  //
  // Enumerations in this unit cross the boundary to a C library, where an
  // enum occupies 4 bytes. By default Delphi stores an enumeration in the
  // smallest integer that fits (a single byte here), so an enum used as a
  // record field or passed by reference (`var`) would not have the size the
  // library expects. Force a 4-byte minimum.
  // NOTE: a compiler directive is not scoped to one declaration - it stays in
  // effect for every enumeration declared after this point in the unit.
  {$MINENUMSIZE 4}
  TCUarray_format = (
    CU_AD_FORMAT_UNSIGNED_INT8  = $01,  // unsigned 8-bit integers
    CU_AD_FORMAT_UNSIGNED_INT16 = $02,  // unsigned 16-bit integers
    CU_AD_FORMAT_UNSIGNED_INT32 = $03,  // unsigned 32-bit integers
    CU_AD_FORMAT_SIGNED_INT8    = $08,  // signed 8-bit integers
    CU_AD_FORMAT_SIGNED_INT16   = $09,  // signed 16-bit integers
    CU_AD_FORMAT_SIGNED_INT32   = $0A,  // signed 32-bit integers
    CU_AD_FORMAT_HALF           = $10,  // 16-bit floating point
    CU_AD_FORMAT_FLOAT          = $20   // 32-bit floating point
  );
  // Texture reference addressing modes
  TCUaddress_mode = (
    CU_TR_ADDRESS_MODE_WRAP   = 0,  // wrapping address mode
    CU_TR_ADDRESS_MODE_CLAMP  = 1,  // clamp to edge address mode
    CU_TR_ADDRESS_MODE_MIRROR = 2   // mirror address mode
  );
  // Texture reference filtering modes
  TCUfilter_mode = (
    CU_TR_FILTER_MODE_POINT  = 0,  // point filter mode
    CU_TR_FILTER_MODE_LINEAR = 1   // linear filter mode
  );
  // Device properties.
  // Two names are deprecated aliases and share the ordinal of their
  // replacement (8 and 12).
  TCUdevice_attribute = (
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK       = 1,   // maximum number of threads per block
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X             = 2,   // maximum block dimension X
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y             = 3,   // maximum block dimension Y
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z             = 4,   // maximum block dimension Z
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X              = 5,   // maximum grid dimension X
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y              = 6,   // maximum grid dimension Y
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z              = 7,   // maximum grid dimension Z
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,   // maximum shared memory available per block, in bytes
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK     = 8,   // deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY       = 9,   // memory available on device for __constant__ variables in a CUDA C kernel, in bytes
    CU_DEVICE_ATTRIBUTE_WARP_SIZE                   = 10,  // warp size in threads
    CU_DEVICE_ATTRIBUTE_MAX_PITCH                   = 11,  // maximum pitch in bytes allowed by memory copies
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK     = 12,  // maximum number of 32-bit registers available per block
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK         = 12,  // deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE                  = 13,  // peak clock frequency in kilohertz
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT           = 14,  // alignment requirement for textures
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP                 = 15,  // device can possibly copy memory and execute a kernel concurrently
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT        = 16,  // number of multiprocessors on device
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT         = 17,  // whether there is a run time limit on kernels
    CU_DEVICE_ATTRIBUTE_INTEGRATED                  = 18,  // device is integrated with host memory
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY         = 19,  // device can map host memory into CUDA address space
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE                = 20   // compute mode (see TCUcomputemode)
  );
  // CUDA limits
  TcudaLimit = (
    cudaLimitStackSize      = $00,  // GPU thread stack size
    cudaLimitPrintfFifoSize = $01   // GPU printf FIFO size
  );
  // Legacy device properties (all fields are plain Integers)
  TCUdevprop = record
    maxThreadsPerBlock: Integer;              // maximum number of threads per block
    maxThreadsDim: array [0 .. 2] of Integer; // maximum size of each dimension of a block
    maxGridSize: array [0 .. 2] of Integer;   // maximum size of each dimension of a grid
    sharedMemPerBlock: Integer;               // shared memory available per block, in bytes
    totalConstantMemory: Integer;             // constant memory available on device, in bytes
    SIMDWidth: Integer;                       // warp size in threads
    memPitch: Integer;                        // maximum pitch in bytes allowed by memory copies
    regsPerBlock: Integer;                    // 32-bit registers available per block
    clockRate: Integer;                       // clock frequency in kilohertz
    textureAlign: Integer;                    // alignment requirement for textures
  end;
  // Function properties
  TCUfunction_attribute = (
    // Number of threads beyond which a launch of the function would fail.
    // Depends on both the function and the device on which the function is
    // currently loaded.
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
    // Size in bytes of statically-allocated shared memory required by this
    // function. Excludes dynamically-allocated shared memory requested by
    // the user at runtime.
    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
    // Size in bytes of user-allocated constant memory required by this
    // function.
    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
    // Size in bytes of thread local memory used by this function.
    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
    // Number of registers used by each thread of this function.
    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
    // Last member; takes the next ordinal after CU_FUNC_ATTRIBUTE_NUM_REGS.
    CU_FUNC_ATTRIBUTE_MAX = 5
  );
  // Memory types
  TCUmemorytype = (
    CU_MEMORYTYPE_HOST   = $01,  // host memory
    CU_MEMORYTYPE_DEVICE = $02,  // device memory
    CU_MEMORYTYPE_ARRAY  = $03   // array memory
  );
  // Compute modes
  TCUcomputemode = (
    CU_COMPUTEMODE_DEFAULT    = 0,  // default: multiple contexts allowed per device
    CU_COMPUTEMODE_EXCLUSIVE  = 1,  // compute-exclusive: only one context can be present on this device at a time
    CU_COMPUTEMODE_PROHIBITED = 2   // compute-prohibited: no contexts can be created on this device at this time
  );
  // Online compiler options
  TCUjit_option = (
    // Max number of registers that a thread may use.
    CU_JIT_MAX_REGISTERS = 0,
    // IN: minimum number of threads per block to target compilation for.
    // OUT: the number of threads the compiler actually targeted.
    // This restricts the resource utilization of the compiler (e.g. max
    // registers) such that a block with the given number of threads should be
    // able to launch based on register limitations. Note that this option does
    // not currently take into account any other resource limitations, such as
    // shared memory utilization.
    CU_JIT_THREADS_PER_BLOCK = 1,
    // Returns a float value in the option of the wall clock time, in
    // milliseconds, spent creating the cubin.
    CU_JIT_WALL_TIME = 2,
    // Pointer to a buffer in which to print any log messages from PTXAS
    // that are informational in nature.
    CU_JIT_INFO_LOG_BUFFER = 3,
    // IN: log buffer size in bytes; log messages will be capped at this size
    // (including null terminator).
    // OUT: amount of log buffer filled with messages.
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
    // Pointer to a buffer in which to print any log messages from PTXAS that
    // reflect errors.
    CU_JIT_ERROR_LOG_BUFFER = 5,
    // IN: log buffer size in bytes; log messages will be capped at this size
    // (including null terminator).
    // OUT: amount of log buffer filled with messages.
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
    // Level of optimizations to apply to generated code (0 - 4), with 4
    // being the default and highest level of optimizations.
    CU_JIT_OPTIMIZATION_LEVEL = 7,
    // No option value required. Determines the target based on the current
    // attached context (default).
    CU_JIT_TARGET_FROM_CUCONTEXT = 8,
    // Target is chosen based on the supplied TCUjit_target value.
    CU_JIT_TARGET = 9,
    // Choice of fallback strategy if a matching cubin is not found, based on
    // the supplied TCUjit_fallback value.
    CU_JIT_FALLBACK_STRATEGY = 10
  );
  // Online compilation targets
  TCUjit_target = (
    CU_TARGET_COMPUTE_10 = 0,  // compute device class 1.0
    CU_TARGET_COMPUTE_11 = 1,  // compute device class 1.1
    CU_TARGET_COMPUTE_12 = 2,  // compute device class 1.2
    CU_TARGET_COMPUTE_13 = 3   // compute device class 1.3
  );
  // Cubin matching fallback strategies
  TCUjit_fallback = (
    CU_PREFER_PTX    = 0,  // prefer to compile ptx
    CU_PREFER_BINARY = 1   // prefer to fall back to compatible binary code
  );
  // Flags to register a graphics resource (a single member with value 0)
  TCUgraphicsRegisterFlags = (
    CU_GRAPHICS_REGISTER_FLAGS_NONE = $00000000
  );
  // Flags for mapping and unmapping interop resources
  TCUgraphicsMapResourceFlags = (
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = $00000000,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = $00000001,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = $00000002
  );
  // Array indices for cube faces
  TCUarray_cubemap_face = (
    CU_CUBEMAP_FACE_POSITIVE_X = $00000000,  // positive X face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_X = $00000001,  // negative X face of cubemap
    CU_CUBEMAP_FACE_POSITIVE_Y = $00000002,  // positive Y face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_Y = $00000003,  // negative Y face of cubemap
    CU_CUBEMAP_FACE_POSITIVE_Z = $00000004,  // positive Z face of cubemap
    CU_CUBEMAP_FACE_NEGATIVE_Z = $00000005   // negative Z face of cubemap
  );
  // CUDA function attributes
  TcudaFuncAttributes = record
    sharedSizeBytes: NativeUInt;   // size of shared memory in bytes
    constSizeBytes: NativeUInt;    // size of constant memory in bytes
    localSizeBytes: NativeUInt;    // size of local memory in bytes
    maxThreadsPerBlock: Integer;   // maximum number of threads per block
    numRegs: Integer;              // number of registers used
    // PTX virtual architecture version for which the function was compiled:
    // major PTX version * 10 + minor PTX version, so a PTX version 1.3
    // function would return the value 13. For device emulation kernels this
    // is set to 9999.
    ptxVersion: Integer;
    // Binary architecture version for which the function was compiled:
    // major binary version * 10 + minor binary version, so a binary version
    // 1.3 function would return the value 13. For device emulation kernels
    // this is set to 9999.
    binaryVersion: Integer;
    // Reserved block of six Integers.
    __cudaReserved: array [0 .. 5] of Integer;
  end;
  // CUDA function cache configurations
  TcudaFuncCache = (
    cudaFuncCachePreferNone   = 0,  // default function cache configuration, no preference
    cudaFuncCachePreferShared = 1,  // prefer larger shared memory and smaller L1 cache
    cudaFuncCachePreferL1     = 2   // prefer larger L1 cache and smaller shared memory
  );
  // ----------------------------------------------------------------------
  // Error codes
  // ----------------------------------------------------------------------

  // Result code type: a distinct type based on Cardinal.
  TCUresult = type Cardinal;

const
  // CUDA_SUCCESS is a typed constant; the remaining codes are untyped
  // integer constants.
  CUDA_SUCCESS: TCUresult = 0;                      // no errors
  CUDA_ERROR_INVALID_VALUE = 1;                     // invalid value
  CUDA_ERROR_OUT_OF_MEMORY = 2;                     // out of memory
  CUDA_ERROR_NOT_INITIALIZED = 3;                   // driver not initialized
  CUDA_ERROR_DEINITIALIZED = 4;                     // driver deinitialized

  CUDA_ERROR_NO_DEVICE = 100;                       // no CUDA-capable device available
  CUDA_ERROR_INVALID_DEVICE = 101;                  // invalid device

  CUDA_ERROR_INVALID_IMAGE = 200;                   // invalid kernel image
  CUDA_ERROR_INVALID_CONTEXT = 201;                 // invalid context
  CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202;         // context already current
  CUDA_ERROR_MAP_FAILED = 205;                      // map failed
  CUDA_ERROR_UNMAP_FAILED = 206;                    // unmap failed
  CUDA_ERROR_ARRAY_IS_MAPPED = 207;                 // array is mapped
  CUDA_ERROR_ALREADY_MAPPED = 208;                  // already mapped
  CUDA_ERROR_NO_BINARY_FOR_GPU = 209;               // no binary for GPU
  CUDA_ERROR_ALREADY_ACQUIRED = 210;                // already acquired
  CUDA_ERROR_NOT_MAPPED = 211;                      // not mapped
  CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212;             // mapped resource not available for access as an array
  CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213;           // mapped resource not available for access as a pointer

  CUDA_ERROR_INVALID_SOURCE = 300;                  // invalid source
  CUDA_ERROR_FILE_NOT_FOUND = 301;                  // file not found

  CUDA_ERROR_INVALID_HANDLE = 400;                  // invalid handle

  CUDA_ERROR_NOT_FOUND = 500;                       // not found

  CUDA_ERROR_NOT_READY = 600;                       // CUDA not ready

  CUDA_ERROR_LAUNCH_FAILED = 700;                   // launch failed
  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701;         // launch exceeded resources
  CUDA_ERROR_LAUNCH_TIMEOUT = 702;                  // launch exceeded timeout
  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703;   // launch with incompatible texturing

  CUDA_ERROR_POINTER_IS_64BIT = 800;                // attempted to retrieve 64-bit pointer via 32-bit API function
  CUDA_ERROR_SIZE_IS_64BIT = 801;                   // attempted to retrieve 64-bit size via 32-bit API function

  CUDA_ERROR_UNKNOWN = 999;                         // unknown error
  413. const
  // Flags for cuMemHostAlloc().

  // If set, host memory is portable between CUDA contexts.
  CU_MEMHOSTALLOC_PORTABLE = $01;

  // If set, host memory is mapped into CUDA address space and
  // cuMemHostGetDevicePointer() may be called on the host pointer.
  CU_MEMHOSTALLOC_DEVICEMAP = $02;

  // If set, host memory is allocated as write-combined: fast to write,
  // faster to DMA, slow to read except via the SSE4 streaming load
  // instruction (MOVNTDQA).
  CU_MEMHOSTALLOC_WRITECOMBINED = $04;
  426. // 2D memory copy parameters
  427. type
  // 2D memory copy parameters (and the matching pointer type)
  PCUDA_MEMCPY2D = ^TCUDA_MEMCPY2D;
  TCUDA_MEMCPY2D = record
    // --- source ---
    srcXInBytes: Cardinal;         // source X in bytes
    srcY: Cardinal;                // source Y
    srcMemoryType: TCUmemorytype;  // source memory type (host, device, array)
    srcHost: Pointer;              // source host pointer
    srcDevice: TCUdeviceptr;       // source device pointer
    srcArray: PCUarray;            // source array reference
    srcPitch: Cardinal;            // source pitch (ignored when src is array)
    // --- destination ---
    dstXInBytes: Cardinal;         // destination X in bytes
    dstY: Cardinal;                // destination Y
    dstMemoryType: TCUmemorytype;  // destination memory type (host, device, array)
    dstHost: Pointer;              // destination host pointer
    dstDevice: TCUdeviceptr;       // destination device pointer
    dstArray: PCUarray;            // destination array reference
    dstPitch: Cardinal;            // destination pitch (ignored when dst is array)
    // --- extent ---
    WidthInBytes: Cardinal;        // width of 2D memory copy in bytes
    Height: Cardinal;              // height of 2D memory copy
  end;
  // 3D memory copy parameters
  TCUDA_MEMCPY3D = record
    // --- source ---
    srcXInBytes: Cardinal;         // source X in bytes
    srcY: Cardinal;                // source Y
    srcZ: Cardinal;                // source Z
    srcLOD: Cardinal;              // source LOD
    srcMemoryType: TCUmemorytype;  // source memory type (host, device, array)
    srcHost: Pointer;              // source host pointer
    srcDevice: TCUdeviceptr;       // source device pointer
    srcArray: PCUarray;            // source array reference
    reserved0: Pointer;            // must be NULL
    srcPitch: Cardinal;            // source pitch (ignored when src is array)
    srcHeight: Cardinal;           // source height (ignored when src is array; may be 0 if Depth==1)
    // --- destination ---
    dstXInBytes: Cardinal;         // destination X in bytes
    dstY: Cardinal;                // destination Y
    dstZ: Cardinal;                // destination Z
    dstLOD: Cardinal;              // destination LOD
    dstMemoryType: TCUmemorytype;  // destination memory type (host, device, array)
    dstHost: Pointer;              // destination host pointer
    dstDevice: TCUdeviceptr;       // destination device pointer
    dstArray: PCUarray;            // destination array reference
    reserved1: Pointer;            // must be NULL
    dstPitch: Cardinal;            // destination pitch (ignored when dst is array)
    dstHeight: Cardinal;           // destination height (ignored when dst is array; may be 0 if Depth==1)
    // --- extent ---
    WidthInBytes: Cardinal;        // width of 3D memory copy in bytes
    Height: Cardinal;              // height of 3D memory copy
    Depth: Cardinal;               // depth of 3D memory copy
  end;
  // Array descriptor (and the matching pointer type)
  PCUDA_ARRAY_DESCRIPTOR = ^TCUDA_ARRAY_DESCRIPTOR;
  TCUDA_ARRAY_DESCRIPTOR = record
    Width: Cardinal;          // width of array
    Height: Cardinal;         // height of array
    Format: TCUarray_format;  // array format
    NumChannels: Cardinal;    // channels per array element
  end;
  // 3D array descriptor
  TCUDA_ARRAY3D_DESCRIPTOR = record
    Width: Cardinal;          // width of 3D array
    Height: Cardinal;         // height of 3D array
    Depth: Cardinal;          // depth of 3D array
    Format: TCUarray_format;  // array format
    NumChannels: Cardinal;    // channels per array element
    Flags: Cardinal;          // flags
  end;
  // Flags to map or unmap a resource.
  // No explicit ordinals are given, so the members are numbered 0, 1, 2 in
  // declaration order.
  TCUGLmap_flags = (
    CU_GL_MAP_RESOURCE_FLAGS_NONE,
    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY,
    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD
  );
  495. const
  // Flag for cuTexRefSetArray(): override the texref format with a format
  // inferred from the array.
  CU_TRSA_OVERRIDE_FORMAT = $01;

  // Flag for cuTexRefSetFlags(): read the texture as integers rather than
  // promoting the values to floats in the range [0,1].
  CU_TRSF_READ_AS_INTEGER = $01;

  // Flag for cuTexRefSetFlags(): use normalized texture coordinates in the
  // range [0,1) instead of [0,dim).
  CU_TRSF_NORMALIZED_COORDINATES = $02;

  // For texture references loaded into the module, use the default texunit
  // from the texture reference.
  CU_PARAM_TR_DEFAULT = -1;
  509. type
  // Triple of unsigned values, indexed 0..2.
  TDim3 = array [0 .. 2] of LongWord;

{$IFDEF MSWINDOWS}
type
  // Untyped handle, declared only when compiling for Windows.
  HGPUNV = Pointer;
{$ENDIF}
  515. type
  // Procedural types for initialization and device queries. Each is a
  // stdcall function returning a TCUresult, named after the library entry
  // point it is meant to hold.
  TcuInit = function(Flags: Cardinal): TCUresult; stdcall;

  TcuDriverGetVersion = function(out driverVersion: Integer): TCUresult; stdcall;

  TcuDeviceGet = function(var device: TCUdevice;
    ordinal: Integer): TCUresult; stdcall;

  TcuDeviceGetCount = function(var count: Integer): TCUresult; stdcall;

  TcuDeviceGetName = function(name: PAnsiChar; len: Integer;
    dev: TCUdevice): TCUresult; stdcall;

  TcuDeviceComputeCapability = function(var major, minor: Integer;
    dev: TCUdevice): TCUresult; stdcall;

  TcuDeviceTotalMem = function(bytes: PSize_t;
    dev: TCUdevice): TCUresult; stdcall;

  TcuDeviceGetProperties = function(var prop: TCUdevprop;
    dev: TCUdevice): TCUresult; stdcall;

  // NOTE(review): `pi` is declared as PSize_t here, while the function
  // attribute query in this unit (TcuFuncGetAttribute) returns its value
  // through `var pi: Integer`. Confirm which width the library writes before
  // relying on the upper bytes of the target on 64-bit builds.
  TcuDeviceGetAttribute = function(pi: PSize_t; attrib: TCUdevice_attribute;
    dev: TCUdevice): TCUresult; stdcall;
  // Procedural types for context management (stdcall, TCUresult result).
  TcuCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal;
    dev: TCUdevice): TCUresult; stdcall;

  TcuCtxDestroy = function(ctx: PCUcontext): TCUresult; stdcall;

  TcuCtxAttach = function(var pctx: PCUcontext;
    Flags: Cardinal): TCUresult; stdcall;

  TcuCtxDetach = function(ctx: PCUcontext): TCUresult; stdcall;

  TcuCtxPushCurrent = function(ctx: PCUcontext): TCUresult; stdcall;

  TcuCtxPopCurrent = function(var pctx: PCUcontext): TCUresult; stdcall;

  TcuCtxGetDevice = function(var device: TCUdevice): TCUresult; stdcall;

  // Takes no arguments.
  TcuCtxSynchronize = function: TCUresult; stdcall;
  // Procedural types for module management (stdcall, TCUresult result).
  // Untyped `var` parameters (image, optionValues, fatCubin) are passed by
  // reference without a declared type.
  TcuModuleLoad = function(var module: PCUmodule;
    const fname: PAnsiChar): TCUresult; stdcall;

  TcuModuleLoadData = function(var module: PCUmodule;
    const image: PAnsiChar): TCUresult; stdcall;

  TcuModuleLoadDataEx = function(var module: PCUmodule; var image;
    numOptions: Cardinal; var options: TCUjit_option;
    var optionValues): TCUresult; stdcall;

  TcuModuleLoadFatBinary = function(var module: PCUmodule;
    var fatCubin): TCUresult; stdcall;

  TcuModuleUnload = function(hmod: PCUmodule): TCUresult; stdcall;

  TcuModuleGetFunction = function(out hfunc: PCUfunction; hmod: PCUmodule;
    const name: PAnsiChar): TCUresult; stdcall;

  TcuModuleGetGlobal = function(out dptr: TCUdeviceptr; var bytes: Cardinal;
    hmod: PCUmodule; const name: PAnsiChar): TCUresult; stdcall;

  TcuModuleGetTexRef = function(out pTexRef: PCUtexref; hmod: PCUmodule;
    const name: PAnsiChar): TCUresult; stdcall;
  // Procedural types for memory management (stdcall, TCUresult result).
  // Sizes are declared as Cardinal throughout.
  TcuMemGetInfo = function(var free, total: Cardinal): TCUresult; stdcall;

  TcuMemAlloc = function(var dptr: TCUdeviceptr;
    bytesize: Cardinal): TCUresult; stdcall;

  TcuMemAllocPitch = function(var dptr: TCUdeviceptr; var pPitch: Cardinal;
    WidthInBytes, Height, ElementSizeBytes: Cardinal): TCUresult; stdcall;

  TcuMemFree = function(dptr: TCUdeviceptr): TCUresult; stdcall;

  TcuMemGetAddressRange = function(var pbase: TCUdeviceptr;
    var psize: Cardinal; dptr: TCUdeviceptr): TCUresult; stdcall;

  TcuMemAllocHost = function(var pp; bytesize: Cardinal): TCUresult; stdcall;

  TcuMemFreeHost = function(p: Pointer): TCUresult; stdcall;

  TcuMemHostAlloc = function(var pp: Pointer;
    bytesize, Flags: Cardinal): TCUresult; stdcall;

  TcuMemHostGetDevicePointer = function(var pdptr: TCUdeviceptr; p: Pointer;
    Flags: Cardinal): TCUresult; stdcall;

  TcuMemHostGetFlags = function(var pFlags: Cardinal; var p): TCUresult; stdcall;
  // Procedural types for memory copies (stdcall, TCUresult result).
  // The suffix letters name destination and source: H = host pointer,
  // D = device pointer, A = array handle.
  TcuMemcpyHtoD = function(dstDevice: TCUdeviceptr; const srcHost: Pointer;
    ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyDtoH = function(const dstHost: Pointer; srcDevice: TCUdeviceptr;
    ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyDtoD = function(dstDevice, srcDevice: TCUdeviceptr;
    ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyDtoDAsync = function(dstDevice, srcDevice: TCUdeviceptr;
    ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;

  TcuMemcpyDtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
    srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyAtoD = function(dstDevice: TCUdeviceptr; hSrc: PCUarray;
    SrcIndex, ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyHtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
    pSrc: Pointer; ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyAtoH = function(dstHost: Pointer; srcArray: PCUarray;
    SrcIndex, ByteCount: Cardinal): TCUresult; stdcall;

  TcuMemcpyAtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
    srcArray: PCUarray; SrcIndex, ByteCount: Cardinal): TCUresult; stdcall;

  // Takes the descriptor as a pointer; the two variants below take it by
  // reference instead.
  TcuMemcpy2D = function(const pCopy: PCUDA_MEMCPY2D): TCUresult; stdcall;

  TcuMemcpy2DUnaligned = function(var pCopy: TCUDA_MEMCPY2D): TCUresult; stdcall;

  TcuMemcpy3D = function(var pCopy: TCUDA_MEMCPY3D): TCUresult; stdcall;
  // Procedural types for stream-based copies (stdcall, TCUresult result).
  // Host buffers are untyped `var` parameters; each takes a stream handle
  // as its final argument.
  TcuMemcpyHtoDAsync = function(dstDevice: TCUdeviceptr; var srcHost;
    ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;

  TcuMemcpyDtoHAsync = function(var dstHost; srcDevice: TCUdeviceptr;
    ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;

  TcuMemcpyHtoAAsync = function(dstArray: PCUarray; dstIndex: Cardinal;
    var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;
  // Procedural type for the stream-based array-to-host copy (stdcall,
  // TCUresult result).
  //   dstHost   - destination host buffer (untyped, by reference)
  //   srcArray  - source array handle
  //   SrcIndex  - index into the source array
  //   ByteCount - number of bytes to copy
  //   hStream   - stream handle
  // srcArray is an array handle (PCUarray), consistent with the synchronous
  // TcuMemcpyAtoH; it had been declared with the stream handle type.
  TcuMemcpyAtoHAsync = function(var dstHost; srcArray: PCUarray;
    SrcIndex: Cardinal; ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;
  // Stream-based 2D and 3D copies; the descriptor is passed by reference.
  TcuMemcpy2DAsync = function(var pCopy: TCUDA_MEMCPY2D;
    hStream: PCUstream): TCUresult; stdcall;

  TcuMemcpy3DAsync = function(var pCopy: TCUDA_MEMCPY3D;
    hStream: PCUstream): TCUresult; stdcall;
  // Procedural types for memory fills (stdcall, TCUresult result).
  // The fill value is a Byte, Word or Cardinal for the 8/16/32 variants.
  TcuMemsetD8 = function(dstDevice: TCUdeviceptr; ub: Byte;
    N: Cardinal): TCUresult; stdcall;

  TcuMemsetD16 = function(dstDevice: TCUdeviceptr; uw: Word;
    N: Cardinal): TCUresult; stdcall;

  TcuMemsetD32 = function(dstDevice: TCUdeviceptr; ui: Cardinal;
    N: Cardinal): TCUresult; stdcall;

  // 2D variants: destination pitch plus a Width x Height extent.
  TcuMemsetD2D8 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    ub: Byte; Width, Height: Cardinal): TCUresult; stdcall;

  TcuMemsetD2D16 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    uw: Word; Width, Height: Cardinal): TCUresult; stdcall;

  TcuMemsetD2D32 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    ui: Cardinal; Width, Height: Cardinal): TCUresult; stdcall;
  // Procedural types for function configuration and queries (stdcall,
  // TCUresult result).
  TcuFuncSetBlockShape = function(hfunc: PCUfunction;
    x, y, z: Integer): TCUresult; stdcall;

  TcuFuncSetSharedSize = function(hfunc: PCUfunction;
    bytes: Cardinal): TCUresult; stdcall;

  // The attribute value is returned through `pi`.
  TcuFuncGetAttribute = function(var pi: Integer;
    attrib: TCUfunction_attribute; hfunc: PCUfunction): TCUresult; stdcall;
  // Procedural types for array management (stdcall, TCUresult result).
  // Descriptors are passed by reference.
  TcuArrayCreate = function(var pHandle: PCUarray;
    var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult; stdcall;

  TcuArrayGetDescriptor = function(
    var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
    hArray: PCUarray): TCUresult; stdcall;

  TcuArrayDestroy = function(hArray: PCUarray): TCUresult; stdcall;

  TcuArray3DCreate = function(var pHandle: PCUarray;
    var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult; stdcall;

  TcuArray3DGetDescriptor = function(
    var pArrayDescriptor: TCUDA_ARRAY3D_DESCRIPTOR;
    hArray: PCUarray): TCUresult; stdcall;
  // Procedural types for creating, destroying and configuring texture
  // references (stdcall, TCUresult result).
  TcuTexRefCreate = function(var pTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefDestroy = function(hTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefSetArray = function(hTexRef: PCUtexref; hArray: PCUarray;
    Flags: Cardinal): TCUresult; stdcall;

  TcuTexRefSetAddress = function(var ByteOffset: Cardinal;
    hTexRef: PCUtexref; dptr: TCUdeviceptr;
    bytes: Cardinal): TCUresult; stdcall;

  TcuTexRefSetAddress2D = function(hTexRef: PCUtexref;
    var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr;
    Pitch: Cardinal): TCUresult; stdcall;

  TcuTexRefSetFormat = function(hTexRef: PCUtexref; fmt: TCUarray_format;
    NumPackedComponents: Integer): TCUresult; stdcall;

  TcuTexRefSetAddressMode = function(hTexRef: PCUtexref; dim: Integer;
    am: TCUaddress_mode): TCUresult; stdcall;

  TcuTexRefSetFilterMode = function(hTexRef: PCUtexref;
    fm: TCUfilter_mode): TCUresult; stdcall;

  TcuTexRefSetFlags = function(hTexRef: PCUtexref;
    Flags: Cardinal): TCUresult; stdcall;
  // Procedural types for texture reference queries (stdcall, TCUresult
  // result). The queried value comes back through the leading `var`
  // parameter(s).
  TcuTexRefGetAddress = function(var pdptr: TCUdeviceptr;
    hTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefGetArray = function(var phArray: PCUarray;
    hTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefGetAddressMode = function(var pam: TCUaddress_mode;
    hTexRef: PCUtexref; dim: Integer): TCUresult; stdcall;

  TcuTexRefGetFilterMode = function(var pfm: TCUfilter_mode;
    hTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefGetFormat = function(var pFormat: TCUarray_format;
    var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult; stdcall;

  TcuTexRefGetFlags = function(var pFlags: Cardinal;
    hTexRef: PCUtexref): TCUresult; stdcall;
  // Procedural types for setting function parameters (stdcall, TCUresult
  // result). The i/f/v variants take a Cardinal, a Single and an untyped
  // buffer respectively.
  TcuParamSetSize = function(hfunc: PCUfunction;
    numbytes: Cardinal): TCUresult; stdcall;

  TcuParamSeti = function(hfunc: PCUfunction; offset: Integer;
    value: Cardinal): TCUresult; stdcall;

  TcuParamSetf = function(hfunc: PCUfunction; offset: Integer;
    value: Single): TCUresult; stdcall;

  TcuParamSetv = function(hfunc: PCUfunction; offset: Integer; var ptr;
    numbytes: Cardinal): TCUresult; stdcall;

  TcuParamSetTexRef = function(hfunc: PCUfunction; texunit: Integer;
    hTexRef: PCUtexref): TCUresult; stdcall;
  644. TcuLaunch = function(f: PCUfunction): TCUresult;stdcall;
  645. TcuLaunchGrid = function(f: PCUfunction; grid_width: Integer;
  646. grid_height: Integer): TCUresult;stdcall;
  647. TcuLaunchGridAsync = function(f: PCUfunction; grid_width: Integer;
  648. grid_height: Integer; hStream: PCUstream): TCUresult;stdcall;
  // Procedural types of the cuEvent* variables.
  TcuEventCreate = function(var phEvent: PCUevent;
    Flags: Cardinal): TCUresult; stdcall;
  TcuEventRecord = function(hEvent: PCUevent;
    hStream: PCUstream): TCUresult; stdcall;
  TcuEventQuery = function(hEvent: PCUevent): TCUresult; stdcall;
  TcuEventSynchronize = function(hEvent: PCUevent): TCUresult; stdcall;
  TcuEventDestroy = function(hEvent: PCUevent): TCUresult; stdcall;
  TcuEventElapsedTime = function(var pMilliseconds: Single;
    hStart, hEnd: PCUevent): TCUresult; stdcall;

  // Procedural types of the cuStream* variables.
  TcuStreamCreate = function(var phStream: PCUstream;
    Flags: Cardinal): TCUresult; stdcall;
  TcuStreamQuery = function(hStream: PCUstream): TCUresult; stdcall;
  TcuStreamSynchronize = function(hStream: PCUstream): TCUresult; stdcall;
  TcuStreamDestroy = function(hStream: PCUstream): TCUresult; stdcall;
  // Procedural types of the cuGLCtxCreate, cuGraphics* and cuWGLGetDevice
  // variables.
  TcuGLCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal;
    device: TCUdevice): TCUresult; stdcall;
  TcuGraphicsGLRegisterBuffer = function(
    var pCudaResource: PCUgraphicsResource; buffer: Cardinal;
    Flags: TCUgraphicsMapResourceFlags): TCUresult; stdcall;
  TcuGraphicsGLRegisterImage = function(
    var pCudaResource: PCUgraphicsResource; image, target: Cardinal;
    Flags: TCUgraphicsMapResourceFlags): TCUresult; stdcall;
  TcuWGLGetDevice = function(var pDevice: TCUdevice;
    hGpu: HGPUNV): TCUresult; stdcall;
  TcuGraphicsUnregisterResource = function(
    resource: PCUgraphicsResource): TCUresult; stdcall;
  TcuGraphicsSubResourceGetMappedArray = function(var pArray: PCUarray;
    resource: PCUgraphicsResource;
    arrayIndex, mipLevel: Cardinal): TCUresult; stdcall;
  TcuGraphicsResourceGetMappedPointer = function(
    var pDevPtr: TCUdeviceptr; out psize: Cardinal;
    resource: PCUgraphicsResource): TCUresult; stdcall;
  TcuGraphicsResourceSetMapFlags = function(
    resource: PCUgraphicsResource; Flags: Cardinal): TCUresult; stdcall;
  TcuGraphicsMapResources = function(count: Cardinal;
    resources: PPCUgraphicsResource;
    hStream: PCUstream): TCUresult; stdcall;
  TcuGraphicsUnmapResources = function(count: Cardinal;
    resources: PPCUgraphicsResource;
    hStream: PCUstream): TCUresult; stdcall;

  // Procedural types of the cuGL* buffer-object variables.
  // TcuGLInit is the only type in this group declared as a procedure,
  // i.e. without a TCUresult return value.
  TcuGLInit = procedure(); stdcall;
  TcuGLRegisterBufferObject = function(
    buffer: Cardinal): TCUresult; stdcall;
  TcuGLMapBufferObject = function(var dptr: TCUdeviceptr;
    var size: Cardinal; buffer: Cardinal): TCUresult; stdcall;
  TcuGLUnmapBufferObject = function(buffer: Cardinal): TCUresult; stdcall;
  TcuGLUnregisterBufferObject = function(
    buffer: Cardinal): TCUresult; stdcall;
  TcuGLSetBufferObjectMapFlags = function(
    buffer, Flags: Cardinal): TCUresult; stdcall;
  TcuGLMapBufferObjectAsync = function(var dptr: TCUdeviceptr;
    var size: Cardinal; buffer: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  TcuGLUnmapBufferObjectAsync = function(buffer: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  var
    // Public function-pointer variables, one per entry point. Each is typed
    // by the Tcu* procedural type of the same name.
    // NOTE(review): presumably bound when the library is loaded
    // (InitCUDA / InitCUDAFromLibrary) - the binding code is not in this
    // section; confirm.

    // Initialisation and device queries
    cuInit: TcuInit;
    cuDriverGetVersion: TcuDriverGetVersion;
    cuDeviceGet: TcuDeviceGet;
    cuDeviceGetCount: TcuDeviceGetCount;
    cuDeviceGetName: TcuDeviceGetName;
    cuDeviceComputeCapability: TcuDeviceComputeCapability;
    cuDeviceTotalMem: TcuDeviceTotalMem;
    cuDeviceGetProperties: TcuDeviceGetProperties;
    cuDeviceGetAttribute: TcuDeviceGetAttribute;

    // Contexts
    cuCtxCreate: TcuCtxCreate;
    cuCtxDestroy: TcuCtxDestroy;
    cuCtxAttach: TcuCtxAttach;
    cuCtxDetach: TcuCtxDetach;
    cuCtxPushCurrent: TcuCtxPushCurrent;
    cuCtxPopCurrent: TcuCtxPopCurrent;
    cuCtxGetDevice: TcuCtxGetDevice;
    cuCtxSynchronize: TcuCtxSynchronize;

    // Modules
    cuModuleLoad: TcuModuleLoad;
    cuModuleLoadData: TcuModuleLoadData;
    cuModuleLoadDataEx: TcuModuleLoadDataEx;
    cuModuleLoadFatBinary: TcuModuleLoadFatBinary;
    cuModuleUnload: TcuModuleUnload;
    cuModuleGetFunction: TcuModuleGetFunction;
    cuModuleGetGlobal: TcuModuleGetGlobal;
    cuModuleGetTexRef: TcuModuleGetTexRef;

    // Memory allocation
    cuMemGetInfo: TcuMemGetInfo;
    cuMemAlloc: TcuMemAlloc;
    cuMemAllocPitch: TcuMemAllocPitch;
    cuMemFree: TcuMemFree;
    cuMemGetAddressRange: TcuMemGetAddressRange;
    cuMemAllocHost: TcuMemAllocHost;
    cuMemFreeHost: TcuMemFreeHost;
    cuMemHostAlloc: TcuMemHostAlloc;
    cuMemHostGetDevicePointer: TcuMemHostGetDevicePointer;
    cuMemHostGetFlags: TcuMemHostGetFlags;

    // Memory copies
    cuMemcpyHtoD: TcuMemcpyHtoD;
    cuMemcpyDtoH: TcuMemcpyDtoH;
    cuMemcpyDtoD: TcuMemcpyDtoD;
    cuMemcpyDtoDAsync: TcuMemcpyDtoDAsync;
    cuMemcpyDtoA: TcuMemcpyDtoA;
    cuMemcpyAtoD: TcuMemcpyAtoD;
    cuMemcpyHtoA: TcuMemcpyHtoA;
    cuMemcpyAtoH: TcuMemcpyAtoH;
    cuMemcpyAtoA: TcuMemcpyAtoA;
    cuMemcpy2D: TcuMemcpy2D;
    cuMemcpy2DUnaligned: TcuMemcpy2DUnaligned;
    cuMemcpy3D: TcuMemcpy3D;
    cuMemcpyHtoDAsync: TcuMemcpyHtoDAsync;
    cuMemcpyDtoHAsync: TcuMemcpyDtoHAsync;
    cuMemcpyHtoAAsync: TcuMemcpyHtoAAsync;
    cuMemcpyAtoHAsync: TcuMemcpyAtoHAsync;
    cuMemcpy2DAsync: TcuMemcpy2DAsync;
    cuMemcpy3DAsync: TcuMemcpy3DAsync;

    // Memory fills
    cuMemsetD8: TcuMemsetD8;
    cuMemsetD16: TcuMemsetD16;
    cuMemsetD32: TcuMemsetD32;
    cuMemsetD2D8: TcuMemsetD2D8;
    cuMemsetD2D16: TcuMemsetD2D16;
    cuMemsetD2D32: TcuMemsetD2D32;

    // Function attributes
    cuFuncSetBlockShape: TcuFuncSetBlockShape;
    cuFuncSetSharedSize: TcuFuncSetSharedSize;
    cuFuncGetAttribute: TcuFuncGetAttribute;

    // Arrays
    cuArrayCreate: TcuArrayCreate;
    cuArrayGetDescriptor: TcuArrayGetDescriptor;
    cuArrayDestroy: TcuArrayDestroy;
    cuArray3DCreate: TcuArray3DCreate;
    cuArray3DGetDescriptor: TcuArray3DGetDescriptor;

    // Texture references
    cuTexRefCreate: TcuTexRefCreate;
    cuTexRefDestroy: TcuTexRefDestroy;
    cuTexRefSetArray: TcuTexRefSetArray;
    cuTexRefSetAddress: TcuTexRefSetAddress;
    cuTexRefSetAddress2D: TcuTexRefSetAddress2D;
    cuTexRefSetFormat: TcuTexRefSetFormat;
    cuTexRefSetAddressMode: TcuTexRefSetAddressMode;
    cuTexRefSetFilterMode: TcuTexRefSetFilterMode;
    cuTexRefSetFlags: TcuTexRefSetFlags;
    cuTexRefGetAddress: TcuTexRefGetAddress;
    cuTexRefGetArray: TcuTexRefGetArray;
    cuTexRefGetAddressMode: TcuTexRefGetAddressMode;
    cuTexRefGetFilterMode: TcuTexRefGetFilterMode;
    cuTexRefGetFormat: TcuTexRefGetFormat;
    cuTexRefGetFlags: TcuTexRefGetFlags;

    // Kernel parameters and launches
    cuParamSetSize: TcuParamSetSize;
    cuParamSeti: TcuParamSeti;
    cuParamSetf: TcuParamSetf;
    cuParamSetv: TcuParamSetv;
    cuParamSetTexRef: TcuParamSetTexRef;
    cuLaunch: TcuLaunch;
    cuLaunchGrid: TcuLaunchGrid;
    cuLaunchGridAsync: TcuLaunchGridAsync;

    // Events
    cuEventCreate: TcuEventCreate;
    cuEventRecord: TcuEventRecord;
    cuEventQuery: TcuEventQuery;
    cuEventSynchronize: TcuEventSynchronize;
    cuEventDestroy: TcuEventDestroy;
    cuEventElapsedTime: TcuEventElapsedTime;

    // Streams
    cuStreamCreate: TcuStreamCreate;
    cuStreamQuery: TcuStreamQuery;
    cuStreamSynchronize: TcuStreamSynchronize;
    cuStreamDestroy: TcuStreamDestroy;

    // GL / graphics interop
    cuGLInit: TcuGLInit;
    cuGLCtxCreate: TcuGLCtxCreate;
    cuGraphicsGLRegisterBuffer: TcuGraphicsGLRegisterBuffer;
    cuGraphicsGLRegisterImage: TcuGraphicsGLRegisterImage;
    cuWGLGetDevice: TcuWGLGetDevice;
    cuGraphicsUnregisterResource: TcuGraphicsUnregisterResource;
    cuGraphicsSubResourceGetMappedArray: TcuGraphicsSubResourceGetMappedArray;
    cuGraphicsResourceGetMappedPointer: TcuGraphicsResourceGetMappedPointer;
    cuGraphicsResourceSetMapFlags: TcuGraphicsResourceSetMapFlags;
    cuGraphicsMapResources: TcuGraphicsMapResources;
    cuGraphicsUnmapResources: TcuGraphicsUnmapResources;
    cuGLRegisterBufferObject: TcuGLRegisterBufferObject;
    cuGLMapBufferObject: TcuGLMapBufferObject;
    cuGLUnmapBufferObject: TcuGLUnmapBufferObject;
    cuGLUnregisterBufferObject: TcuGLUnregisterBufferObject;
    cuGLSetBufferObjectMapFlags: TcuGLSetBufferObjectMapFlags;
    cuGLMapBufferObjectAsync: TcuGLMapBufferObjectAsync;
    cuGLUnmapBufferObjectAsync: TcuGLUnmapBufferObjectAsync;
  // Public routines of the unit (declarations only; the bodies are not in
  // this section).
  // NOTE(review): judging by the names, these load/unload the CUDA library,
  // report whether it is loaded, and turn a TCUresult into readable text -
  // confirm against the implementations.
  function InitCUDA: Boolean;
  procedure CloseCUDA;
  function InitCUDAFromLibrary(const LibName: WideString): Boolean;
  function IsCUDAInitialized: Boolean;
  function Get_CUDA_API_Error_String(AError: TCUresult): string;
  815. implementation //==============================================================
  resourcestring
    // Error-report template used by the debug-mode *Shell wrappers:
    // first %s is the API name, second %s the decoded error text.
    cudasFuncRetErr = '%s return error: %s';

  const
    // NOTE(review): presumably the "no library loaded" value for CUDAHandle;
    // confirm where it is compared.
    INVALID_MODULEHANDLE = 0;

  // CUDAHandle is declared once per platform because its type differs.
  // NOTE(review): presumably the handle of the loaded CUDA library - confirm.

  // ---- Windows ----
  {$IFDEF MSWINDOWS}
  var
    CUDAHandle: HINST;
  {$ENDIF}

  // ---- UNIX ----
  {$IFDEF UNIX}
  var
    CUDAHandle: TLibHandle;
  {$ENDIF}
  const
    // Name string of every entry point. Each constant is the variable name
    // with a "Name" suffix, except cuDeviceGet_Name: the plain spelling
    // "cuDeviceGetName" is already taken by the cuDeviceGetName variable.
    // The debug-mode *Shell wrappers put these strings into their error
    // reports.
    // NOTE(review): presumably also used for symbol lookup when the library
    // is loaded - that code is not in this section; confirm.

    // Initialisation and device queries
    cuInitName = 'cuInit';
    cuDriverGetVersionName = 'cuDriverGetVersion';
    cuDeviceGet_Name = 'cuDeviceGet';
    cuDeviceGetCountName = 'cuDeviceGetCount';
    cuDeviceGetNameName = 'cuDeviceGetName';
    cuDeviceComputeCapabilityName = 'cuDeviceComputeCapability';
    cuDeviceTotalMemName = 'cuDeviceTotalMem';
    cuDeviceGetPropertiesName = 'cuDeviceGetProperties';
    cuDeviceGetAttributeName = 'cuDeviceGetAttribute';

    // Contexts
    cuCtxCreateName = 'cuCtxCreate';
    cuCtxDestroyName = 'cuCtxDestroy';
    cuCtxAttachName = 'cuCtxAttach';
    cuCtxDetachName = 'cuCtxDetach';
    cuCtxPushCurrentName = 'cuCtxPushCurrent';
    cuCtxPopCurrentName = 'cuCtxPopCurrent';
    cuCtxGetDeviceName = 'cuCtxGetDevice';
    cuCtxSynchronizeName = 'cuCtxSynchronize';

    // Modules
    cuModuleLoadName = 'cuModuleLoad';
    cuModuleLoadDataName = 'cuModuleLoadData';
    cuModuleLoadDataExName = 'cuModuleLoadDataEx';
    cuModuleLoadFatBinaryName = 'cuModuleLoadFatBinary';
    cuModuleUnloadName = 'cuModuleUnload';
    cuModuleGetFunctionName = 'cuModuleGetFunction';
    cuModuleGetGlobalName = 'cuModuleGetGlobal';
    cuModuleGetTexRefName = 'cuModuleGetTexRef';

    // Memory allocation
    cuMemGetInfoName = 'cuMemGetInfo';
    cuMemAllocName = 'cuMemAlloc';
    cuMemAllocPitchName = 'cuMemAllocPitch';
    cuMemFreeName = 'cuMemFree';
    cuMemGetAddressRangeName = 'cuMemGetAddressRange';
    cuMemAllocHostName = 'cuMemAllocHost';
    cuMemFreeHostName = 'cuMemFreeHost';
    cuMemHostAllocName = 'cuMemHostAlloc';
    cuMemHostGetDevicePointerName = 'cuMemHostGetDevicePointer';
    cuMemHostGetFlagsName = 'cuMemHostGetFlags';

    // Memory copies
    cuMemcpyHtoDName = 'cuMemcpyHtoD';
    cuMemcpyDtoHName = 'cuMemcpyDtoH';
    cuMemcpyDtoDName = 'cuMemcpyDtoD';
    cuMemcpyDtoDAsyncName = 'cuMemcpyDtoDAsync';
    cuMemcpyDtoAName = 'cuMemcpyDtoA';
    cuMemcpyAtoDName = 'cuMemcpyAtoD';
    cuMemcpyHtoAName = 'cuMemcpyHtoA';
    cuMemcpyAtoHName = 'cuMemcpyAtoH';
    cuMemcpyAtoAName = 'cuMemcpyAtoA';
    cuMemcpy2DName = 'cuMemcpy2D';
    cuMemcpy2DUnalignedName = 'cuMemcpy2DUnaligned';
    cuMemcpy3DName = 'cuMemcpy3D';
    cuMemcpyHtoDAsyncName = 'cuMemcpyHtoDAsync';
    cuMemcpyDtoHAsyncName = 'cuMemcpyDtoHAsync';
    cuMemcpyHtoAAsyncName = 'cuMemcpyHtoAAsync';
    cuMemcpyAtoHAsyncName = 'cuMemcpyAtoHAsync';
    cuMemcpy2DAsyncName = 'cuMemcpy2DAsync';
    cuMemcpy3DAsyncName = 'cuMemcpy3DAsync';

    // Memory fills
    cuMemsetD8Name = 'cuMemsetD8';
    cuMemsetD16Name = 'cuMemsetD16';
    cuMemsetD32Name = 'cuMemsetD32';
    cuMemsetD2D8Name = 'cuMemsetD2D8';
    cuMemsetD2D16Name = 'cuMemsetD2D16';
    cuMemsetD2D32Name = 'cuMemsetD2D32';

    // Function attributes
    cuFuncSetBlockShapeName = 'cuFuncSetBlockShape';
    cuFuncSetSharedSizeName = 'cuFuncSetSharedSize';
    cuFuncGetAttributeName = 'cuFuncGetAttribute';

    // Arrays
    cuArrayCreateName = 'cuArrayCreate';
    cuArrayGetDescriptorName = 'cuArrayGetDescriptor';
    cuArrayDestroyName = 'cuArrayDestroy';
    cuArray3DCreateName = 'cuArray3DCreate';
    cuArray3DGetDescriptorName = 'cuArray3DGetDescriptor';

    // Texture references
    cuTexRefCreateName = 'cuTexRefCreate';
    cuTexRefDestroyName = 'cuTexRefDestroy';
    cuTexRefSetArrayName = 'cuTexRefSetArray';
    cuTexRefSetAddressName = 'cuTexRefSetAddress';
    cuTexRefSetAddress2DName = 'cuTexRefSetAddress2D';
    cuTexRefSetFormatName = 'cuTexRefSetFormat';
    cuTexRefSetAddressModeName = 'cuTexRefSetAddressMode';
    cuTexRefSetFilterModeName = 'cuTexRefSetFilterMode';
    cuTexRefSetFlagsName = 'cuTexRefSetFlags';
    cuTexRefGetAddressName = 'cuTexRefGetAddress';
    cuTexRefGetArrayName = 'cuTexRefGetArray';
    cuTexRefGetAddressModeName = 'cuTexRefGetAddressMode';
    cuTexRefGetFilterModeName = 'cuTexRefGetFilterMode';
    cuTexRefGetFormatName = 'cuTexRefGetFormat';
    cuTexRefGetFlagsName = 'cuTexRefGetFlags';

    // Kernel parameters and launches
    cuParamSetSizeName = 'cuParamSetSize';
    cuParamSetiName = 'cuParamSeti';
    cuParamSetfName = 'cuParamSetf';
    cuParamSetvName = 'cuParamSetv';
    cuParamSetTexRefName = 'cuParamSetTexRef';
    cuLaunchName = 'cuLaunch';
    cuLaunchGridName = 'cuLaunchGrid';
    cuLaunchGridAsyncName = 'cuLaunchGridAsync';

    // Events
    cuEventCreateName = 'cuEventCreate';
    cuEventRecordName = 'cuEventRecord';
    cuEventQueryName = 'cuEventQuery';
    cuEventSynchronizeName = 'cuEventSynchronize';
    cuEventDestroyName = 'cuEventDestroy';
    cuEventElapsedTimeName = 'cuEventElapsedTime';

    // Streams
    cuStreamCreateName = 'cuStreamCreate';
    cuStreamQueryName = 'cuStreamQuery';
    cuStreamSynchronizeName = 'cuStreamSynchronize';
    cuStreamDestroyName = 'cuStreamDestroy';

    // GL / graphics interop
    cuGLCtxCreateName = 'cuGLCtxCreate';
    cuGraphicsGLRegisterBufferName = 'cuGraphicsGLRegisterBuffer';
    cuGraphicsGLRegisterImageName = 'cuGraphicsGLRegisterImage';
    cuWGLGetDeviceName = 'cuWGLGetDevice';
    cuGraphicsUnregisterResourceName = 'cuGraphicsUnregisterResource';
    cuGraphicsSubResourceGetMappedArrayName = 'cuGraphicsSubResourceGetMappedArray';
    cuGraphicsResourceGetMappedPointerName = 'cuGraphicsResourceGetMappedPointer';
    cuGraphicsResourceSetMapFlagsName = 'cuGraphicsResourceSetMapFlags';
    cuGraphicsMapResourcesName = 'cuGraphicsMapResources';
    cuGraphicsUnmapResourcesName = 'cuGraphicsUnmapResources';
    cuGLInitName = 'cuGLInit';
    cuGLRegisterBufferObjectName = 'cuGLRegisterBufferObject';
    cuGLMapBufferObjectName = 'cuGLMapBufferObject';
    cuGLUnmapBufferObjectName = 'cuGLUnmapBufferObject';
    cuGLUnregisterBufferObjectName = 'cuGLUnregisterBufferObject';
    cuGLSetBufferObjectMapFlagsName = 'cuGLSetBufferObjectMapFlags';
    cuGLMapBufferObjectAsyncName = 'cuGLMapBufferObjectAsync';
    cuGLUnmapBufferObjectAsyncName = 'cuGLUnmapBufferObjectAsync';
  950. {$IFDEF USE_CUDA_DEBUG_MODE}
  var
    // Debug-mode (USE_CUDA_DEBUG_MODE) function pointers. Each *Shell wrapper
    // below forwards its call to the underscore-suffixed pointer of the same
    // API name. This list has no cuGLInit_ entry.

    // Initialisation and device queries
    cuInit_: TcuInit;
    cuDriverGetVersion_: TcuDriverGetVersion;
    cuDeviceGet_: TcuDeviceGet;
    cuDeviceGetCount_: TcuDeviceGetCount;
    cuDeviceGetName_: TcuDeviceGetName;
    cuDeviceComputeCapability_: TcuDeviceComputeCapability;
    cuDeviceTotalMem_: TcuDeviceTotalMem;
    cuDeviceGetProperties_: TcuDeviceGetProperties;
    cuDeviceGetAttribute_: TcuDeviceGetAttribute;

    // Contexts
    cuCtxCreate_: TcuCtxCreate;
    cuCtxDestroy_: TcuCtxDestroy;
    cuCtxAttach_: TcuCtxAttach;
    cuCtxDetach_: TcuCtxDetach;
    cuCtxPushCurrent_: TcuCtxPushCurrent;
    cuCtxPopCurrent_: TcuCtxPopCurrent;
    cuCtxGetDevice_: TcuCtxGetDevice;
    cuCtxSynchronize_: TcuCtxSynchronize;

    // Modules
    cuModuleLoad_: TcuModuleLoad;
    cuModuleLoadData_: TcuModuleLoadData;
    cuModuleLoadDataEx_: TcuModuleLoadDataEx;
    cuModuleLoadFatBinary_: TcuModuleLoadFatBinary;
    cuModuleUnload_: TcuModuleUnload;
    cuModuleGetFunction_: TcuModuleGetFunction;
    cuModuleGetGlobal_: TcuModuleGetGlobal;
    cuModuleGetTexRef_: TcuModuleGetTexRef;

    // Memory allocation
    cuMemGetInfo_: TcuMemGetInfo;
    cuMemAlloc_: TcuMemAlloc;
    cuMemAllocPitch_: TcuMemAllocPitch;
    cuMemFree_: TcuMemFree;
    cuMemGetAddressRange_: TcuMemGetAddressRange;
    cuMemAllocHost_: TcuMemAllocHost;
    cuMemFreeHost_: TcuMemFreeHost;
    cuMemHostAlloc_: TcuMemHostAlloc;
    cuMemHostGetDevicePointer_: TcuMemHostGetDevicePointer;
    cuMemHostGetFlags_: TcuMemHostGetFlags;

    // Memory copies
    cuMemcpyHtoD_: TcuMemcpyHtoD;
    cuMemcpyDtoH_: TcuMemcpyDtoH;
    cuMemcpyDtoD_: TcuMemcpyDtoD;
    cuMemcpyDtoDAsync_: TcuMemcpyDtoDAsync;
    cuMemcpyDtoA_: TcuMemcpyDtoA;
    cuMemcpyAtoD_: TcuMemcpyAtoD;
    cuMemcpyHtoA_: TcuMemcpyHtoA;
    cuMemcpyAtoH_: TcuMemcpyAtoH;
    cuMemcpyAtoA_: TcuMemcpyAtoA;
    cuMemcpy2D_: TcuMemcpy2D;
    cuMemcpy2DUnaligned_: TcuMemcpy2DUnaligned;
    cuMemcpy3D_: TcuMemcpy3D;
    cuMemcpyHtoDAsync_: TcuMemcpyHtoDAsync;
    cuMemcpyDtoHAsync_: TcuMemcpyDtoHAsync;
    cuMemcpyHtoAAsync_: TcuMemcpyHtoAAsync;
    cuMemcpyAtoHAsync_: TcuMemcpyAtoHAsync;
    cuMemcpy2DAsync_: TcuMemcpy2DAsync;
    cuMemcpy3DAsync_: TcuMemcpy3DAsync;

    // Memory fills
    cuMemsetD8_: TcuMemsetD8;
    cuMemsetD16_: TcuMemsetD16;
    cuMemsetD32_: TcuMemsetD32;
    cuMemsetD2D8_: TcuMemsetD2D8;
    cuMemsetD2D16_: TcuMemsetD2D16;
    cuMemsetD2D32_: TcuMemsetD2D32;

    // Function attributes
    cuFuncSetBlockShape_: TcuFuncSetBlockShape;
    cuFuncSetSharedSize_: TcuFuncSetSharedSize;
    cuFuncGetAttribute_: TcuFuncGetAttribute;

    // Arrays
    cuArrayCreate_: TcuArrayCreate;
    cuArrayGetDescriptor_: TcuArrayGetDescriptor;
    cuArrayDestroy_: TcuArrayDestroy;
    cuArray3DCreate_: TcuArray3DCreate;
    cuArray3DGetDescriptor_: TcuArray3DGetDescriptor;

    // Texture references
    cuTexRefCreate_: TcuTexRefCreate;
    cuTexRefDestroy_: TcuTexRefDestroy;
    cuTexRefSetArray_: TcuTexRefSetArray;
    cuTexRefSetAddress_: TcuTexRefSetAddress;
    cuTexRefSetAddress2D_: TcuTexRefSetAddress2D;
    cuTexRefSetFormat_: TcuTexRefSetFormat;
    cuTexRefSetAddressMode_: TcuTexRefSetAddressMode;
    cuTexRefSetFilterMode_: TcuTexRefSetFilterMode;
    cuTexRefSetFlags_: TcuTexRefSetFlags;
    cuTexRefGetAddress_: TcuTexRefGetAddress;
    cuTexRefGetArray_: TcuTexRefGetArray;
    cuTexRefGetAddressMode_: TcuTexRefGetAddressMode;
    cuTexRefGetFilterMode_: TcuTexRefGetFilterMode;
    cuTexRefGetFormat_: TcuTexRefGetFormat;
    cuTexRefGetFlags_: TcuTexRefGetFlags;

    // Kernel parameters and launches
    cuParamSetSize_: TcuParamSetSize;
    cuParamSeti_: TcuParamSeti;
    cuParamSetf_: TcuParamSetf;
    cuParamSetv_: TcuParamSetv;
    cuParamSetTexRef_: TcuParamSetTexRef;
    cuLaunch_: TcuLaunch;
    cuLaunchGrid_: TcuLaunchGrid;
    cuLaunchGridAsync_: TcuLaunchGridAsync;

    // Events
    cuEventCreate_: TcuEventCreate;
    cuEventRecord_: TcuEventRecord;
    cuEventQuery_: TcuEventQuery;
    cuEventSynchronize_: TcuEventSynchronize;
    cuEventDestroy_: TcuEventDestroy;
    cuEventElapsedTime_: TcuEventElapsedTime;

    // Streams
    cuStreamCreate_: TcuStreamCreate;
    cuStreamQuery_: TcuStreamQuery;
    cuStreamSynchronize_: TcuStreamSynchronize;
    cuStreamDestroy_: TcuStreamDestroy;

    // GL / graphics interop
    cuGLCtxCreate_: TcuGLCtxCreate;
    cuGraphicsGLRegisterBuffer_: TcuGraphicsGLRegisterBuffer;
    cuGraphicsGLRegisterImage_: TcuGraphicsGLRegisterImage;
    cuWGLGetDevice_: TcuWGLGetDevice;
    cuGraphicsUnregisterResource_: TcuGraphicsUnregisterResource;
    cuGraphicsSubResourceGetMappedArray_: TcuGraphicsSubResourceGetMappedArray;
    cuGraphicsResourceGetMappedPointer_: TcuGraphicsResourceGetMappedPointer;
    cuGraphicsResourceSetMapFlags_: TcuGraphicsResourceSetMapFlags;
    cuGraphicsMapResources_: TcuGraphicsMapResources;
    cuGraphicsUnmapResources_: TcuGraphicsUnmapResources;
    cuGLRegisterBufferObject_: TcuGLRegisterBufferObject;
    cuGLMapBufferObject_: TcuGLMapBufferObject;
    cuGLUnmapBufferObject_: TcuGLUnmapBufferObject;
    cuGLUnregisterBufferObject_: TcuGLUnregisterBufferObject;
    cuGLSetBufferObjectMapFlags_: TcuGLSetBufferObjectMapFlags;
    cuGLMapBufferObjectAsync_: TcuGLMapBufferObjectAsync;
    cuGLUnmapBufferObjectAsync_: TcuGLUnmapBufferObjectAsync;
  // Debug-mode wrapper: forwards to cuInit_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuInitShell(Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuInit_(Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuInitName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDriverGetVersion_ and, when the result
  // is not CUDA_SUCCESS, reports the API name and error text via GLSLogger.
  function cuDriverGetVersionShell(out driverVersion: Integer): TCUresult; stdcall;
  begin
    Result := cuDriverGetVersion_(driverVersion);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDriverGetVersionName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceGet_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuDeviceGetShell(var device: TCUdevice; ordinal: Integer): TCUresult; stdcall;
  begin
    Result := cuDeviceGet_(device, ordinal);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceGet_Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceGetCount_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuDeviceGetCountShell(var count: Integer): TCUresult; stdcall;
  begin
    Result := cuDeviceGetCount_(count);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceGetCountName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceGetName_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuDeviceGetNameShell(name: PAnsiChar; len: Integer;
    dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuDeviceGetName_(name, len, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceGetNameName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceComputeCapability_ and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuDeviceComputeCapabilityShell(var major: Integer;
    var minor: Integer; dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuDeviceComputeCapability_(major, minor, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceComputeCapabilityName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceTotalMem_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuDeviceTotalMemShell(bytes: PSize_t; dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuDeviceTotalMem_(bytes, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceTotalMemName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceGetProperties_ and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuDeviceGetPropertiesShell(var prop: TCUdevprop;
    dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuDeviceGetProperties_(prop, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceGetPropertiesName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuDeviceGetAttribute_ and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuDeviceGetAttributeShell(pi: PSize_t; attrib: TCUdevice_attribute;
    dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuDeviceGetAttribute_(pi, attrib, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuDeviceGetAttributeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxCreate_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal;
    dev: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuCtxCreate_(pctx, Flags, dev);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxDestroy_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxDestroyShell(ctx: PCUcontext): TCUresult; stdcall;
  begin
    Result := cuCtxDestroy_(ctx);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxDestroyName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxAttach_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxAttachShell(var pctx: PCUcontext; Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuCtxAttach_(pctx, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxAttachName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxDetach_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxDetachShell(ctx: PCUcontext): TCUresult; stdcall;
  begin
    Result := cuCtxDetach_(ctx);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxDetachName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxPushCurrent_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxPushCurrentShell(ctx: PCUcontext): TCUresult; stdcall;
  begin
    Result := cuCtxPushCurrent_(ctx);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxPushCurrentName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxPopCurrent_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxPopCurrentShell(var pctx: PCUcontext): TCUresult; stdcall;
  begin
    Result := cuCtxPopCurrent_(pctx);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxPopCurrentName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuCtxGetDevice_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuCtxGetDeviceShell(var device: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuCtxGetDevice_(device);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxGetDeviceName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: calls cuCtxSynchronize_ (no arguments) and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuCtxSynchronizeShell: TCUresult; stdcall;
  begin
    // Explicit parentheses make it plain that the pointer is invoked here.
    Result := cuCtxSynchronize_();
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuCtxSynchronizeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleLoad_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuModuleLoadShell(var module: PCUmodule;
    const fname: PAnsiChar): TCUresult; stdcall;
  begin
    Result := cuModuleLoad_(module, fname);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleLoadName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleLoadData_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuModuleLoadDataShell(var module: PCUmodule;
    const image: PAnsiChar): TCUresult; stdcall;
  begin
    Result := cuModuleLoadData_(module, image);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleLoadDataName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleLoadDataEx_ (image and
  // optionValues are passed on as untyped var parameters) and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuModuleLoadDataExShell(var module: PCUmodule; var image;
    numOptions: Cardinal; var options: TCUjit_option;
    var optionValues): TCUresult; stdcall;
  begin
    Result := cuModuleLoadDataEx_(module, image, numOptions, options,
      optionValues);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleLoadDataExName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleLoadFatBinary_ and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuModuleLoadFatBinaryShell(var module: PCUmodule;
    var fatCubin): TCUresult; stdcall;
  begin
    Result := cuModuleLoadFatBinary_(module, fatCubin);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleLoadFatBinaryName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleUnload_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuModuleUnloadShell(hmod: PCUmodule): TCUresult; stdcall;
  begin
    Result := cuModuleUnload_(hmod);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleUnloadName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleGetFunction_ and, when the result
  // is not CUDA_SUCCESS, reports the API name and error text via GLSLogger.
  function cuModuleGetFunctionShell(out hfunc: PCUfunction; hmod: PCUmodule;
    const name: PAnsiChar): TCUresult; stdcall;
  begin
    Result := cuModuleGetFunction_(hfunc, hmod, name);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleGetFunctionName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleGetGlobal_ and, when the result
  // is not CUDA_SUCCESS, reports the API name and error text via GLSLogger.
  function cuModuleGetGlobalShell(out dptr: TCUdeviceptr; var bytes: Cardinal;
    hmod: PCUmodule; const name: PAnsiChar): TCUresult; stdcall;
  begin
    Result := cuModuleGetGlobal_(dptr, bytes, hmod, name);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleGetGlobalName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuModuleGetTexRef_ and, when the result
  // is not CUDA_SUCCESS, reports the API name and error text via GLSLogger.
  function cuModuleGetTexRefShell(out pTexRef: PCUtexref; hmod: PCUmodule;
    const name: PAnsiChar): TCUresult; stdcall;
  begin
    Result := cuModuleGetTexRef_(pTexRef, hmod, name);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuModuleGetTexRefName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemGetInfo_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuMemGetInfoShell(var free: Cardinal; var total: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemGetInfo_(free, total);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemGetInfoName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemAlloc_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuMemAllocShell(var dptr: TCUdeviceptr; bytesize: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemAlloc_(dptr, bytesize);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemAllocName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemAllocPitch_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuMemAllocPitchShell(var dptr: TCUdeviceptr; var pPitch: Cardinal;
    WidthInBytes: Cardinal; Height: Cardinal;
    ElementSizeBytes: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemAllocPitch_(dptr, pPitch, WidthInBytes, Height,
      ElementSizeBytes);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemAllocPitchName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemFree_ and, when the result is not
  // CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuMemFreeShell(dptr: TCUdeviceptr): TCUresult; stdcall;
  begin
    Result := cuMemFree_(dptr);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemFreeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemGetAddressRange_ and, when the
  // result is not CUDA_SUCCESS, reports the API name and error text through
  // GLSLogger.
  function cuMemGetAddressRangeShell(var pbase: TCUdeviceptr;
    var psize: Cardinal; dptr: TCUdeviceptr): TCUresult; stdcall;
  begin
    Result := cuMemGetAddressRange_(pbase, psize, dptr);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemGetAddressRangeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemAllocHost_ (pp is passed on as an
  // untyped var parameter) and, when the result is not CUDA_SUCCESS, reports
  // the API name and error text through GLSLogger.
  function cuMemAllocHostShell(var pp; bytesize: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemAllocHost_(pp, bytesize);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemAllocHostName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Debug-mode wrapper: forwards to cuMemFreeHost_ and, when the result is
  // not CUDA_SUCCESS, reports the API name and error text through GLSLogger.
  function cuMemFreeHostShell(p: Pointer): TCUresult; stdcall;
  begin
    Result := cuMemFreeHost_(p);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemFreeHostName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemHostAlloc_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemHostAllocShell(var pp: Pointer; bytesize: Cardinal;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemHostAlloc_(pp, bytesize, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemHostAllocName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemHostGetDevicePointer_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemHostGetDevicePointerShell(var pdptr: TCUdeviceptr; p: Pointer;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemHostGetDevicePointer_(pdptr, p, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemHostGetDevicePointerName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemHostGetFlags_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemHostGetFlagsShell(var pFlags: Cardinal; var p): TCUresult; stdcall;
  begin
    Result := cuMemHostGetFlags_(pFlags, p);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemHostGetFlagsName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyHtoD_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyHtoDShell(dstDevice: TCUdeviceptr; const srcHost: Pointer;
    ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyHtoD_(dstDevice, srcHost, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyHtoDName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyDtoH_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyDtoHShell(const dstHost: Pointer; srcDevice: TCUdeviceptr;
    ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyDtoH_(dstHost, srcDevice, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyDtoHName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyDtoD_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyDtoDShell(dstDevice: TCUdeviceptr; srcDevice: TCUdeviceptr;
    ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyDtoD_(dstDevice, srcDevice, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyDtoDName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyDtoDAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyDtoDAsyncShell(dstDevice: TCUdeviceptr;
    srcDevice: TCUdeviceptr; ByteCount: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpyDtoDAsync_(dstDevice, srcDevice, ByteCount, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyDtoDAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyDtoA_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyDtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
    srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyDtoA_(dstArray, dstIndex, srcDevice, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyDtoAName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyAtoD_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyAtoDShell(dstDevice: TCUdeviceptr; hSrc: PCUarray;
    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyAtoD_(dstDevice, hSrc, SrcIndex, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyAtoDName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyHtoA_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyHtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
    pSrc: Pointer; ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyHtoA_(dstArray, dstIndex, pSrc, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyHtoAName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyAtoH_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyAtoHShell(dstHost: Pointer; srcArray: PCUarray;
    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyAtoH_(dstHost, srcArray, SrcIndex, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyAtoHName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyAtoA_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyAtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
    srcArray: PCUarray; SrcIndex: Cardinal;
    ByteCount: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemcpyAtoA_(dstArray, dstIndex, srcArray, SrcIndex, ByteCount);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyAtoAName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpy2D_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpy2DShell(const pCopy: PCUDA_MEMCPY2D): TCUresult; stdcall;
  begin
    Result := cuMemcpy2D_(pCopy);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpy2DName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpy2DUnaligned_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpy2DUnalignedShell(var pCopy: TCUDA_MEMCPY2D): TCUresult; stdcall;
  begin
    Result := cuMemcpy2DUnaligned_(pCopy);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpy2DUnalignedName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpy3D_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpy3DShell(var pCopy: TCUDA_MEMCPY3D): TCUresult; stdcall;
  begin
    Result := cuMemcpy3D_(pCopy);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpy3DName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyHtoDAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyHtoDAsyncShell(dstDevice: TCUdeviceptr; var srcHost;
    ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpyHtoDAsync_(dstDevice, srcHost, ByteCount, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyHtoDAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyDtoHAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyDtoHAsyncShell(var dstHost; srcDevice: TCUdeviceptr;
    ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpyDtoHAsync_(dstHost, srcDevice, ByteCount, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyDtoHAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyHtoAAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpyHtoAAsyncShell(dstArray: PCUarray; dstIndex: Cardinal;
    var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpyHtoAAsync_(dstArray, dstIndex, pSrc, ByteCount, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyHtoAAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpyAtoHAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  // NOTE(review): srcArray is typed PCUstream although its name and position
  // suggest a source array; presumably PCUarray was intended - confirm against
  // the declaration of the cuMemcpyAtoHAsync_ procedural type before changing.
  function cuMemcpyAtoHAsyncShell(var dstHost; srcArray: PCUstream;
    SrcIndex: Cardinal; ByteCount: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpyAtoHAsync_(dstHost, srcArray, SrcIndex, ByteCount, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpyAtoHAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpy2DAsync_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpy2DAsyncShell(var pCopy: TCUDA_MEMCPY2D;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpy2DAsync_(pCopy, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpy2DAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemcpy3DAsync_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemcpy3DAsyncShell(var pCopy: TCUDA_MEMCPY3D;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuMemcpy3DAsync_(pCopy, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemcpy3DAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD8_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD8Shell(dstDevice: TCUdeviceptr; ub: Byte;
    N: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD8_(dstDevice, ub, N);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD8Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD16_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD16Shell(dstDevice: TCUdeviceptr; uw: Word;
    N: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD16_(dstDevice, uw, N);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD16Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD32_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD32Shell(dstDevice: TCUdeviceptr; ui: Cardinal;
    N: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD32_(dstDevice, ui, N);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD32Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD2D8_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD2D8Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    ub: Byte; Width: Cardinal; Height: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD2D8_(dstDevice, dstPitch, ub, Width, Height);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD2D8Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD2D16_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD2D16Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    uw: Word; Width: Cardinal; Height: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD2D16_(dstDevice, dstPitch, uw, Width, Height);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD2D16Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuMemsetD2D32_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuMemsetD2D32Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
    ui: Cardinal; Width: Cardinal; Height: Cardinal): TCUresult; stdcall;
  begin
    Result := cuMemsetD2D32_(dstDevice, dstPitch, ui, Width, Height);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuMemsetD2D32Name, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuFuncSetBlockShape_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuFuncSetBlockShapeShell(hfunc: PCUfunction; x: Integer; y: Integer;
    z: Integer): TCUresult; stdcall;
  begin
    Result := cuFuncSetBlockShape_(hfunc, x, y, z);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuFuncSetBlockShapeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuFuncSetSharedSize_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuFuncSetSharedSizeShell(hfunc: PCUfunction;
    bytes: Cardinal): TCUresult; stdcall;
  begin
    Result := cuFuncSetSharedSize_(hfunc, bytes);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuFuncSetSharedSizeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuFuncGetAttribute_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuFuncGetAttributeShell(var pi: Integer;
    attrib: TCUfunction_attribute; hfunc: PCUfunction): TCUresult; stdcall;
  begin
    Result := cuFuncGetAttribute_(pi, attrib, hfunc);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuFuncGetAttributeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuArrayCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuArrayCreateShell(var pHandle: PCUarray;
    var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult; stdcall;
  begin
    Result := cuArrayCreate_(pHandle, pAllocateArray);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuArrayCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuArrayGetDescriptor_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuArrayGetDescriptorShell(
    var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
    hArray: PCUarray): TCUresult; stdcall;
  begin
    Result := cuArrayGetDescriptor_(pArrayDescriptor, hArray);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuArrayGetDescriptorName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuArrayDestroy_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuArrayDestroyShell(hArray: PCUarray): TCUresult; stdcall;
  begin
    Result := cuArrayDestroy_(hArray);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuArrayDestroyName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuArray3DCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuArray3DCreateShell(var pHandle: PCUarray;
    var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult; stdcall;
  begin
    Result := cuArray3DCreate_(pHandle, pAllocateArray);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuArray3DCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuArray3DGetDescriptor_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuArray3DGetDescriptorShell(
    var pArrayDescriptor: TCUDA_ARRAY3D_DESCRIPTOR;
    hArray: PCUarray): TCUresult; stdcall;
  begin
    Result := cuArray3DGetDescriptor_(pArrayDescriptor, hArray);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuArray3DGetDescriptorName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefCreateShell(var pTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefCreate_(pTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefDestroy_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefDestroyShell(hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefDestroy_(hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefDestroyName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetArray_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetArrayShell(hTexRef: PCUtexref; hArray: PCUarray;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuTexRefSetArray_(hTexRef, hArray, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetArrayName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetAddress_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetAddressShell(var ByteOffset: Cardinal; hTexRef: PCUtexref;
    dptr: TCUdeviceptr; bytes: Cardinal): TCUresult; stdcall;
  begin
    Result := cuTexRefSetAddress_(ByteOffset, hTexRef, dptr, bytes);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetAddressName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetAddress2D_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetAddress2DShell(hTexRef: PCUtexref;
    var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr;
    Pitch: Cardinal): TCUresult; stdcall;
  begin
    Result := cuTexRefSetAddress2D_(hTexRef, desc, dptr, Pitch);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetAddress2DName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetFormat_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetFormatShell(hTexRef: PCUtexref; fmt: TCUarray_format;
    NumPackedComponents: Integer): TCUresult; stdcall;
  begin
    Result := cuTexRefSetFormat_(hTexRef, fmt, NumPackedComponents);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetFormatName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetAddressMode_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetAddressModeShell(hTexRef: PCUtexref; dim: Integer;
    am: TCUaddress_mode): TCUresult; stdcall;
  begin
    Result := cuTexRefSetAddressMode_(hTexRef, dim, am);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetAddressModeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetFilterMode_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetFilterModeShell(hTexRef: PCUtexref;
    fm: TCUfilter_mode): TCUresult; stdcall;
  begin
    Result := cuTexRefSetFilterMode_(hTexRef, fm);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetFilterModeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefSetFlags_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefSetFlagsShell(hTexRef: PCUtexref;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuTexRefSetFlags_(hTexRef, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefSetFlagsName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetAddress_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetAddressShell(var pdptr: TCUdeviceptr;
    hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefGetAddress_(pdptr, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetAddressName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetArray_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetArrayShell(var phArray: PCUarray;
    hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefGetArray_(phArray, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetArrayName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetAddressMode_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetAddressModeShell(var pam: TCUaddress_mode;
    hTexRef: PCUtexref; dim: Integer): TCUresult; stdcall;
  begin
    Result := cuTexRefGetAddressMode_(pam, hTexRef, dim);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetAddressModeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetFilterMode_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetFilterModeShell(var pfm: TCUfilter_mode;
    hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefGetFilterMode_(pfm, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetFilterModeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetFormat_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetFormatShell(var pFormat: TCUarray_format;
    var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefGetFormat_(pFormat, pNumChannels, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetFormatName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuTexRefGetFlags_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuTexRefGetFlagsShell(var pFlags: Cardinal;
    hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuTexRefGetFlags_(pFlags, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuTexRefGetFlagsName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuParamSetSize_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuParamSetSizeShell(hfunc: PCUfunction;
    numbytes: Cardinal): TCUresult; stdcall;
  begin
    Result := cuParamSetSize_(hfunc, numbytes);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuParamSetSizeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuParamSeti_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuParamSetiShell(hfunc: PCUfunction; offset: Integer;
    value: Cardinal): TCUresult; stdcall;
  begin
    Result := cuParamSeti_(hfunc, offset, value);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuParamSetiName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuParamSetf_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuParamSetfShell(hfunc: PCUfunction; offset: Integer;
    value: Single): TCUresult; stdcall;
  begin
    Result := cuParamSetf_(hfunc, offset, value);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuParamSetfName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuParamSetv_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuParamSetvShell(hfunc: PCUfunction; offset: Integer; var ptr;
    numbytes: Cardinal): TCUresult; stdcall;
  begin
    Result := cuParamSetv_(hfunc, offset, ptr, numbytes);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuParamSetvName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuParamSetTexRef_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuParamSetTexRefShell(hfunc: PCUfunction; texunit: Integer;
    hTexRef: PCUtexref): TCUresult; stdcall;
  begin
    Result := cuParamSetTexRef_(hfunc, texunit, hTexRef);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuParamSetTexRefName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuLaunch_, returns its status unchanged and
  // reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuLaunchShell(f: PCUfunction): TCUresult; stdcall;
  begin
    Result := cuLaunch_(f);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuLaunchName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuLaunchGrid_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuLaunchGridShell(f: PCUfunction; grid_width: Integer;
    grid_height: Integer): TCUresult; stdcall;
  begin
    Result := cuLaunchGrid_(f, grid_width, grid_height);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuLaunchGridName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuLaunchGridAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuLaunchGridAsyncShell(f: PCUfunction; grid_width: Integer;
    grid_height: Integer; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuLaunchGridAsync_(f, grid_width, grid_height, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuLaunchGridAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuEventCreateShell(var phEvent: PCUevent;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuEventCreate_(phEvent, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuEventCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventRecord_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuEventRecordShell(hEvent: PCUevent;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuEventRecord_(hEvent, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuEventRecordName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventQuery_ and returns its status unchanged.
  // A query is a polling call: the driver answers CUDA_ERROR_NOT_READY while the
  // event is still pending, so that status is not reported as an error. Every
  // other status different from CUDA_SUCCESS is logged through GLSLogger.
  // NOTE(review): relies on CUDA_ERROR_NOT_READY being declared in this unit
  // alongside CUDA_SUCCESS - confirm.
  function cuEventQueryShell(hEvent: PCUevent): TCUresult; stdcall;
  begin
    Result := cuEventQuery_(hEvent);
    if (Result <> CUDA_SUCCESS) and (Result <> CUDA_ERROR_NOT_READY) then
      GLSLogger.LogErrorFmt(cudasFuncRetErr,
        [cuEventQueryName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventSynchronize_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuEventSynchronizeShell(hEvent: PCUevent): TCUresult; stdcall;
  begin
    Result := cuEventSynchronize_(hEvent);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuEventSynchronizeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventDestroy_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuEventDestroyShell(hEvent: PCUevent): TCUresult; stdcall;
  begin
    Result := cuEventDestroy_(hEvent);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuEventDestroyName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuEventElapsedTime_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuEventElapsedTimeShell(var pMilliseconds: Single; hStart: PCUevent;
    hEnd: PCUevent): TCUresult; stdcall;
  begin
    Result := cuEventElapsedTime_(pMilliseconds, hStart, hEnd);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuEventElapsedTimeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuStreamCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuStreamCreateShell(var phStream: PCUstream;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuStreamCreate_(phStream, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuStreamCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuStreamQuery_ and returns its status
  // unchanged. A query is a polling call: the driver answers
  // CUDA_ERROR_NOT_READY while work in the stream is still pending, so that
  // status is not reported as an error. Every other status different from
  // CUDA_SUCCESS is logged through GLSLogger.
  // NOTE(review): relies on CUDA_ERROR_NOT_READY being declared in this unit
  // alongside CUDA_SUCCESS - confirm.
  function cuStreamQueryShell(hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuStreamQuery_(hStream);
    if (Result <> CUDA_SUCCESS) and (Result <> CUDA_ERROR_NOT_READY) then
      GLSLogger.LogErrorFmt(cudasFuncRetErr,
        [cuStreamQueryName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuStreamSynchronize_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuStreamSynchronizeShell(hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuStreamSynchronize_(hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuStreamSynchronizeName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuStreamDestroy_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuStreamDestroyShell(hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuStreamDestroy_(hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuStreamDestroyName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLCtxCreate_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal;
    device: TCUdevice): TCUresult; stdcall;
  begin
    Result := cuGLCtxCreate_(pctx, Flags, device);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLCtxCreateName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsGLRegisterBuffer_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGraphicsGLRegisterBufferShell(
    var pCudaResource: PCUgraphicsResource; buffer: Cardinal;
    Flags: TCUgraphicsMapResourceFlags): TCUresult; stdcall;
  begin
    Result := cuGraphicsGLRegisterBuffer_(pCudaResource, buffer, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsGLRegisterBufferName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsGLRegisterImage_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGraphicsGLRegisterImageShell(
    var pCudaResource: PCUgraphicsResource; image, target: Cardinal;
    Flags: TCUgraphicsMapResourceFlags): TCUresult; stdcall;
  begin
    Result := cuGraphicsGLRegisterImage_(pCudaResource, image, target, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsGLRegisterImageName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuWGLGetDevice_, returns its status unchanged
  // and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuWGLGetDeviceShell(var pDevice: TCUdevice;
    hGpu: HGPUNV): TCUresult; stdcall;
  begin
    Result := cuWGLGetDevice_(pDevice, hGpu);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuWGLGetDeviceName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsUnregisterResource_, returns its
  // status unchanged and reports any status other than CUDA_SUCCESS through
  // GLSLogger.
  function cuGraphicsUnregisterResourceShell(
    resource: PCUgraphicsResource): TCUresult; stdcall;
  begin
    Result := cuGraphicsUnregisterResource_(resource);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsUnregisterResourceName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsSubResourceGetMappedArray_, returns
  // its status unchanged and reports any status other than CUDA_SUCCESS through
  // GLSLogger.
  function cuGraphicsSubResourceGetMappedArrayShell(var pArray: PCUarray;
    resource: PCUgraphicsResource; arrayIndex: Cardinal;
    mipLevel: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGraphicsSubResourceGetMappedArray_(pArray, resource,
      arrayIndex, mipLevel);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsSubResourceGetMappedArrayName,
      Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsResourceGetMappedPointer_, returns
  // its status unchanged and reports any status other than CUDA_SUCCESS through
  // GLSLogger.
  function cuGraphicsResourceGetMappedPointerShell(var pDevPtr: TCUdeviceptr;
    out psize: Cardinal; resource: PCUgraphicsResource): TCUresult; stdcall;
  begin
    Result := cuGraphicsResourceGetMappedPointer_(pDevPtr, psize, resource);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsResourceGetMappedPointerName,
      Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsResourceSetMapFlags_, returns its
  // status unchanged and reports any status other than CUDA_SUCCESS through
  // GLSLogger.
  function cuGraphicsResourceSetMapFlagsShell(resource: PCUgraphicsResource;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGraphicsResourceSetMapFlags_(resource, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsResourceSetMapFlagsName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsMapResources_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGraphicsMapResourcesShell(count: Cardinal;
    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuGraphicsMapResources_(count, resources, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsMapResourcesName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGraphicsUnmapResources_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGraphicsUnmapResourcesShell(count: Cardinal;
    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuGraphicsUnmapResources_(count, resources, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGraphicsUnmapResourcesName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLRegisterBufferObject_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLRegisterBufferObjectShell(buffer: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGLRegisterBufferObject_(buffer);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLRegisterBufferObjectName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLMapBufferObject_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLMapBufferObjectShell(var dptr: TCUdeviceptr; var size: Cardinal;
    buffer: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGLMapBufferObject_(dptr, size, buffer);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLMapBufferObjectName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLUnmapBufferObject_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLUnmapBufferObjectShell(buffer: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGLUnmapBufferObject_(buffer);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLUnmapBufferObjectName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLUnregisterBufferObject_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLUnregisterBufferObjectShell(buffer: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGLUnregisterBufferObject_(buffer);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLUnregisterBufferObjectName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLSetBufferObjectMapFlags_, returns its
  // status unchanged and reports any status other than CUDA_SUCCESS through
  // GLSLogger.
  function cuGLSetBufferObjectMapFlagsShell(buffer: Cardinal;
    Flags: Cardinal): TCUresult; stdcall;
  begin
    Result := cuGLSetBufferObjectMapFlags_(buffer, Flags);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLSetBufferObjectMapFlagsName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLMapBufferObjectAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLMapBufferObjectAsyncShell(var dptr: TCUdeviceptr;
    var size: Cardinal; buffer: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuGLMapBufferObjectAsync_(dptr, size, buffer, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLMapBufferObjectAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  // Logging wrapper: forwards to cuGLUnmapBufferObjectAsync_, returns its status
  // unchanged and reports any status other than CUDA_SUCCESS through GLSLogger.
  function cuGLUnmapBufferObjectAsyncShell(buffer: Cardinal;
    hStream: PCUstream): TCUresult; stdcall;
  begin
    Result := cuGLUnmapBufferObjectAsync_(buffer, hStream);
    if Result = CUDA_SUCCESS then
      Exit;
    GLSLogger.LogErrorFmt(cudasFuncRetErr,
      [cuGLUnmapBufferObjectAsyncName, Get_CUDA_API_Error_String(Result)]);
  end;
  1980. {$ENDIF GLS_CUDA_DEBUG_MODE}
  // Looks up an exported entry point in the library referenced by CUDAHandle.
  // The name with a '_v2' suffix is tried first; if that export is missing the
  // plain ProcName is looked up instead. Returns nil when neither is exported.
  function GetProcAddressCUDA(ProcName: PAnsiChar): Pointer;
  var
    Alt: AnsiString;
  begin
    Alt := AnsiString(ProcName) + '_v2';
    // HMODULE is pointer-sized; the previous Cardinal cast dropped the upper
    // 32 bits of the module handle when compiled for a 64-bit target.
    Result := GetProcAddress(HMODULE(CUDAHandle), PAnsiChar(Alt));
    if Result = nil then
      Result := GetProcAddress(HMODULE(CUDAHandle), ProcName);
  end;
  // Ensures the CUDA library is loaded. Returns True immediately when
  // CUDAHandle already holds a module handle; otherwise returns the outcome of
  // InitCUDAFromLibrary(CUDAAPIDLL).
  function InitCUDA: Boolean;
  begin
    Result := True;
    if CUDAHandle = INVALID_MODULEHANDLE then
      Result := InitCUDAFromLibrary(CUDAAPIDLL);
  end;
  // Releases the library referenced by CUDAHandle, if any, and resets the
  // handle to INVALID_MODULEHANDLE. Does nothing when no library is loaded.
  // NOTE(review): InitCUDAFromLibrary first tries GetModuleHandleW, which does
  // not add a reference to the module; confirm that calling FreeLibrary here is
  // balanced in that case.
  procedure CloseCUDA;
  begin
    if CUDAHandle = INVALID_MODULEHANDLE then
      Exit;
    // HMODULE is pointer-sized; the previous Cardinal cast dropped the upper
    // 32 bits of the module handle when compiled for a 64-bit target.
    FreeLibrary(HMODULE(CUDAHandle));
    CUDAHandle := INVALID_MODULEHANDLE;
  end;
  // Loads the CUDA driver library LibName and binds every imported entry point.
  //
  // Any previously loaded library is released first (CloseCUDA). Each function
  // variable is then filled through GetProcAddressCUDA. When
  // USE_CUDA_DEBUG_MODE is defined the real address is stored in the
  // underscore-suffixed variable and the public variable is pointed at the
  // matching ...Shell wrapper instead.
  //
  // LibName: file name or path of the library to load.
  // Returns: True when the library was loaded and exports cuDriverGetVersion;
  //          False when it could not be loaded or lacks that export (in which
  //          case the library is released again).
  //
  // Entry points other than cuDriverGetVersion are not validated here; a
  // missing export leaves the corresponding variable nil.
  function InitCUDAFromLibrary(const LibName: WideString): Boolean;
  var
    DriverVersion: Integer;
  begin
    Result := False;
    CloseCUDA;

    // Always take our own reference with LoadLibraryW. A handle obtained from
    // GetModuleHandleW is not reference-counted, so the FreeLibrary issued by
    // CloseCUDA would unbalance the count and could unload a module that other
    // code still uses. LoadLibraryW simply bumps the count when the module is
    // already mapped.
    CUDAHandle := LoadLibraryW(PWideChar(LibName));
    if CUDAHandle = INVALID_MODULEHANDLE then
      Exit;

    // cuDriverGetVersion is invoked unconditionally below; without this export
    // that call would go through a nil pointer, so reject the library up front.
    if GetProcAddressCUDA(cuDriverGetVersionName) = nil then
    begin
      CloseCUDA;
      Exit;
    end;

  {$IFNDEF USE_CUDA_DEBUG_MODE}
    // Direct binding: public variables point straight at the driver exports.
    cuInit := GetProcAddressCUDA(cuInitName);
    cuDriverGetVersion := GetProcAddressCUDA(cuDriverGetVersionName);
    cuDeviceGet := GetProcAddressCUDA(cuDeviceGet_Name);
    cuDeviceGetCount := GetProcAddressCUDA(cuDeviceGetCountName);
    cuDeviceGetName := GetProcAddressCUDA(cuDeviceGetNameName);
    cuDeviceComputeCapability := GetProcAddressCUDA(cuDeviceComputeCapabilityName);
    cuDeviceTotalMem := GetProcAddressCUDA(cuDeviceTotalMemName);
    cuDeviceGetProperties := GetProcAddressCUDA(cuDeviceGetPropertiesName);
    cuDeviceGetAttribute := GetProcAddressCUDA(cuDeviceGetAttributeName);
    cuCtxCreate := GetProcAddressCUDA(cuCtxCreateName);
    cuCtxDestroy := GetProcAddressCUDA(cuCtxDestroyName);
    cuCtxAttach := GetProcAddressCUDA(cuCtxAttachName);
    cuCtxDetach := GetProcAddressCUDA(cuCtxDetachName);
    cuCtxPushCurrent := GetProcAddressCUDA(cuCtxPushCurrentName);
    cuCtxPopCurrent := GetProcAddressCUDA(cuCtxPopCurrentName);
    cuCtxGetDevice := GetProcAddressCUDA(cuCtxGetDeviceName);
    cuCtxSynchronize := GetProcAddressCUDA(cuCtxSynchronizeName);
    cuModuleLoad := GetProcAddressCUDA(cuModuleLoadName);
    cuModuleLoadData := GetProcAddressCUDA(cuModuleLoadDataName);
    cuModuleLoadDataEx := GetProcAddressCUDA(cuModuleLoadDataExName);
    cuModuleLoadFatBinary := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
    cuModuleUnload := GetProcAddressCUDA(cuModuleUnloadName);
    cuModuleGetFunction := GetProcAddressCUDA(cuModuleGetFunctionName);
    cuModuleGetGlobal := GetProcAddressCUDA(cuModuleGetGlobalName);
    cuModuleGetTexRef := GetProcAddressCUDA(cuModuleGetTexRefName);
    cuMemGetInfo := GetProcAddressCUDA(cuMemGetInfoName);
    cuMemAlloc := GetProcAddressCUDA(cuMemAllocName);
    cuMemAllocPitch := GetProcAddressCUDA(cuMemAllocPitchName);
    cuMemFree := GetProcAddressCUDA(cuMemFreeName);
    cuMemGetAddressRange := GetProcAddressCUDA(cuMemGetAddressRangeName);
    cuMemAllocHost := GetProcAddressCUDA(cuMemAllocHostName);
    cuMemFreeHost := GetProcAddressCUDA(cuMemFreeHostName);
    cuMemHostAlloc := GetProcAddressCUDA(cuMemHostAllocName);
    cuMemHostGetDevicePointer := GetProcAddressCUDA(cuMemHostGetDevicePointerName);
    cuMemHostGetFlags := GetProcAddressCUDA(cuMemHostGetFlagsName);
    cuMemcpyHtoD := GetProcAddressCUDA(cuMemcpyHtoDName);
    cuMemcpyDtoH := GetProcAddressCUDA(cuMemcpyDtoHName);
    cuMemcpyDtoD := GetProcAddressCUDA(cuMemcpyDtoDName);
    cuMemcpyDtoDAsync := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
    cuMemcpyDtoA := GetProcAddressCUDA(cuMemcpyDtoAName);
    cuMemcpyAtoD := GetProcAddressCUDA(cuMemcpyAtoDName);
    cuMemcpyHtoA := GetProcAddressCUDA(cuMemcpyHtoAName);
    cuMemcpyAtoH := GetProcAddressCUDA(cuMemcpyAtoHName);
    cuMemcpyAtoA := GetProcAddressCUDA(cuMemcpyAtoAName);
    cuMemcpy2D := GetProcAddressCUDA(cuMemcpy2DName);
    cuMemcpy2DUnaligned := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
    cuMemcpy3D := GetProcAddressCUDA(cuMemcpy3DName);
    cuMemcpyHtoDAsync := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
    cuMemcpyDtoHAsync := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
    cuMemcpyHtoAAsync := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
    cuMemcpyAtoHAsync := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
    cuMemcpy2DAsync := GetProcAddressCUDA(cuMemcpy2DAsyncName);
    cuMemcpy3DAsync := GetProcAddressCUDA(cuMemcpy3DAsyncName);
    cuMemsetD8 := GetProcAddressCUDA(cuMemsetD8Name);
    cuMemsetD16 := GetProcAddressCUDA(cuMemsetD16Name);
    cuMemsetD32 := GetProcAddressCUDA(cuMemsetD32Name);
    cuMemsetD2D8 := GetProcAddressCUDA(cuMemsetD2D8Name);
    cuMemsetD2D16 := GetProcAddressCUDA(cuMemsetD2D16Name);
    cuMemsetD2D32 := GetProcAddressCUDA(cuMemsetD2D32Name);
    cuFuncSetBlockShape := GetProcAddressCUDA(cuFuncSetBlockShapeName);
    cuFuncSetSharedSize := GetProcAddressCUDA(cuFuncSetSharedSizeName);
    cuFuncGetAttribute := GetProcAddressCUDA(cuFuncGetAttributeName);
    cuArrayCreate := GetProcAddressCUDA(cuArrayCreateName);
    cuArrayGetDescriptor := GetProcAddressCUDA(cuArrayGetDescriptorName);
    cuArrayDestroy := GetProcAddressCUDA(cuArrayDestroyName);
    cuArray3DCreate := GetProcAddressCUDA(cuArray3DCreateName);
    cuArray3DGetDescriptor := GetProcAddressCUDA(cuArray3DGetDescriptorName);
    cuTexRefCreate := GetProcAddressCUDA(cuTexRefCreateName);
    cuTexRefDestroy := GetProcAddressCUDA(cuTexRefDestroyName);
    cuTexRefSetArray := GetProcAddressCUDA(cuTexRefSetArrayName);
    cuTexRefSetAddress := GetProcAddressCUDA(cuTexRefSetAddressName);
    cuTexRefSetAddress2D := GetProcAddressCUDA(cuTexRefSetAddress2DName);
    cuTexRefSetFormat := GetProcAddressCUDA(cuTexRefSetFormatName);
    cuTexRefSetAddressMode := GetProcAddressCUDA(cuTexRefSetAddressModeName);
    cuTexRefSetFilterMode := GetProcAddressCUDA(cuTexRefSetFilterModeName);
    cuTexRefSetFlags := GetProcAddressCUDA(cuTexRefSetFlagsName);
    cuTexRefGetAddress := GetProcAddressCUDA(cuTexRefGetAddressName);
    cuTexRefGetArray := GetProcAddressCUDA(cuTexRefGetArrayName);
    cuTexRefGetAddressMode := GetProcAddressCUDA(cuTexRefGetAddressModeName);
    cuTexRefGetFilterMode := GetProcAddressCUDA(cuTexRefGetFilterModeName);
    cuTexRefGetFormat := GetProcAddressCUDA(cuTexRefGetFormatName);
    cuTexRefGetFlags := GetProcAddressCUDA(cuTexRefGetFlagsName);
    cuParamSetSize := GetProcAddressCUDA(cuParamSetSizeName);
    cuParamSeti := GetProcAddressCUDA(cuParamSetiName);
    cuParamSetf := GetProcAddressCUDA(cuParamSetfName);
    cuParamSetv := GetProcAddressCUDA(cuParamSetvName);
    cuParamSetTexRef := GetProcAddressCUDA(cuParamSetTexRefName);
    cuLaunch := GetProcAddressCUDA(cuLaunchName);
    cuLaunchGrid := GetProcAddressCUDA(cuLaunchGridName);
    cuLaunchGridAsync := GetProcAddressCUDA(cuLaunchGridAsyncName);
    cuEventCreate := GetProcAddressCUDA(cuEventCreateName);
    cuEventRecord := GetProcAddressCUDA(cuEventRecordName);
    cuEventQuery := GetProcAddressCUDA(cuEventQueryName);
    cuEventSynchronize := GetProcAddressCUDA(cuEventSynchronizeName);
    cuEventDestroy := GetProcAddressCUDA(cuEventDestroyName);
    cuEventElapsedTime := GetProcAddressCUDA(cuEventElapsedTimeName);
    cuStreamCreate := GetProcAddressCUDA(cuStreamCreateName);
    cuStreamQuery := GetProcAddressCUDA(cuStreamQueryName);
    cuStreamSynchronize := GetProcAddressCUDA(cuStreamSynchronizeName);
    cuStreamDestroy := GetProcAddressCUDA(cuStreamDestroyName);
    cuGLCtxCreate := GetProcAddressCUDA(cuGLCtxCreateName);
    cuGraphicsGLRegisterBuffer := GetProcAddressCUDA(cuGraphicsGLRegisterBufferName);
    cuGraphicsGLRegisterImage := GetProcAddressCUDA(cuGraphicsGLRegisterImageName);
    cuWGLGetDevice := GetProcAddressCUDA(cuWGLGetDeviceName);
    cuGraphicsUnregisterResource := GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
    cuGraphicsSubResourceGetMappedArray := GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
    cuGraphicsResourceGetMappedPointer := GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
    cuGraphicsResourceSetMapFlags := GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
    cuGraphicsMapResources := GetProcAddressCUDA(cuGraphicsMapResourcesName);
    cuGraphicsUnmapResources := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
    cuGLInit := GetProcAddressCUDA(cuGLInitName);
    cuGLRegisterBufferObject := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
    cuGLMapBufferObject := GetProcAddressCUDA(cuGLMapBufferObjectName);
    cuGLUnmapBufferObject := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
    cuGLUnregisterBufferObject := GetProcAddressCUDA(cuGLUnregisterBufferObjectName);
    cuGLSetBufferObjectMapFlags := GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
    cuGLMapBufferObjectAsync := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
    cuGLUnmapBufferObjectAsync := GetProcAddressCUDA(cuGLUnmapBufferObjectAsyncName);
  {$ELSE}
    // Debug binding: the driver export goes into the underscore-suffixed
    // variable, the public variable is routed through the logging shell.
    cuInit_ := GetProcAddressCUDA(cuInitName);
    cuInit := cuInitShell;
    cuDriverGetVersion_ := GetProcAddressCUDA(cuDriverGetVersionName);
    cuDriverGetVersion := cuDriverGetVersionShell;
    cuDeviceGet_ := GetProcAddressCUDA(cuDeviceGet_Name);
    cuDeviceGet := cuDeviceGetShell;
    cuDeviceGetCount_ := GetProcAddressCUDA(cuDeviceGetCountName);
    cuDeviceGetCount := cuDeviceGetCountShell;
    cuDeviceGetName_ := GetProcAddressCUDA(cuDeviceGetNameName);
    cuDeviceGetName := cuDeviceGetNameShell;
    cuDeviceComputeCapability_ := GetProcAddressCUDA(cuDeviceComputeCapabilityName);
    cuDeviceComputeCapability := cuDeviceComputeCapabilityShell;
    cuDeviceTotalMem_ := GetProcAddressCUDA(cuDeviceTotalMemName);
    cuDeviceTotalMem := cuDeviceTotalMemShell;
    cuDeviceGetProperties_ := GetProcAddressCUDA(cuDeviceGetPropertiesName);
    cuDeviceGetProperties := cuDeviceGetPropertiesShell;
    cuDeviceGetAttribute_ := GetProcAddressCUDA(cuDeviceGetAttributeName);
    cuDeviceGetAttribute := cuDeviceGetAttributeShell;
    cuCtxCreate_ := GetProcAddressCUDA(cuCtxCreateName);
    cuCtxCreate := cuCtxCreateShell;
    cuCtxDestroy_ := GetProcAddressCUDA(cuCtxDestroyName);
    cuCtxDestroy := cuCtxDestroyShell;
    cuCtxAttach_ := GetProcAddressCUDA(cuCtxAttachName);
    cuCtxAttach := cuCtxAttachShell;
    cuCtxDetach_ := GetProcAddressCUDA(cuCtxDetachName);
    cuCtxDetach := cuCtxDetachShell;
    cuCtxPushCurrent_ := GetProcAddressCUDA(cuCtxPushCurrentName);
    cuCtxPushCurrent := cuCtxPushCurrentShell;
    cuCtxPopCurrent_ := GetProcAddressCUDA(cuCtxPopCurrentName);
    cuCtxPopCurrent := cuCtxPopCurrentShell;
    cuCtxGetDevice_ := GetProcAddressCUDA(cuCtxGetDeviceName);
    cuCtxGetDevice := cuCtxGetDeviceShell;
    cuCtxSynchronize_ := GetProcAddressCUDA(cuCtxSynchronizeName);
    cuCtxSynchronize := cuCtxSynchronizeShell;
    cuModuleLoad_ := GetProcAddressCUDA(cuModuleLoadName);
    cuModuleLoad := cuModuleLoadShell;
    cuModuleLoadData_ := GetProcAddressCUDA(cuModuleLoadDataName);
    cuModuleLoadData := cuModuleLoadDataShell;
    cuModuleLoadDataEx_ := GetProcAddressCUDA(cuModuleLoadDataExName);
    cuModuleLoadDataEx := cuModuleLoadDataExShell;
    cuModuleLoadFatBinary_ := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
    cuModuleLoadFatBinary := cuModuleLoadFatBinaryShell;
    cuModuleUnload_ := GetProcAddressCUDA(cuModuleUnloadName);
    cuModuleUnload := cuModuleUnloadShell;
    cuModuleGetFunction_ := GetProcAddressCUDA(cuModuleGetFunctionName);
    cuModuleGetFunction := cuModuleGetFunctionShell;
    cuModuleGetGlobal_ := GetProcAddressCUDA(cuModuleGetGlobalName);
    cuModuleGetGlobal := cuModuleGetGlobalShell;
    cuModuleGetTexRef_ := GetProcAddressCUDA(cuModuleGetTexRefName);
    cuModuleGetTexRef := cuModuleGetTexRefShell;
    cuMemGetInfo_ := GetProcAddressCUDA(cuMemGetInfoName);
    cuMemGetInfo := cuMemGetInfoShell;
    cuMemAlloc_ := GetProcAddressCUDA(cuMemAllocName);
    cuMemAlloc := cuMemAllocShell;
    cuMemAllocPitch_ := GetProcAddressCUDA(cuMemAllocPitchName);
    cuMemAllocPitch := cuMemAllocPitchShell;
    cuMemFree_ := GetProcAddressCUDA(cuMemFreeName);
    cuMemFree := cuMemFreeShell;
    cuMemGetAddressRange_ := GetProcAddressCUDA(cuMemGetAddressRangeName);
    cuMemGetAddressRange := cuMemGetAddressRangeShell;
    cuMemAllocHost_ := GetProcAddressCUDA(cuMemAllocHostName);
    cuMemAllocHost := cuMemAllocHostShell;
    cuMemFreeHost_ := GetProcAddressCUDA(cuMemFreeHostName);
    cuMemFreeHost := cuMemFreeHostShell;
    cuMemHostAlloc_ := GetProcAddressCUDA(cuMemHostAllocName);
    cuMemHostAlloc := cuMemHostAllocShell;
    cuMemHostGetDevicePointer_ := GetProcAddressCUDA(cuMemHostGetDevicePointerName);
    cuMemHostGetDevicePointer := cuMemHostGetDevicePointerShell;
    cuMemHostGetFlags_ := GetProcAddressCUDA(cuMemHostGetFlagsName);
    cuMemHostGetFlags := cuMemHostGetFlagsShell;
    cuMemcpyHtoD_ := GetProcAddressCUDA(cuMemcpyHtoDName);
    cuMemcpyHtoD := cuMemcpyHtoDShell;
    cuMemcpyDtoH_ := GetProcAddressCUDA(cuMemcpyDtoHName);
    cuMemcpyDtoH := cuMemcpyDtoHShell;
    cuMemcpyDtoD_ := GetProcAddressCUDA(cuMemcpyDtoDName);
    cuMemcpyDtoD := cuMemcpyDtoDShell;
    cuMemcpyDtoDAsync_ := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
    cuMemcpyDtoDAsync := cuMemcpyDtoDAsyncShell;
    cuMemcpyDtoA_ := GetProcAddressCUDA(cuMemcpyDtoAName);
    cuMemcpyDtoA := cuMemcpyDtoAShell;
    cuMemcpyAtoD_ := GetProcAddressCUDA(cuMemcpyAtoDName);
    cuMemcpyAtoD := cuMemcpyAtoDShell;
    cuMemcpyHtoA_ := GetProcAddressCUDA(cuMemcpyHtoAName);
    cuMemcpyHtoA := cuMemcpyHtoAShell;
    cuMemcpyAtoH_ := GetProcAddressCUDA(cuMemcpyAtoHName);
    cuMemcpyAtoH := cuMemcpyAtoHShell;
    cuMemcpyAtoA_ := GetProcAddressCUDA(cuMemcpyAtoAName);
    cuMemcpyAtoA := cuMemcpyAtoAShell;
    cuMemcpy2D_ := GetProcAddressCUDA(cuMemcpy2DName);
    cuMemcpy2D := cuMemcpy2DShell;
    cuMemcpy2DUnaligned_ := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
    cuMemcpy2DUnaligned := cuMemcpy2DUnalignedShell;
    cuMemcpy3D_ := GetProcAddressCUDA(cuMemcpy3DName);
    cuMemcpy3D := cuMemcpy3DShell;
    cuMemcpyHtoDAsync_ := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
    cuMemcpyHtoDAsync := cuMemcpyHtoDAsyncShell;
    cuMemcpyDtoHAsync_ := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
    cuMemcpyDtoHAsync := cuMemcpyDtoHAsyncShell;
    cuMemcpyHtoAAsync_ := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
    cuMemcpyHtoAAsync := cuMemcpyHtoAAsyncShell;
    cuMemcpyAtoHAsync_ := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
    cuMemcpyAtoHAsync := cuMemcpyAtoHAsyncShell;
    cuMemcpy2DAsync_ := GetProcAddressCUDA(cuMemcpy2DAsyncName);
    cuMemcpy2DAsync := cuMemcpy2DAsyncShell;
    cuMemcpy3DAsync_ := GetProcAddressCUDA(cuMemcpy3DAsyncName);
    cuMemcpy3DAsync := cuMemcpy3DAsyncShell;
    cuMemsetD8_ := GetProcAddressCUDA(cuMemsetD8Name);
    cuMemsetD8 := cuMemsetD8Shell;
    cuMemsetD16_ := GetProcAddressCUDA(cuMemsetD16Name);
    cuMemsetD16 := cuMemsetD16Shell;
    cuMemsetD32_ := GetProcAddressCUDA(cuMemsetD32Name);
    cuMemsetD32 := cuMemsetD32Shell;
    cuMemsetD2D8_ := GetProcAddressCUDA(cuMemsetD2D8Name);
    cuMemsetD2D8 := cuMemsetD2D8Shell;
    cuMemsetD2D16_ := GetProcAddressCUDA(cuMemsetD2D16Name);
    cuMemsetD2D16 := cuMemsetD2D16Shell;
    cuMemsetD2D32_ := GetProcAddressCUDA(cuMemsetD2D32Name);
    cuMemsetD2D32 := cuMemsetD2D32Shell;
    cuFuncSetBlockShape_ := GetProcAddressCUDA(cuFuncSetBlockShapeName);
    cuFuncSetBlockShape := cuFuncSetBlockShapeShell;
    cuFuncSetSharedSize_ := GetProcAddressCUDA(cuFuncSetSharedSizeName);
    cuFuncSetSharedSize := cuFuncSetSharedSizeShell;
    cuFuncGetAttribute_ := GetProcAddressCUDA(cuFuncGetAttributeName);
    cuFuncGetAttribute := cuFuncGetAttributeShell;
    cuArrayCreate_ := GetProcAddressCUDA(cuArrayCreateName);
    cuArrayCreate := cuArrayCreateShell;
    cuArrayGetDescriptor_ := GetProcAddressCUDA(cuArrayGetDescriptorName);
    cuArrayGetDescriptor := cuArrayGetDescriptorShell;
    cuArrayDestroy_ := GetProcAddressCUDA(cuArrayDestroyName);
    cuArrayDestroy := cuArrayDestroyShell;
    cuArray3DCreate_ := GetProcAddressCUDA(cuArray3DCreateName);
    cuArray3DCreate := cuArray3DCreateShell;
    cuArray3DGetDescriptor_ := GetProcAddressCUDA(cuArray3DGetDescriptorName);
    cuArray3DGetDescriptor := cuArray3DGetDescriptorShell;
    cuTexRefCreate_ := GetProcAddressCUDA(cuTexRefCreateName);
    cuTexRefCreate := cuTexRefCreateShell;
    cuTexRefDestroy_ := GetProcAddressCUDA(cuTexRefDestroyName);
    cuTexRefDestroy := cuTexRefDestroyShell;
    cuTexRefSetArray_ := GetProcAddressCUDA(cuTexRefSetArrayName);
    cuTexRefSetArray := cuTexRefSetArrayShell;
    cuTexRefSetAddress_ := GetProcAddressCUDA(cuTexRefSetAddressName);
    cuTexRefSetAddress := cuTexRefSetAddressShell;
    cuTexRefSetAddress2D_ := GetProcAddressCUDA(cuTexRefSetAddress2DName);
    cuTexRefSetAddress2D := cuTexRefSetAddress2DShell;
    cuTexRefSetFormat_ := GetProcAddressCUDA(cuTexRefSetFormatName);
    cuTexRefSetFormat := cuTexRefSetFormatShell;
    cuTexRefSetAddressMode_ := GetProcAddressCUDA(cuTexRefSetAddressModeName);
    cuTexRefSetAddressMode := cuTexRefSetAddressModeShell;
    cuTexRefSetFilterMode_ := GetProcAddressCUDA(cuTexRefSetFilterModeName);
    cuTexRefSetFilterMode := cuTexRefSetFilterModeShell;
    cuTexRefSetFlags_ := GetProcAddressCUDA(cuTexRefSetFlagsName);
    cuTexRefSetFlags := cuTexRefSetFlagsShell;
    cuTexRefGetAddress_ := GetProcAddressCUDA(cuTexRefGetAddressName);
    cuTexRefGetAddress := cuTexRefGetAddressShell;
    cuTexRefGetArray_ := GetProcAddressCUDA(cuTexRefGetArrayName);
    cuTexRefGetArray := cuTexRefGetArrayShell;
    cuTexRefGetAddressMode_ := GetProcAddressCUDA(cuTexRefGetAddressModeName);
    cuTexRefGetAddressMode := cuTexRefGetAddressModeShell;
    cuTexRefGetFilterMode_ := GetProcAddressCUDA(cuTexRefGetFilterModeName);
    cuTexRefGetFilterMode := cuTexRefGetFilterModeShell;
    cuTexRefGetFormat_ := GetProcAddressCUDA(cuTexRefGetFormatName);
    cuTexRefGetFormat := cuTexRefGetFormatShell;
    cuTexRefGetFlags_ := GetProcAddressCUDA(cuTexRefGetFlagsName);
    cuTexRefGetFlags := cuTexRefGetFlagsShell;
    cuParamSetSize_ := GetProcAddressCUDA(cuParamSetSizeName);
    cuParamSetSize := cuParamSetSizeShell;
    cuParamSeti_ := GetProcAddressCUDA(cuParamSetiName);
    cuParamSeti := cuParamSetiShell;
    cuParamSetf_ := GetProcAddressCUDA(cuParamSetfName);
    cuParamSetf := cuParamSetfShell;
    cuParamSetv_ := GetProcAddressCUDA(cuParamSetvName);
    cuParamSetv := cuParamSetvShell;
    cuParamSetTexRef_ := GetProcAddressCUDA(cuParamSetTexRefName);
    cuParamSetTexRef := cuParamSetTexRefShell;
    cuLaunch_ := GetProcAddressCUDA(cuLaunchName);
    cuLaunch := cuLaunchShell;
    cuLaunchGrid_ := GetProcAddressCUDA(cuLaunchGridName);
    cuLaunchGrid := cuLaunchGridShell;
    cuLaunchGridAsync_ := GetProcAddressCUDA(cuLaunchGridAsyncName);
    cuLaunchGridAsync := cuLaunchGridAsyncShell;
    cuEventCreate_ := GetProcAddressCUDA(cuEventCreateName);
    cuEventCreate := cuEventCreateShell;
    cuEventRecord_ := GetProcAddressCUDA(cuEventRecordName);
    cuEventRecord := cuEventRecordShell;
    cuEventQuery_ := GetProcAddressCUDA(cuEventQueryName);
    cuEventQuery := cuEventQueryShell;
    cuEventSynchronize_ := GetProcAddressCUDA(cuEventSynchronizeName);
    cuEventSynchronize := cuEventSynchronizeShell;
    cuEventDestroy_ := GetProcAddressCUDA(cuEventDestroyName);
    cuEventDestroy := cuEventDestroyShell;
    cuEventElapsedTime_ := GetProcAddressCUDA(cuEventElapsedTimeName);
    cuEventElapsedTime := cuEventElapsedTimeShell;
    cuStreamCreate_ := GetProcAddressCUDA(cuStreamCreateName);
    cuStreamCreate := cuStreamCreateShell;
    cuStreamQuery_ := GetProcAddressCUDA(cuStreamQueryName);
    cuStreamQuery := cuStreamQueryShell;
    cuStreamSynchronize_ := GetProcAddressCUDA(cuStreamSynchronizeName);
    cuStreamSynchronize := cuStreamSynchronizeShell;
    cuStreamDestroy_ := GetProcAddressCUDA(cuStreamDestroyName);
    cuStreamDestroy := cuStreamDestroyShell;
    cuGLCtxCreate_ := GetProcAddressCUDA(cuGLCtxCreateName);
    cuGLCtxCreate := cuGLCtxCreateShell;
    cuGraphicsGLRegisterBuffer_ := GetProcAddressCUDA(cuGraphicsGLRegisterBufferName);
    cuGraphicsGLRegisterBuffer := cuGraphicsGLRegisterBufferShell;
    cuGraphicsGLRegisterImage_ := GetProcAddressCUDA(cuGraphicsGLRegisterImageName);
    cuGraphicsGLRegisterImage := cuGraphicsGLRegisterImageShell;
    cuWGLGetDevice_ := GetProcAddressCUDA(cuWGLGetDeviceName);
    cuWGLGetDevice := cuWGLGetDeviceShell;
    cuGraphicsUnregisterResource_ := GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
    cuGraphicsUnregisterResource := cuGraphicsUnregisterResourceShell;
    cuGraphicsSubResourceGetMappedArray_ := GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
    cuGraphicsSubResourceGetMappedArray := cuGraphicsSubResourceGetMappedArrayShell;
    cuGraphicsResourceGetMappedPointer_ := GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
    cuGraphicsResourceGetMappedPointer := cuGraphicsResourceGetMappedPointerShell;
    cuGraphicsResourceSetMapFlags_ := GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
    cuGraphicsResourceSetMapFlags := cuGraphicsResourceSetMapFlagsShell;
    cuGraphicsMapResources_ := GetProcAddressCUDA(cuGraphicsMapResourcesName);
    cuGraphicsMapResources := cuGraphicsMapResourcesShell;
    cuGraphicsUnmapResources_ := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
    cuGraphicsUnmapResources := cuGraphicsUnmapResourcesShell;
    // cuGLInit is bound directly here; no shell is installed for it.
    cuGLInit := GetProcAddressCUDA(cuGLInitName);
    cuGLRegisterBufferObject_ := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
    cuGLRegisterBufferObject := cuGLRegisterBufferObjectShell;
    cuGLMapBufferObject_ := GetProcAddressCUDA(cuGLMapBufferObjectName);
    cuGLMapBufferObject := cuGLMapBufferObjectShell;
    cuGLUnmapBufferObject_ := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
    cuGLUnmapBufferObject := cuGLUnmapBufferObjectShell;
    cuGLUnregisterBufferObject_ := GetProcAddressCUDA(cuGLUnregisterBufferObjectName);
    cuGLUnregisterBufferObject := cuGLUnregisterBufferObjectShell;
    cuGLSetBufferObjectMapFlags_ := GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
    cuGLSetBufferObjectMapFlags := cuGLSetBufferObjectMapFlagsShell;
    cuGLMapBufferObjectAsync_ := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
    cuGLMapBufferObjectAsync := cuGLMapBufferObjectAsyncShell;
    cuGLUnmapBufferObjectAsync_ := GetProcAddressCUDA(cuGLUnmapBufferObjectAsyncName);
    cuGLUnmapBufferObjectAsync := cuGLUnmapBufferObjectAsyncShell;
  {$ENDIF USE_CUDA_DEBUG_MODE}

    // Start from a defined value so the log line below never prints garbage
    // if the driver call fails without writing the output argument.
    DriverVersion := 0;
    cuDriverGetVersion(DriverVersion);
  {$IFDEF USE_LOGGING}
    LogInfoFmt('%s version %d is loaded', [CUDAAPIDLL, DriverVersion]);
  {$ENDIF}
    Result := True;
  end;
  // Reports whether a CUDA library handle is currently held, i.e. whether
  // CUDAHandle differs from INVALID_MODULEHANDLE.
  function IsCUDAInitialized: Boolean;
  begin
    Result := not (CUDAHandle = INVALID_MODULEHANDLE);
  end;
  // Translates a CUDA driver result code into a short English description.
  //
  // AError:  result code returned by a driver API call.
  // Returns: the matching message, or 'Unknown error' for any code that is
  //          not listed below. Codes are tested in the order written and the
  //          first match wins.
  function Get_CUDA_API_Error_String(AError: TCUresult): string;
  begin
    if AError = CUDA_SUCCESS then
    begin
      Result := 'No errors';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_VALUE then
    begin
      Result := 'Invalid value';
      Exit;
    end;
    if AError = CUDA_ERROR_OUT_OF_MEMORY then
    begin
      Result := 'Out of memory';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_INITIALIZED then
    begin
      Result := 'Driver not initialized';
      Exit;
    end;
    if AError = CUDA_ERROR_DEINITIALIZED then
    begin
      Result := 'Driver deinitialized';
      Exit;
    end;
    if AError = CUDA_ERROR_NO_DEVICE then
    begin
      Result := 'No CUDA-capable device available';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_DEVICE then
    begin
      Result := 'Invalid device';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_IMAGE then
    begin
      Result := 'Invalid kernel image';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_CONTEXT then
    begin
      Result := 'Invalid context';
      Exit;
    end;
    if AError = CUDA_ERROR_CONTEXT_ALREADY_CURRENT then
    begin
      Result := 'Context already current';
      Exit;
    end;
    if AError = CUDA_ERROR_MAP_FAILED then
    begin
      Result := 'Map failed';
      Exit;
    end;
    if AError = CUDA_ERROR_UNMAP_FAILED then
    begin
      Result := 'Unmap failed';
      Exit;
    end;
    if AError = CUDA_ERROR_ARRAY_IS_MAPPED then
    begin
      Result := 'Array is mapped';
      Exit;
    end;
    if AError = CUDA_ERROR_ALREADY_MAPPED then
    begin
      Result := 'Already mapped';
      Exit;
    end;
    if AError = CUDA_ERROR_NO_BINARY_FOR_GPU then
    begin
      Result := 'No binary for GPU';
      Exit;
    end;
    if AError = CUDA_ERROR_ALREADY_ACQUIRED then
    begin
      Result := 'Already acquired';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_MAPPED then
    begin
      Result := 'Not mapped';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_MAPPED_AS_ARRAY then
    begin
      Result := 'Not mapped as array';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_MAPPED_AS_POINTER then
    begin
      Result := 'Not mapped as pointer';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_SOURCE then
    begin
      Result := 'Invalid source';
      Exit;
    end;
    if AError = CUDA_ERROR_FILE_NOT_FOUND then
    begin
      Result := 'File not found';
      Exit;
    end;
    if AError = CUDA_ERROR_INVALID_HANDLE then
    begin
      Result := 'Invalid handle';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_FOUND then
    begin
      Result := 'Not found';
      Exit;
    end;
    if AError = CUDA_ERROR_NOT_READY then
    begin
      Result := 'CUDA not ready';
      Exit;
    end;
    if AError = CUDA_ERROR_LAUNCH_FAILED then
    begin
      Result := 'Launch failed';
      Exit;
    end;
    if AError = CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES then
    begin
      Result := 'Launch exceeded resources';
      Exit;
    end;
    if AError = CUDA_ERROR_LAUNCH_TIMEOUT then
    begin
      Result := 'Launch exceeded timeout';
      Exit;
    end;
    if AError = CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING then
    begin
      Result := 'Launch with incompatible texturing';
      Exit;
    end;
    if AError = CUDA_ERROR_POINTER_IS_64BIT then
    begin
      Result := 'Pointer is 64bit';
      Exit;
    end;
    if AError = CUDA_ERROR_SIZE_IS_64BIT then
    begin
      Result := 'Size is 64bit';
      Exit;
    end;
    // No listed code matched.
    Result := 'Unknown error';
  end;
  2447. end.