Bläddra i källkod

Updated GLScene_GPU packages

GLScene 5 år sedan
förälder
incheckning
a17dbb7e1e
91 ändrade filer med 13670 tillägg och 12421 borttagningar
  1. 0 6
      AdvDemos/CPP/PanoramViewer/ReadMe.txt
  2. 4 4
      Demos/CPP/meshes/shadedterrain/Unit1.cpp
  3. 1 1
      Demos/CPP/meshes/shadedterrain/Unit1.h
  4. 1 1
      Demos/CPP/meshes/terrain/Unit1.cpp
  5. 1 1
      Demos/CPP/meshes/terrain/Unit1.h
  6. 1 1
      Demos/CPP/physics/OdeTerrain/Unit1.h
  7. 1 1
      Demos/CPP/rendering/SkyBox/Unit1.cpp
  8. 1 1
      Demos/CPP/rendering/SkyBox/Unit1.h
  9. 3 3
      Demos/CPP/specialsFX/Atmosphere/Unit1.cpp
  10. 2 2
      Demos/CPP/specialsFX/Atmosphere/Unit1.h
  11. 3 5
      Demos/Delphi/behaviours/DCEDemo/Unit1.pas
  12. 2 3
      Demos/Delphi/computing/FastFourierTransformation/FFT_Demo.dpr
  13. 714 9
      Demos/Delphi/computing/FastFourierTransformation/FFT_Demo.dproj
  14. 2 2
      Demos/Delphi/computing/FastFourierTransformation/UnitMain.dfm
  15. 7 7
      Demos/Delphi/computing/FastFourierTransformation/UnitMain.pas
  16. 4 4
      Demos/Delphi/computing/PostProcessing/uDemo.dfm
  17. 7 7
      Demos/Delphi/computing/PostProcessing/uDemo.pas
  18. 5 5
      Demos/Delphi/computing/ScalarProduct/uDemo.dfm
  19. 7 7
      Demos/Delphi/computing/ScalarProduct/uDemo.pas
  20. 3 3
      Demos/Delphi/computing/SimpleCUDATexture/uDemo.dfm
  21. 7 7
      Demos/Delphi/computing/SimpleCUDATexture/uDemo.pas
  22. 5 5
      Demos/Delphi/computing/StableFluids/uDemo.dfm
  23. 10 10
      Demos/Delphi/computing/StableFluids/uDemo.pas
  24. 6 6
      Demos/Delphi/computing/VertexDataGeneration/uDemo.dfm
  25. 11 14
      Demos/Delphi/computing/VertexDataGeneration/uDemo.pas
  26. 10 7
      Demos/Delphi/materials/cubemap/Unit1.dfm
  27. 2 0
      Demos/Delphi/meshes/actor/Actor.dproj
  28. 10 7
      Demos/Delphi/meshes/shadedterrain/Unit1.dfm
  29. 31 26
      Demos/Delphi/meshes/shadedterrain/Unit1.pas
  30. 9 6
      Demos/Delphi/meshes/shadedterrain/shadedterrain.dproj
  31. 5 4
      Demos/Delphi/meshes/skeletal/skeletal.dproj
  32. 12 0
      Demos/Delphi/meshes/synthterr/Unit1.dfm
  33. 108 101
      Demos/Delphi/meshes/synthterr/Unit1.pas
  34. 1 1
      Demos/Delphi/meshes/terrain/Unit1.pas
  35. 1 1
      Demos/Delphi/physics/odeterrain/Unit1.pas
  36. 1 1
      Demos/Delphi/rendering/SkyBox/Unit1.pas
  37. 1 4
      Demos/Delphi/utilities/ArchiverDemo/main.pas
  38. BIN
      Demos/media/ground.jpg
  39. BIN
      Demos/media/texture.jpg
  40. 17 17
      Packages/GLScene.groupproj
  41. 4 4
      Packages/GLScene_Cg_DT.dproj
  42. 4 4
      Packages/GLScene_Cg_RT.dproj
  43. 4 4
      Packages/GLScene_DT.dproj
  44. 5 4
      Packages/GLScene_GPU_DT.dpk
  45. 17 16
      Packages/GLScene_GPU_DT.dproj
  46. 14 13
      Packages/GLScene_GPU_RT.dpk
  47. 21 21
      Packages/GLScene_GPU_RT.dproj
  48. 4 4
      Packages/GLScene_Physics_DT.dproj
  49. 4 4
      Packages/GLScene_Physics_RT.dproj
  50. 5 4
      Packages/GLScene_RT.dproj
  51. 4 4
      Packages/GLScene_Sounds_DT.dproj
  52. 4 4
      Packages/GLScene_Sounds_RT.dproj
  53. 4 0
      Resources/GLSceneGPU.rc
  54. 0 4
      Resources/GLSceneParallel.rc
  55. 0 0
      Resources/components/TGLCUDA.bmp
  56. 0 0
      Resources/components/TGLCUDACompiler.bmp
  57. 0 0
      Resources/components/TGLCUDADevice.bmp
  58. 0 0
      Resources/objects/TCUDAFeedbackMesh.bmp
  59. 1 1
      Source/FCUDAEditor.dfm
  60. 16 17
      Source/FCUDAEditor.pas
  61. 2 2
      Source/GLS.BaseMeshSilhouette.pas
  62. 0 1
      Source/GLS.Blur.pas
  63. 1 1
      Source/GLS.FilePGM.pas
  64. 62 62
      Source/GLS.GeomObjects.pas
  65. 18 18
      Source/GLS.Isosurface.pas
  66. 274 250
      Source/GLS.Objects.pas
  67. 1 3
      Source/GLS.PersistentClasses.pas
  68. 1 0
      Source/GLS.ProxyObjects.pas
  69. 6 6
      Source/GLS.ROAMPatch.pas
  70. 27 28
      Source/GLS.SceneRegister.pas
  71. 19 19
      Source/GLS.Silhouette.pas
  72. 8 15
      Source/GLS.TerrainRenderer.pas
  73. 1 17
      Source/GLS.Utils.pas
  74. 68 67
      Source/GLS.VectorRecTypes.pas
  75. 1 1
      Source/GLSL.TextureShaders.pas
  76. 2944 2949
      Source/GPU.CUDA.pas
  77. 445 451
      Source/GPU.CUDACompiler.pas
  78. 865 868
      Source/GPU.CUDAContext.pas
  79. 145 142
      Source/GPU.CUDADataAccess.pas
  80. 393 393
      Source/GPU.CUDAFFTPlan.pas
  81. 489 519
      Source/GPU.CUDAFourierTransform.pas
  82. 1140 1141
      Source/GPU.CUDAGraphics.pas
  83. 515 515
      Source/GPU.CUDAParser.pas
  84. 416 447
      Source/GPU.CUDAPropEditors.pas
  85. 65 0
      Source/GPU.CUDARegister.pas
  86. 786 786
      Source/GPU.CUDARuntime.pas
  87. 157 189
      Source/GPU.CUDAUtility.pas
  88. 2676 2954
      Source/Import.CUDAApi.pas
  89. 169 179
      Source/Import.CUDAParallelPrimitives.pas
  90. 695 0
      Source/Import.CUDARuntime.pas
  91. 154 0
      Source/Import.CUDAUtility.pas

+ 0 - 6
AdvDemos/CPP/PanoramViewer/ReadMe.txt

@@ -1,6 +0,0 @@
-Panoramic Viewer Demo (with Source)
-
-See Unit1.pas header & code for more details.
-
-Eric Grange
-http://glscene.org

+ 4 - 4
Demos/CPP/meshes/shadedterrain/Unit1.cpp

@@ -218,17 +218,17 @@ void __fastcall TForm1::TBSubSamplingChange(TObject * Sender)
 void __fastcall TForm1::TBIntensityChange(TObject * Sender)
 {
   int i;
-  Graphics::TBitmap * bmp;
+  Vcl::Graphics::TBitmap * bmp;
 
   TGLMaterial *m = GLMaterialLibrary1->LibMaterialByName("contrast")->Material;
-  bmp = new Graphics::TBitmap;
+  bmp = new Vcl::Graphics::TBitmap;
   try
   {
     bmp->PixelFormat = pf24bit;
     bmp->Width = 1;
     bmp->Height = 1;
     i = 255;
-	bmp->Canvas->Pixels[0][0] = (Graphics::TColor) RGB(i, i, i);
+	bmp->Canvas->Pixels[0][0] = (Vcl::Graphics::TColor) RGB(i, i, i);
 	m->Texture->Image->Assign(bmp);
   }
   __finally
@@ -236,7 +236,7 @@ void __fastcall TForm1::TBIntensityChange(TObject * Sender)
     delete bmp;
   }
   i = (TBIntensity->Position * 255) / 100;
-  m->Texture->EnvColor->AsWinColor = (Graphics::TColor) RGB(i, i, i);
+  m->Texture->EnvColor->AsWinColor = (Vcl::Graphics::TColor) RGB(i, i, i);
 
   LABumpIntensity->Caption = IntToStr(TBIntensity->Position) + " %";
 }

+ 1 - 1
Demos/CPP/meshes/shadedterrain/Unit1.h

@@ -12,7 +12,7 @@
 
 
 #include "GLS.BumpMapHDS.hpp"
-#include "GLLensFlare.hpp"
+#include "GLS.LensFlare.hpp"
 #include "GLS.VectorGeometry.hpp"
 #include "GLS.SceneViewer.hpp"
 #include "GLS.SkyDome.hpp"

+ 1 - 1
Demos/CPP/meshes/terrain/Unit1.cpp

@@ -10,7 +10,7 @@
 
 //---------------------------------------------------------------------------
 #pragma package(smart_init)
-#pragma link "GLLensFlare"
+#pragma link "GLS.LensFlare"
 #pragma link "GLS.VectorGeometry"
 #pragma link "GLSM.BASS"
 #pragma link "GLS.Sound"

+ 1 - 1
Demos/CPP/meshes/terrain/Unit1.h

@@ -11,7 +11,7 @@
 #include <ExtCtrls.hpp>
 #include <GIFImg.hpp>
 
-#include "GLLensFlare.hpp"
+#include "GLS.LensFlare.hpp"
 #include "GLS.VectorGeometry.hpp"
 #include "GLSM.BASS.hpp"
 #include "GLS.Sound.hpp"

+ 1 - 1
Demos/CPP/physics/OdeTerrain/Unit1.h

@@ -16,7 +16,7 @@
 
 #include "GLS.HeightData.hpp"
 #include "GLS.HUDObjects.hpp"
-#include "GLLensFlare.hpp"
+#include "GLS.LensFlare.hpp"
 #include "GLS.Material.hpp"
 #include "GLS.Navigator.hpp"
 #include "GLS.Objects.hpp"

+ 1 - 1
Demos/CPP/rendering/SkyBox/Unit1.cpp

@@ -10,7 +10,7 @@
 #pragma link "GLS.Cadencer"
 #pragma link "GLS.Coordinates"
 
-#pragma link "GLLensFlare"
+#pragma link "GLS.LensFlare"
 #pragma link "GLS.Material"
 #pragma link "GLS.Navigator"
 #pragma link "GLS.Objects"

+ 1 - 1
Demos/CPP/rendering/SkyBox/Unit1.h

@@ -11,7 +11,7 @@
 #include "GLS.Cadencer.hpp"
 #include "GLS.Coordinates.hpp"
 
-#include "GLLensFlare.hpp"
+#include "GLS.LensFlare.hpp"
 #include "GLS.Material.hpp"
 #include "GLS.Navigator.hpp"
 #include "GLS.Objects.hpp"

+ 3 - 3
Demos/CPP/specialsFX/Atmosphere/Unit1.cpp

@@ -13,11 +13,11 @@
 #pragma link "GLS.BaseClasses"
 #pragma link "GLS.Coordinates"
 
-#pragma link "GLLensFlare"
+#pragma link "GLS.LensFlare"
 #pragma link "GLS.SimpleNavigation"
 #pragma link "GLS.SkyDome"
-#pragma link "GLAtmosphere"
-#pragma link "GLLensFlare"
+#pragma link "GLS.Atmosphere"
+
 
 #pragma resource "*.dfm"
 TForm1 *Form1;

+ 2 - 2
Demos/CPP/specialsFX/Atmosphere/Unit1.h

@@ -13,13 +13,13 @@
 #include "GLS.Scene.hpp"
 #include "GLS.Objects.hpp"
 #include "GLS.Cadencer.hpp"
-#include "GLLensFlare.hpp"
+#include "GLS.LensFlare.hpp"
 #include "GLS.SceneViewer.hpp"
 #include "GLS.Texture.hpp"
 #include "GLS.SkyDome.hpp"
 #include "GLS.VectorGeometry.hpp"
 
-#include "GLAtmosphere.hpp"
+#include "GLS.Atmosphere.hpp"
 #include "GLS.SimpleNavigation.hpp"
 #include "GLS.Behaviours.hpp"
 #include "JPeg.hpp"

+ 3 - 5
Demos/Delphi/behaviours/DCEDemo/Unit1.pas

@@ -107,12 +107,10 @@ implementation
 procedure TForm1.Load;
 begin
   SetGLSceneMediaDir();
+
   //Load Materials
-  with GLMatlLib do
-  begin
-    AddTextureMaterial('Terrain', 'snow512.jpg');
-    AddTextureMaterial('Actor', 'waste.jpg');
-  end;
+  GLMatlLib.AddTextureMaterial('Terrain', 'snow512.jpg');
+  GLMatlLib.AddTextureMaterial('Actor', 'waste.jpg');
 
   //Load Terrain
   GLBitmapHDS1.MaxPoolSize := 8 * 1024 * 1024;

+ 2 - 3
Demos/Delphi/computing/FastFourierTransformation/FFT_Demo.dpr

@@ -2,13 +2,12 @@ program FFT_Demo;
 
 uses
   Forms,
-  UnitMain in 'UnitMain.pas' {Form1},
-  CPUFFT in 'CPUFFT.pas';
+  CPUFFT in 'CPUFFT.pas',
+  UnitMain in 'UnitMain.pas';
 
 {$R *.res}
 
 begin
   Application.Initialize;
-  Application.CreateForm(TForm1, Form1);
   Application.Run;
 end.

+ 714 - 9
Demos/Delphi/computing/FastFourierTransformation/FFT_Demo.dproj

@@ -78,22 +78,20 @@
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_2_Win32)'!=''">
         <BT_BuildType>Debug</BT_BuildType>
-        <DCC_UnitSearchPath>..\..\..\..\Source;$(DCC_UnitSearchPath)</DCC_UnitSearchPath>
+        <DCC_UnitSearchPath>..\..\..\Source;$(DCC_UnitSearchPath)</DCC_UnitSearchPath>
         <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
         <VerInfo_Locale>1033</VerInfo_Locale>
         <VerInfo_Keys>CompanyName=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductVersion=1.0.0.0;Comments=;ProgramID=com.embarcadero.$(MSBuildProjectName);FileDescription=$(MSBuildProjectName);ProductName=$(MSBuildProjectName)</VerInfo_Keys>
         <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
-        <DCC_ExeOutput>.\$(Platform)\$(Config)</DCC_ExeOutput>
+        <DCC_ExeOutput>.</DCC_ExeOutput>
         <DCC_DcuOutput>.\$(Platform)\$(Config)</DCC_DcuOutput>
     </PropertyGroup>
     <ItemGroup>
         <DelphiCompile Include="$(MainSource)">
             <MainSource>MainSource</MainSource>
         </DelphiCompile>
-        <DCCReference Include="UnitMain.pas">
-            <Form>Form1</Form>
-        </DCCReference>
         <DCCReference Include="CPUFFT.pas"/>
+        <DCCReference Include="UnitMain.pas"/>
         <BuildConfiguration Include="Debug">
             <Key>Cfg_2</Key>
             <CfgParent>Base</CfgParent>
@@ -115,19 +113,726 @@
                     <Source Name="MainSource">FFT_Demo.dpr</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl">File C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl not found</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>
                 <Platform value="Win32">True</Platform>
                 <Platform value="Win64">False</Platform>
             </Platforms>
+            <Deployment Version="3">
+                <DeployFile LocalName="FFT_Demo.exe" Configuration="Debug" Class="ProjectOutput">
+                    <Platform Name="Win32">
+                        <RemoteName>FFT_Demo.exe</RemoteName>
+                        <Overwrite>true</Overwrite>
+                    </Platform>
+                </DeployFile>
+                <DeployClass Name="AdditionalDebugSymbols">
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidClassesDexFile">
+                    <Platform Name="Android">
+                        <RemoteDir>classes</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidFileProvider">
+                    <Platform Name="Android">
+                        <RemoteDir>res\xml</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidGDBServer">
+                    <Platform Name="Android">
+                        <RemoteDir>library\lib\armeabi-v7a</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidLibnativeArmeabiFile">
+                    <Platform Name="Android">
+                        <RemoteDir>library\lib\armeabi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidLibnativeMipsFile">
+                    <Platform Name="Android">
+                        <RemoteDir>library\lib\mips</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidServiceOutput">
+                    <Platform Name="Android">
+                        <RemoteDir>library\lib\armeabi-v7a</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidSplashImageDef">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidSplashStyles">
+                    <Platform Name="Android">
+                        <RemoteDir>res\values</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="AndroidSplashStylesV21">
+                    <Platform Name="Android">
+                        <RemoteDir>res\values-v21</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_Colors">
+                    <Platform Name="Android">
+                        <RemoteDir>res\values</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_DefaultAppIcon">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_LauncherIcon144">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xxhdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_LauncherIcon36">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-ldpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_LauncherIcon48">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-mdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_LauncherIcon72">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-hdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_LauncherIcon96">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xhdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_NotificationIcon24">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-mdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_NotificationIcon36">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-hdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_NotificationIcon48">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xhdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_NotificationIcon72">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xxhdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_NotificationIcon96">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xxxhdpi</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_SplashImage426">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-small</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_SplashImage470">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-normal</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_SplashImage640">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-large</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_SplashImage960">
+                    <Platform Name="Android">
+                        <RemoteDir>res\drawable-xlarge</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="Android_Strings">
+                    <Platform Name="Android">
+                        <RemoteDir>res\values</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="DebugSymbols">
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="DependencyFramework">
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.framework</Extensions>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.framework</Extensions>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="DependencyModule">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                        <Extensions>.dll;.bpl</Extensions>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Required="true" Name="DependencyPackage">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                        <Extensions>.dylib</Extensions>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                        <Extensions>.bpl</Extensions>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="File">
+                    <Platform Name="Android">
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice32">
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\Resources\StartUp\</RemoteDir>
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\Resources\StartUp\</RemoteDir>
+                        <Operation>0</Operation>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch1024x768">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch1536x2048">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch1668">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch1668x2388">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch2048x1536">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch2048x2732">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch2224">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch2388x1668">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch2732x2048">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPad_Launch768x1024">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1125">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1136x640">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1242">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1242x2688">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1334">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch1792">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch2208">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch2436">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch2688x1242">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch320">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch640">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch640x1136">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch750">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="iPhone_Launch828">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectAndroidManifest">
+                    <Platform Name="Android">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectiOSDeviceDebug">
+                    <Platform Name="iOSDevice32">
+                        <RemoteDir>..\$(PROJECTNAME).app.dSYM\Contents\Resources\DWARF</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <RemoteDir>..\$(PROJECTNAME).app.dSYM\Contents\Resources\DWARF</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectiOSDeviceResourceRules">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectiOSEntitlements">
+                    <Platform Name="iOSDevice32">
+                        <RemoteDir>..\</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <RemoteDir>..\</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectiOSInfoPList">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectiOSResource">
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectOSXDebug">
+                    <Platform Name="OSX64">
+                        <RemoteDir>..\$(PROJECTNAME).app.dSYM\Contents\Resources\DWARF</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectOSXEntitlements">
+                    <Platform Name="OSX32">
+                        <RemoteDir>..\</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>..\</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectOSXInfoPList">
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectOSXResource">
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\Resources</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\Resources</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Required="true" Name="ProjectOutput">
+                    <Platform Name="Android">
+                        <RemoteDir>library\lib\armeabi-v7a</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSDevice64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="iOSSimulator">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Linux64">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX32">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="OSX64">
+                        <RemoteDir>Contents\MacOS</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win32">
+                        <Operation>0</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="ProjectUWPManifest">
+                    <Platform Name="Win32">
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win64">
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="UWP_DelphiLogo150">
+                    <Platform Name="Win32">
+                        <RemoteDir>Assets</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win64">
+                        <RemoteDir>Assets</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <DeployClass Name="UWP_DelphiLogo44">
+                    <Platform Name="Win32">
+                        <RemoteDir>Assets</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                    <Platform Name="Win64">
+                        <RemoteDir>Assets</RemoteDir>
+                        <Operation>1</Operation>
+                    </Platform>
+                </DeployClass>
+                <ProjectRoot Platform="iOSDevice64" Name="$(PROJECTNAME).app"/>
+                <ProjectRoot Platform="Win64" Name="$(PROJECTNAME)"/>
+                <ProjectRoot Platform="iOSDevice32" Name="$(PROJECTNAME).app"/>
+                <ProjectRoot Platform="Linux64" Name="$(PROJECTNAME)"/>
+                <ProjectRoot Platform="Win32" Name="$(PROJECTNAME)"/>
+                <ProjectRoot Platform="OSX32" Name="$(PROJECTNAME).app"/>
+                <ProjectRoot Platform="Android" Name="$(PROJECTNAME)"/>
+                <ProjectRoot Platform="OSX64" Name="$(PROJECTNAME).app"/>
+                <ProjectRoot Platform="iOSSimulator" Name="$(PROJECTNAME).app"/>
+            </Deployment>
         </BorlandProject>
         <ProjectFileVersion>12</ProjectFileVersion>
     </ProjectExtensions>
     <Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/>
     <Import Project="$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj" Condition="Exists('$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj')"/>
+    <Import Project="$(MSBuildProjectName).deployproj" Condition="Exists('$(MSBuildProjectName).deployproj')"/>
 </Project>

+ 2 - 2
Demos/Delphi/computing/FastFourierTransformation/UnitMain.dfm

@@ -207,7 +207,7 @@ object Form1: TForm1
       Height = 257
     end
   end
-  object GLSCUDA1: TGLSCUDA
+  object GLSCUDA1: TGLCUDA
     Left = 24
     Top = 104
     object Signal1D: TCUDAMemData
@@ -242,7 +242,7 @@ object Form1: TForm1
       Transform = fftDoubleComplexToDoubleComplex
     end
   end
-  object GLSCUDADevice1: TGLSCUDADevice
+  object GLSCUDADevice1: TGLCUDADevice
     SelectDevice = 'GeForce GT 630M (1)'
     Left = 24
     Top = 56

+ 7 - 7
Demos/Delphi/computing/FastFourierTransformation/UnitMain.pas

@@ -20,13 +20,13 @@ uses
   GLS.Utils,
   GLS.FilePGM,
   GLS.Graphics,
-  GLS.CUDAUtility,
-  GLS.CUDADataAccess,
+  GPU.CUDAUtility,
+  GPU.CUDADataAccess,
   GLS.VectorTypes,
   CPUFFT,
-  GLS.CUDAFFTPlan,
-  GLS.CUDA,
-  GLS.CUDAContext;
+  GPU.CUDAFFTPlan,
+  GPU.CUDA,
+  GPU.CUDAContext;
 
 type
   TDemoMode = (dmNone, dm1D, dm2D, dmLena);
@@ -51,8 +51,8 @@ type
     Label2: TLabel;
     Image2: TImage;
     Image1: TImage;
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDADevice1: TGLSCUDADevice;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDADevice1: TGLCUDADevice;
     Signal1D: TCUDAMemData;
     FFTPlan1D: TCUDAFFTPlan;
     ESize: TLabeledEdit;

+ 4 - 4
Demos/Delphi/computing/PostProcessing/uDemo.dfm

@@ -172,12 +172,12 @@ object Form1: TForm1
     Left = 32
     Top = 128
   end
-  object GLSCUDADevice1: TGLSCUDADevice
+  object GLSCUDADevice1: TGLCUDADevice
     SelectDevice = 'GeForce GTX 260 (1)'
     Left = 448
     Top = 24
   end
-  object GLSCUDA1: TGLSCUDA
+  object GLSCUDA1: TGLCUDA
     ComputingDevice = GLSCUDADevice1
     OnOpenGLInteropInit = GLSCUDA1OpenGLInteropInit
     Left = 448
@@ -696,7 +696,7 @@ object Form1: TForm1
         end
       end
     end
-    object processedTextureMapper: TCUDAGLImageResource
+    object processedTextureMapper: TCUDAImageResource
       TextureName = 'processedTexture'
       MaterialLibrary = GLMaterialLibrary1
     end
@@ -719,7 +719,7 @@ object Form1: TForm1
       ChannelsNum = cnFour
     end
   end
-  object GLSCUDACompiler1: TGLSCUDACompiler
+  object GLSCUDACompiler1: TGLCUDACompiler
     NVCCPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\\bin\'
     CppCompilerPath = 'C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\'
     ProjectModule = 'postProcessGL_kernel.cu'

+ 7 - 7
Demos/Delphi/computing/PostProcessing/uDemo.pas

@@ -28,10 +28,10 @@ uses
   GLS.FBORenderer,
   GLS.HUDObjects,
 
-  GLS.CUDA,
-  GLS.CUDAGraphics,
-  GLS.CUDACompiler,
-  GLS.CUDAContext,
+  GPU.CUDA,
+  GPU.CUDAGraphics,
+  GPU.CUDACompiler,
+  GPU.CUDAContext,
 
   GLS.State,
   GLS.RenderContextInfo,
@@ -53,9 +53,9 @@ type
     RenderRoot: TGLDummyCube;
     GLCylinder1: TGLCylinder;
     RenderToTexture: TGLFBORenderer;
-    GLSCUDADevice1: TGLSCUDADevice;
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDACompiler1: TGLSCUDACompiler;
+    GLSCUDADevice1: TGLCUDADevice;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDACompiler1: TGLCUDACompiler;
     MainModule: TCUDAModule;
     processedTextureMapper: TCUDAImageResource;
     CallPostProcess: TGLDirectOpenGL;

+ 5 - 5
Demos/Delphi/computing/ScalarProduct/uDemo.dfm

@@ -33,7 +33,7 @@ object Form1: TForm1
     TabOrder = 1
     OnClick = Button1Click
   end
-  object GLSCUDA1: TGLSCUDA
+  object GLSCUDA1: TGLCUDA
     ComputingDevice = GLSCUDADevice1
     Left = 104
     Top = 248
@@ -372,14 +372,14 @@ object Form1: TForm1
       ChannelsType = ctFloat
     end
   end
-  object GLSCUDADevice1: TGLSCUDADevice
-    SelectDevice = 'GeForce GTX 260 (1)'
+  object GLSCUDADevice1: TGLCUDADevice
+    SelectDevice = 'GeForce GTX 1040 (1)'
     Left = 16
     Top = 248
   end
-  object GLSCUDACompiler1: TGLSCUDACompiler
+  object GLSCUDACompiler1: TGLCUDACompiler
     NVCCPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\\bin\'
-    CppCompilerPath = 'C:\Program Files\Microsoft Visual Studio 12.0\VC\bin\'
+    CppCompilerPath = 'C:\Program Files\Microsoft Visual Studio 2017\VC\bin\'
     ProjectModule = 'scalarProd_kernel.cu'
     Left = 200
     Top = 248

+ 7 - 7
Demos/Delphi/computing/ScalarProduct/uDemo.pas

@@ -12,17 +12,17 @@ uses
   Vcl.Dialogs,
   Vcl.StdCtrls,
 
-  GLS.CUDACompiler,
-  GLS.CUDAContext,
-  GLS.CUDA,
-  GLS.CUDAUtility;
+  GPU.CUDACompiler,
+  GPU.CUDAContext,
+  GPU.CUDA,
+  GPU.CUDAUtility;
 
 
 type
   TForm1 = class(TForm)
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDADevice1: TGLSCUDADevice;
-    GLSCUDACompiler1: TGLSCUDACompiler;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDADevice1: TGLCUDADevice;
+    GLSCUDACompiler1: TGLCUDACompiler;
     Memo1: TMemo;
     Button1: TButton;
     MainModule: TCUDAModule;

+ 3 - 3
Demos/Delphi/computing/SimpleCUDATexture/uDemo.dfm

@@ -35,14 +35,14 @@ object Form1: TForm1
     ScrollBars = ssVertical
     TabOrder = 1
   end
-  object GLSCUDACompiler1: TGLSCUDACompiler
+  object GLSCUDACompiler1: TGLCUDACompiler
     NVCCPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\\bin\'
     CppCompilerPath = 'C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\'
     ProjectModule = 'simpleTexture_kernel.cu'
     Left = 80
     Top = 256
   end
-  object GLSCUDA1: TGLSCUDA
+  object GLSCUDA1: TGLCUDA
     ComputingDevice = GLSCUDADevice1
     Left = 48
     Top = 256
@@ -1154,7 +1154,7 @@ object Form1: TForm1
       ChannelsType = ctFloat
     end
   end
-  object GLSCUDADevice1: TGLSCUDADevice
+  object GLSCUDADevice1: TGLCUDADevice
     SelectDevice = 'GeForce GTX 260 (1)'
     Left = 16
     Top = 256

+ 7 - 7
Demos/Delphi/computing/SimpleCUDATexture/uDemo.pas

@@ -15,19 +15,19 @@ uses
   GLS.Utils,
   GLS.FilePGM,
 
-  GLS.CUDAContext,
-  GLS.CUDA,
-  GLS.CUDACompiler,
-  GLS.CUDAUtility,
+  GPU.CUDAContext,
+  GPU.CUDA,
+  GPU.CUDACompiler,
+  GPU.CUDAUtility,
 
   GLS.Graphics,
   GLS.TextureFormat;
 
 type
   TForm1 = class(TForm)
-    GLSCUDACompiler1: TGLSCUDACompiler;
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDADevice1: TGLSCUDADevice;
+    GLSCUDACompiler1: TGLCUDACompiler;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDADevice1: TGLCUDADevice;
     MainModule: TCUDAModule;
     Button1: TButton;
     Memo1: TMemo;

+ 5 - 5
Demos/Delphi/computing/StableFluids/uDemo.dfm

@@ -37,7 +37,7 @@ object Form1: TForm1
       DepthOfView = 100.000000000000000000
       FocalLength = 50.000000000000000000
     end
-    object ParticleRenderer: TGLFeedBackMesh
+    object ParticleRenderer: TCUDAFeedBackMesh
       Attributes = <
         item
           Name = 'Position'
@@ -152,12 +152,12 @@ object Form1: TForm1
     Left = 40
     Top = 48
   end
-  object GLSCUDADevice1: TGLSCUDADevice
+  object GLSCUDADevice1: TGLCUDADevice
     SelectDevice = 'GeForce GTX 260 (1)'
     Left = 448
     Top = 48
   end
-  object GLSCUDA1: TGLSCUDA
+  object GLSCUDA1: TGLCUDA
     ComputingDevice = GLSCUDADevice1
     OnOpenGLInteropInit = GLSCUDA1OpenGLInteropInit
     Left = 448
@@ -1106,12 +1106,12 @@ object Form1: TForm1
       Height = 512
       Transform = fftComplexToReal
     end
-    object ParticleMapper: TCUDAGLGeometryResource
+    object ParticleMapper: TCUDAGeometryResource
       FeedBackMesh = ParticleRenderer
       Mapping = grmWriteDiscard
     end
   end
-  object GLSCUDACompiler1: TGLSCUDACompiler
+  object GLSCUDACompiler1: TGLCUDACompiler
     NVCCPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\\bin\'
     CppCompilerPath = 'C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\'
     ProjectModule = 'Fluids kernels.cu'

+ 10 - 10
Demos/Delphi/computing/StableFluids/uDemo.pas

@@ -28,12 +28,12 @@ uses
   GLSL.Shader,
   GLS.Windows,
 
-  GLS.CUDAContext,
-  GLS.CUDA,
-  GLS.CUDACompiler,
-  GLS.CUDAFFTPlan,
-  GLS.CUDAGraphics,
-  GLS.CUDADataAccess;
+  GPU.CUDAContext,
+  GPU.CUDA,
+  GPU.CUDACompiler,
+  GPU.CUDAFFTPlan,
+  GPU.CUDAGraphics,
+  GPU.CUDADataAccess;
 
 type
   TForm1 = class(TForm)
@@ -41,9 +41,9 @@ type
     GLSceneViewer1: TGLSceneViewer;
     GLCadencer1: TGLCadencer;
     GLCamera1: TGLCamera;
-    GLSCUDADevice1: TGLSCUDADevice;
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDACompiler1: TGLSCUDACompiler;
+    GLSCUDADevice1: TGLCUDADevice;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDACompiler1: TGLCUDACompiler;
     MainModule: TCUDAModule;
     ArrayOfTexture: TCUDAMemData;
     TextureOfVelocityField: TCUDATexture;
@@ -59,7 +59,7 @@ type
     ResetButton: TGLButton;
     GLWindowsBitmapFont1: TGLWindowsBitmapFont;
     GLGuiLayout1: TGLGuiLayout;
-    ParticleRenderer: TGLS.FeedbackMesh;
+    ParticleRenderer: TCUDAFeedbackMesh;
     addForces: TCUDAFunction;
     advectVelocity: TCUDAFunction;
     diffuseProject: TCUDAFunction;

+ 6 - 6
Demos/Delphi/computing/VertexDataGeneration/uDemo.dfm

@@ -37,7 +37,7 @@ object Form1: TForm1
     object GLDummyCube1: TGLDummyCube
       CubeSize = 2.000000000000000000
       VisibleAtRunTime = True
-      object GLFeedBackMesh1: TGLFeedBackMesh
+      object GLFeedBackMesh1: TCUDAFeedBackMesh
         Attributes = <
           item
             Name = 'Position'
@@ -75,13 +75,13 @@ object Form1: TForm1
     Left = 24
     Top = 72
   end
-  object GLSCUDADevice1: TGLSCUDADevice
+  object GLSCUDADevice1: TGLCUDADevice
     SelectDevice = 'GeForce GTX 260 (1)'
     Left = 448
     Top = 16
   end
-  object GLSCUDA1: TGLSCUDA
-    ComputingDevice = GLSCUDADevice1
+  object GLSCUDA1: TGLCUDA
+    ComputingDevice = GLCUDADevice1
     OnOpenGLInteropInit = GLSCUDA1OpenGLInteropInit
     Left = 448
     Top = 72
@@ -745,14 +745,14 @@ object Form1: TForm1
         OnParameterSetup = MakeVertexBufferParameterSetup
       end
     end
-    object DotFieldMapper: TCUDAGLGeometryResource
+    object DotFieldMapper: TCUDAGeometryResource
       FeedBackMesh = GLFeedBackMesh1
       Mapping = grmWriteDiscard
       Left = 248
       Top = 264
     end
   end
-  object GLSCUDACompiler1: TGLSCUDACompiler
+  object GLSCUDACompiler1: TGLCUDACompiler
     NVCCPath = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\bin\'
     CppCompilerPath = 'C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\'
     ProjectModule = 'Simple kernel.cu'

+ 11 - 14
Demos/Delphi/computing/VertexDataGeneration/uDemo.pas

@@ -24,10 +24,10 @@ uses
   GLS.Coordinates,
   GLS.Context,
 
-  GLS.CUDA,
-  GLS.CUDACompiler,
-  GLS.CUDAContext,
-  GLS.CUDAGraphics,
+  GPU.CUDA,
+  GPU.CUDACompiler,
+  GPU.CUDAContext,
+  GPU.CUDAGraphics,
 
   GLS.Material,
   GLSL.CustomShader,
@@ -41,24 +41,21 @@ type
     GLCamera1: TGLCamera;
     GLDummyCube1: TGLDummyCube;
     GLSimpleNavigation1: TGLSimpleNavigation;
-    GLSCUDADevice1: TGLSCUDADevice;
-    GLSCUDA1: TGLSCUDA;
-    GLSCUDACompiler1: TGLSCUDACompiler;
+    GLSCUDADevice1: TGLCUDADevice;
+    GLSCUDA1: TGLCUDA;
+    GLSCUDACompiler1: TGLCUDACompiler;
     MainModule: TCUDAModule;
     DotFieldMapper: TCUDAGeometryResource;
     GLSLShader1: TGLSLShader;
     MakeDotField: TCUDAFunction;
-    GLS.FeedbackMesh1: TGLS.FeedbackMesh;
+    CUDAFeedbackMesh1: TCUDAFeedbackMesh;
     procedure GLCadencer1Progress(Sender: TObject;
       const deltaTime, newTime: Double);
     procedure MakeVertexBufferParameterSetup(Sender: TObject);
     procedure FormCreate(Sender: TObject);
     procedure GLSLShader1Apply(Shader: TGLCustomGLSLShader);
     procedure GLSCUDA1OpenGLInteropInit(out Context: TGLContext);
-  private
-
   public
-     
     FieldWidth: Integer;
     FieldHeight: Integer;
   end;
@@ -74,8 +71,8 @@ procedure TForm1.FormCreate(Sender: TObject);
 begin
   FieldWidth := 256;
   FieldHeight := 256;
-  GLS.FeedbackMesh1.VertexNumber := FieldWidth * FieldHeight;
-  GLS.FeedbackMesh1.Visible := True;
+  CUDAFeedbackMesh1.VertexNumber := FieldWidth * FieldHeight;
+  CUDAFeedbackMesh1.Visible := True;
   MakeDotField.Grid.SizeX := FieldWidth div MakeDotField.BlockShape.SizeX;
   MakeDotField.Grid.SizeY := FieldWidth div MakeDotField.BlockShape.SizeY;
 end;
@@ -96,7 +93,7 @@ procedure TForm1.MakeVertexBufferParameterSetup(Sender: TObject);
 begin
   with MakeDotField do
   begin
-    SetParam(DotFieldMapper.AttributeDataAddress[GLS.FeedbackMesh1.Attributes[0].Name]);
+    SetParam(DotFieldMapper.AttributeDataAddress[CUDAFeedbackMesh1.Attributes[0].Name]);
     SetParam(FieldWidth);
     SetParam(FieldHeight);
     SetParam(GLCadencer1.CurrentTime);

+ 10 - 7
Demos/Delphi/materials/cubemap/Unit1.dfm

@@ -2,8 +2,8 @@ object Form1: TForm1
   Left = 135
   Top = 85
   Caption = 'Cube Map'
-  ClientHeight = 364
-  ClientWidth = 415
+  ClientHeight = 590
+  ClientWidth = 677
   Color = clBtnFace
   Font.Charset = DEFAULT_CHARSET
   Font.Color = clWindowText
@@ -18,20 +18,23 @@ object Form1: TForm1
   object GLSceneViewer1: TGLSceneViewer
     Left = 0
     Top = 0
-    Width = 415
-    Height = 364
+    Width = 677
+    Height = 590
     Camera = GLCamera1
     BeforeRender = GLSceneViewer1BeforeRender
     Buffer.BackgroundColor = clTeal
-    FieldOfView = 62.487583160400390000
+    FieldOfView = 89.037071228027340000
+    PenAsTouch = False
     Align = alClient
     OnMouseDown = GLSceneViewer1MouseDown
     OnMouseMove = GLSceneViewer1MouseMove
     TabOrder = 0
+    ExplicitWidth = 415
+    ExplicitHeight = 364
   end
   object Button1: TButton
-    Left = 136
-    Top = 8
+    Left = 264
+    Top = 24
     Width = 163
     Height = 25
     Caption = 'Apply Cube Environment Map'

+ 2 - 0
Demos/Delphi/meshes/actor/Actor.dproj

@@ -106,6 +106,7 @@
         <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
         <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
         <VerInfo_Keys>CompanyName=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductVersion=1.0.0.0;Comments=;ProgramID=com.embarcadero.$(MSBuildProjectName);FileDescription=$(MSBuildProjectName);ProductName=$(MSBuildProjectName)</VerInfo_Keys>
+        <DCC_UnitSearchPath>..\..\..\..\Source;$(DCC_UnitSearchPath)</DCC_UnitSearchPath>
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_2_Win64)'!=''">
         <BT_BuildType>Debug</BT_BuildType>
@@ -147,6 +148,7 @@
                     <Source Name="MainSource">Actor.dpr</Source>
                 </Source>
                 <Excluded_Packages>
+                    <Excluded_Packages Name="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl">File C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl not found</Excluded_Packages>
                     <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
                     <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
                     <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>

+ 10 - 7
Demos/Delphi/meshes/shadedterrain/Unit1.dfm

@@ -2,8 +2,8 @@ object Form1: TForm1
   Left = 165
   Top = 146
   Caption = 'Shaded Terrain'
-  ClientHeight = 408
-  ClientWidth = 788
+  ClientHeight = 585
+  ClientWidth = 1024
   Color = clBtnFace
   Font.Charset = DEFAULT_CHARSET
   Font.Color = clWindowText
@@ -21,8 +21,8 @@ object Form1: TForm1
   object GLSceneViewer1: TGLSceneViewer
     Left = 0
     Top = 65
-    Width = 788
-    Height = 343
+    Width = 1024
+    Height = 520
     Camera = GLCamera1
     BeforeRender = GLSceneViewer1BeforeRender
     Buffer.FogEnvironment.FogColor.Color = {0000803F0000803F0000803F0000803F}
@@ -32,7 +32,7 @@ object Form1: TForm1
     Buffer.BackgroundColor = clGray
     Buffer.FogEnable = True
     Buffer.Lighting = False
-    FieldOfView = 147.492416381835900000
+    FieldOfView = 158.228942871093800000
     PenAsTouch = False
     Align = alClient
     OnMouseDown = GLSceneViewer1MouseDown
@@ -42,7 +42,7 @@ object Form1: TForm1
   object Panel1: TPanel
     Left = 0
     Top = 0
-    Width = 788
+    Width = 1024
     Height = 65
     Align = alTop
     BevelOuter = bvLowered
@@ -108,6 +108,7 @@ object Form1: TForm1
       TabStop = False
       ThumbLength = 10
       TickMarks = tmBoth
+      OnChange = TBSubSamplingChange
     end
     object TBIntensity: TTrackBar
       Left = 125
@@ -138,7 +139,6 @@ object Form1: TForm1
       TabStop = False
       ThumbLength = 10
       TickMarks = tmBoth
-      OnChange = TBContourIntervalChange
     end
     object TBScaleZ: TTrackBar
       Left = 520
@@ -284,6 +284,7 @@ object Form1: TForm1
         Material.Texture.Compression = tcStandard
         Material.Texture.Disabled = False
         TextureScale.Coordinates = {00000042000000420000004200000000}
+        Texture2Name = 'ground'
       end
       item
         Name = 'texture'
@@ -491,6 +492,7 @@ object Form1: TForm1
         Material.Texture.MappingTCoordinates.Coordinates = {00000000000000000000000000000000}
         Material.Texture.Disabled = False
         TextureOffset.Coordinates = {0000003F000000000000000000000000}
+        Texture2Name = 'contrast'
       end
       item
         Name = 'contrast'
@@ -500,6 +502,7 @@ object Form1: TForm1
         Material.Texture.TextureMode = tmReplace
         Material.Texture.EnvColor.Color = {0000003F0000003F0000003F0000003F}
         Material.Texture.Disabled = False
+        Texture2Name = 'details'
       end>
     Left = 104
     Top = 144

+ 31 - 26
Demos/Delphi/meshes/shadedterrain/Unit1.pas

@@ -4,6 +4,7 @@ interface
 
 uses
   Winapi.Windows,
+  Winapi.OpenGL,
   System.SysUtils,
   System.UITypes,
   System.Classes,
@@ -21,6 +22,7 @@ uses
   GLS.Objects,
   GLS.Keyboard,
   GLS.TerrainRenderer,
+  GLS.ROAMPatch,
   GLS.HeightData,
   GLS.Cadencer,
   GLS.Texture,
@@ -28,14 +30,14 @@ uses
   GLS.SceneViewer,
   GLS.VectorTypes,
   GLS.VectorGeometry,
-  GLLensFlare,
+  GLS.LensFlare,
   GLS.BumpMapHDS,
+  GLSL.TextureShaders,
   GLS.Material,
   GLS.Coordinates,
- 
+
   GLS.State,
-  GLS.Utils,
-  GLSL.TextureShaders;
+  GLS.Utils;
 
 type
   TForm1 = class(TForm)
@@ -52,7 +54,7 @@ type
     SPSun: TGLSprite;
     GLLensFlare: TGLLensFlare;
     GLDummyCube1: TGLDummyCube;
-    GLTexCombineShader1: TGLTexCombineShader;
+    GLTexCombineShader1: TGLTexCombineShader;
     GLBumpmapHDS1: TGLBumpmapHDS;
     Panel1: TPanel;
     Label1: TLabel;
@@ -67,6 +69,7 @@ type
     LabelZ: TLabel;
     LabelContInterval: TLabel;
     CBContourIntervals: TCheckBox;
+    
     procedure GLSceneViewer1MouseDown(Sender: TObject; Button: TMouseButton;
       Shift: TShiftState; X, Y: Integer);
     procedure GLSceneViewer1MouseMove(Sender: TObject; Shift: TShiftState;
@@ -103,17 +106,21 @@ implementation
 procedure TForm1.FormCreate(Sender: TObject);
 begin
   SetGLSceneMediaDir();
-  // 8 MB height data cache
+
+  // Load Terrain in 8 MB height data cache
   // Note this is the data size in terms of elevation samples, it does not
   // take into account all the data required/allocated by the renderer
   GLBitmapHDS1.MaxPoolSize := 8 * 1024 * 1024;
-
-  // specify height map data
+
+  // specify a map for height field data
   GLBitmapHDS1.Picture.LoadFromFile('terrain.bmp');
 
-  // load the texture maps
-  GLMaterialLibrary1.LibMaterialByName('details')
-    .Material.Texture.Image.LoadFromFile('detailmap.jpg');
+  GLMaterialLibrary1.LibMaterialByName('details').Material.Texture.Image.LoadFromFile('detailmap.jpg');
+
+  (*
+  GLMaterialLibrary1.LibMaterialByName('texture').Material.Texture.Image.LoadFromFile('texture.jpg');
+  *)
+
   SPSun.Material.Texture.Image.LoadFromFile('flare1.bmp');
 
   // Could've been done at design time, but then it hurts the eyes ;)
@@ -122,38 +129,35 @@ begin
   FCamHeight := 20;
 
   // apply texture map scale (our heightmap size is 256)
-  TerrainRenderer1.TilesPerTexture := 1; // 256/TerrainRenderer1.TileSize;
+  TerrainRenderer1.TilesPerTexture := 4; // 256/TerrainRenderer1.TileSize;
   // TerrainRenderer1.MaterialLibrary := GLMaterialLibrary1;
   TerrainRenderer1.ContourWidth := 2;
 
-  // initialize intensity texture
   TBIntensityChange(Self);
-  // initialize Scale Z
   TBScaleZChange(Self);
-  // initialize ContourInterval
-  TBContourIntervalChange(Self);
 end;
 
 procedure TForm1.FormShow(Sender: TObject);
 begin
   TBSubSamplingChange(Self);
+  TBContourIntervalChange(Self);
 end;
 
 procedure TForm1.GLBumpmapHDS1NewTilePrepared(Sender: TGLBumpmapHDS;
   heightData: TGLHeightData; normalMapMaterial: TGLLibMaterial);
 var
-  n: TVector;
+  Vec: TVector;
 begin
   heightData.MaterialName := normalMapMaterial.Name;
-  normalMapMaterial.Texture2Name := 'contrast';
+  normalMapMaterial.Texture2Name := 'details';//'texture'; not 'ground' or 'contrast';
   normalMapMaterial.Shader := GLTexCombineShader1;
   normalMapMaterial.Material.MaterialOptions := [moNoLighting];
-  n := VectorNormalize(SPSun.AbsolutePosition);
-  ScaleVector(n, 0.5);
-  n.Y := -n.Y;
-  n.Z := -n.Z;
-  AddVector(n, 0.5);
-  normalMapMaterial.Material.FrontProperties.Diffuse.Color := n;
+  Vec := VectorNormalize(SPSun.AbsolutePosition);
+  ScaleVector(Vec, 0.5);
+  Vec.Y := -Vec.Y;
+  Vec.Z := -Vec.Z;
+  AddVector(Vec, 0.5);
+  normalMapMaterial.Material.FrontProperties.Diffuse.Color := Vec;
 end;
 
 procedure TForm1.GLCadencer1Progress(Sender: TObject;
@@ -209,6 +213,7 @@ begin
   end;
 end;
 
+
 procedure TForm1.Timer1Timer(Sender: TObject);
 begin
   Caption := 'Shaded Terrain ' + GLSceneViewer1.FramesPerSecondText;
@@ -303,7 +308,7 @@ var
   i: Integer;
   bmp: TBitmap;
 begin
-  with GLMaterialLibrary1.LibMaterialByName('contrast').Material do
+  with GLMaterialLibrary1.LibMaterialByName('ground').Material do
   begin
     bmp := TBitmap.Create;
     try
@@ -340,7 +345,7 @@ begin
     TerrainRenderer1.ContourInterval := TBContourInterval.Position
   else
     TerrainRenderer1.ContourInterval := 0;
-  SetFocus;
+  TBContourinterval.SetFocus;
 end;
 
 end.

+ 9 - 6
Demos/Delphi/meshes/shadedterrain/shadedterrain.dproj

@@ -103,11 +103,13 @@
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_2_Win32)'!=''">
         <BT_BuildType>Debug</BT_BuildType>
-        <DCC_ExeOutput>.\$(Platform)\$(Config)</DCC_ExeOutput>
+        <DCC_ExeOutput>.</DCC_ExeOutput>
         <VerInfo_Locale>1033</VerInfo_Locale>
         <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
         <Icon_MainIcon>$(BDS)\bin\delphi_PROJECTICON.ico</Icon_MainIcon>
-        <VerInfo_Keys>CompanyName=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductVersion=1.0.0.0;Comments=;ProgramID=com.embarcadero.$(ModuleName);FileDescription=$(ModuleName);ProductName=$(ModuleName)</VerInfo_Keys>
+        <VerInfo_Keys>CompanyName=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductVersion=1.0.0.0;Comments=;ProgramID=com.embarcadero.$(MSBuildProjectName);FileDescription=$(MSBuildProjectName);ProductName=$(MSBuildProjectName)</VerInfo_Keys>
+        <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
+        <DCC_UnitSearchPath>..\..\..\..\Source;$(DCC_UnitSearchPath)</DCC_UnitSearchPath>
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_2_Win64)'!=''">
         <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
@@ -146,10 +148,11 @@
                     <Source Name="MainSource">shadedterrain.dpr</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl">File C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl not found</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 5 - 4
Demos/Delphi/meshes/skeletal/skeletal.dproj

@@ -112,10 +112,11 @@
                     <Source Name="MainSource">skeletal.dpr</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl">File C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_DT.bpl not found</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 12 - 0
Demos/Delphi/meshes/synthterr/Unit1.dfm

@@ -88,4 +88,16 @@ object Form1: TForm1
     Left = 152
     Top = 88
   end
+  object GLShadowHDS: TGLShadowHDS
+    MaxPoolSize = 0
+    Active = True
+    ShadowmapLibrary = GLMaterialLibrary1
+    ScanDistance = 64
+    SoftRange = 1
+    Diffuse = 0.750000000000000000
+    Ambient = 0.250000000000000000
+    MaxTextures = 0
+    Left = 256
+    Top = 88
+  end
 end

+ 108 - 101
Demos/Delphi/meshes/synthterr/Unit1.pas

@@ -25,11 +25,11 @@ uses
   GLS.Texture,
   GLS.SceneViewer,
   GLS.VectorGeometry,
- 
+
   GLS.Material,
   GLS.Coordinates,
   GLS.BaseClasses,
-  GLS.Keyboard;
+  GLS.Keyboard, GLS.ShadowHDS;
 
 type
   TForm1 = class(TForm)
@@ -42,23 +42,21 @@ type
     GLCadencer1: TGLCadencer;
     GLMaterialLibrary1: TGLMaterialLibrary;
     GLCustomHDS: TGLCustomHDS;
-    procedure GLSceneViewer1MouseDown(Sender: TObject;
-      Button: TMouseButton; Shift: TShiftState; X, Y: Integer);
+    GLShadowHDS: TGLShadowHDS;
+    procedure GLSceneViewer1MouseDown(Sender: TObject; Button: TMouseButton;
+      Shift: TShiftState; X, Y: Integer);
     procedure GLSceneViewer1MouseMove(Sender: TObject; Shift: TShiftState;
       X, Y: Integer);
     procedure Timer1Timer(Sender: TObject);
-    procedure GLCadencer1Progress(Sender: TObject; const deltaTime,
-      newTime: Double);
+    procedure GLCadencer1Progress(Sender: TObject;
+      const deltaTime, newTime: Double);
     procedure FormCreate(Sender: TObject);
     procedure FormKeyPress(Sender: TObject; var Key: Char);
-    procedure GLCustomHDSStartPreparingData(heightData: TGLHeightData);
-  private
-
+    procedure GLCustomHDSStartPreparingData(HeightData: TGLHeightData);
   public
-
-    mx, my : Integer;
-    fullScreen : Boolean;
-    FCamHeight : Single;
+    mx, my: Integer;
+    fullScreen: Boolean;
+    FCamHeight: Single;
   end;
 
 var
@@ -70,8 +68,8 @@ implementation
 
 procedure TForm1.FormCreate(Sender: TObject);
 var
-   i : Integer;
-   bmp : TBitmap;
+  i: Integer;
+  bmp: TBitmap;
 begin
   // 8 MB height data cache
   // Note this is the data size in terms of elevation samples, it does not
@@ -96,32 +94,23 @@ begin
     bmp.Canvas.Pixels[i, 0] := RGB(i, i, i);
   with GLMaterialLibrary1.AddTextureMaterial('BW', bmp) do
   begin
-    with Material.Texture do
-    begin
-      MappingMode := tmmObjectLinear;
-      MappingSCoordinates.AsVector := VectorMake(0, 0, 0.0001, 0);
-    end;
+    Material.Texture.MappingMode := tmmObjectLinear;
+    Material.Texture.MappingSCoordinates.AsVector := VectorMake(0, 0, 0.0001, 0);
   end;
   // Red, Blue map linearly to X and Y axis respectively
   for i := 0 to 255 do
     bmp.Canvas.Pixels[i, 0] := RGB(i, 0, 0);
   with GLMaterialLibrary1.AddTextureMaterial('Red', bmp) do
   begin
-    with Material.Texture do
-    begin
-      MappingMode := tmmObjectLinear;
-      MappingSCoordinates.AsVector := VectorMake(0.1, 0, 0, 0);
-    end;
+    Material.Texture.MappingMode := tmmObjectLinear;
+    Material.Texture.MappingSCoordinates.AsVector := VectorMake(0.1, 0, 0, 0);
   end;
   for i := 0 to 255 do
     bmp.Canvas.Pixels[i, 0] := RGB(0, 0, i);
   with GLMaterialLibrary1.AddTextureMaterial('Blue', bmp) do
   begin
-    with Material.Texture do
-    begin
-      MappingMode := tmmObjectLinear;
-      MappingSCoordinates.AsVector := VectorMake(0, 0.1, 0, 0);
-    end;
+    Material.Texture.MappingMode := tmmObjectLinear;
+    Material.Texture.MappingSCoordinates.AsVector := VectorMake(0, 0.1, 0, 0);
   end;
   bmp.Free;
   TerrainRenderer1.MaterialLibrary := GLMaterialLibrary1;
@@ -131,101 +120,119 @@ end;
 // The beef : this event does all the interesting elevation data stuff
 //
 
-procedure TForm1.GLCustomHDSStartPreparingData(heightData: TGLHeightData);
+procedure TForm1.GLCustomHDSStartPreparingData(HeightData: TGLHeightData);
 var
-   y, x : Integer;
-   rasterLine : PByteArray;
-   oldType : TGLHeightDataType;
-   b : Byte;
-   d, dy : Single;
+  Y, X: Integer;
+  rasterLine: PByteArray;
+  oldType: TGLHeightDataType;
+  b: Byte;
+  d, dy: Single;
 begin
-   heightData.DataState:=hdsPreparing;
-   // retrieve data
-   with heightData do begin
-      oldType:=DataType;
-      Allocate(hdtByte);
-      // Cheap texture changed (32 is our tileSize = 2^5)
-      // This basicly picks a texture for each tile depending on the tile's position
-      case (((XLeft xor YTop) shr 5) and 3) of
-         0, 3 : heightData.MaterialName:='BW';
-         1 : heightData.materialName:='Blue';
-         2 : heightData.materialName:='Red';
-      end;
-      // 'Cheap' elevation data : this is just a formula z=f(x, y)
-      for y:=YTop to YTop+Size-1 do begin
-         rasterLine:=ByteRaster[y-YTop];
-         dy:=Sqr(y);
-         for x:=XLeft to XLeft+Size-1 do begin
-            d:=Sqrt(Sqr(x)+dy);
-            b:=Round(128+128*Sin(d*0.2)/(d*0.1+1));
-            rasterLine[x-XLeft]:=b;
-         end;
+  HeightData.DataState := hdsPreparing;
+  // retrieve data
+  with HeightData do
+  begin
+    oldType := DataType;
+    Allocate(hdtByte);
+    // Cheap texture changed (32 is our tileSize = 2^5)
+    // This basicly picks a texture for each tile depending on the tile's position
+    case (((XLeft xor YTop) shr 5) and 3) of
+      0, 3: HeightData.MaterialName := 'BW';
+         1: HeightData.MaterialName := 'Blue';
+         2: HeightData.MaterialName := 'Red';
+    end;
+    // 'Cheap' elevation data : this is just a formula z=f(x, y)
+    for Y := YTop to YTop + Size - 1 do
+    begin
+      rasterLine := ByteRaster[Y - YTop];
+      dy := Sqr(Y);
+      for X := XLeft to XLeft + Size - 1 do
+      begin
+        d := Sqrt(Sqr(X) + dy);
+        b := Round(128 + 128 * Sin(d * 0.2) / (d * 0.1 + 1));
+        rasterLine[X - XLeft] := b;
       end;
-      if oldType<>hdtByte then
-         DataType:=oldType;
-   end;
-   inherited;
+    end;
+    if oldType <> hdtByte then
+      DataType := oldType;
+  end;
+  inherited;
 end;
 
 // Movement, mouse handling etc.
 
-procedure TForm1.GLSceneViewer1MouseDown(Sender: TObject;
-  Button: TMouseButton; Shift: TShiftState; X, Y: Integer);
+procedure TForm1.GLSceneViewer1MouseDown(Sender: TObject; Button: TMouseButton;
+  Shift: TShiftState; X, Y: Integer);
 begin
-   mx:=x;
-   my:=y;
+  mx := X;
+  my := Y;
 end;
 
-procedure TForm1.GLSceneViewer1MouseMove(Sender: TObject;
-  Shift: TShiftState; X, Y: Integer);
+procedure TForm1.GLSceneViewer1MouseMove(Sender: TObject; Shift: TShiftState;
+  X, Y: Integer);
 begin
-   if ssLeft in Shift then begin
-      GLCamera1.MoveAroundTarget(my-y, mx-x);
-      mx:=x;
-      my:=y;
-   end;
+  if ssLeft in Shift then
+  begin
+    GLCamera1.MoveAroundTarget(my - Y, mx - X);
+    mx := X;
+    my := Y;
+  end;
 end;
 
 procedure TForm1.Timer1Timer(Sender: TObject);
 begin
-   Caption:=Format('%.1f FPS - %d',
-                   [GLSceneViewer1.FramesPerSecond, TerrainRenderer1.LastTriangleCount]);
-   GLSceneViewer1.ResetPerformanceMonitor;
+  Caption := Format('%.1f FPS - %d', [GLSceneViewer1.FramesPerSecond,
+    TerrainRenderer1.LastTriangleCount]);
+  GLSceneViewer1.ResetPerformanceMonitor;
 end;
 
 procedure TForm1.FormKeyPress(Sender: TObject; var Key: Char);
 begin
-   case Key of
-      '+' : if GLCamera1.DepthOfView<4000 then begin
-         GLCamera1.DepthOfView:=GLCamera1.DepthOfView*1.2;
-         with GLSceneViewer1.Buffer.FogEnvironment do begin
-            FogEnd:=FogEnd*1.2;
-            FogStart:=FogStart*1.2;
-         end;
+  case Key of
+    '+':
+      if GLCamera1.DepthOfView < 4000 then
+      begin
+        GLCamera1.DepthOfView := GLCamera1.DepthOfView * 1.2;
+        with GLSceneViewer1.Buffer.FogEnvironment do
+        begin
+          FogEnd := FogEnd * 1.2;
+          FogStart := FogStart * 1.2;
+        end;
       end;
-      '-' : if GLCamera1.DepthOfView>300 then begin
-         GLCamera1.DepthOfView:=GLCamera1.DepthOfView/1.2;
-         with GLSceneViewer1.Buffer.FogEnvironment do begin
-            FogEnd:=FogEnd/1.2;
-            FogStart:=FogStart/1.2;
-         end;
+    '-':
+      if GLCamera1.DepthOfView > 300 then
+      begin
+        GLCamera1.DepthOfView := GLCamera1.DepthOfView / 1.2;
+        with GLSceneViewer1.Buffer.FogEnvironment do
+        begin
+          FogEnd := FogEnd / 1.2;
+          FogStart := FogStart / 1.2;
+        end;
       end;
-      '*' : with TerrainRenderer1 do
-         if CLODPrecision>5 then CLODPrecision:=Round(CLODPrecision*0.8);
-      '/' : with TerrainRenderer1 do
-         if CLODPrecision<500 then CLODPrecision:=Round(CLODPrecision*1.2);
-      '8' : with TerrainRenderer1 do
-         if QualityDistance>40 then QualityDistance:=Round(QualityDistance*0.8);
-      '9' : with TerrainRenderer1 do
-         if QualityDistance<1000 then QualityDistance:=Round(QualityDistance*1.2);
-   end;
-   Key:=#0;
+    '*':
+      with TerrainRenderer1 do
+        if CLODPrecision > 5 then
+          CLODPrecision := Round(CLODPrecision * 0.8);
+    '/':
+      with TerrainRenderer1 do
+        if CLODPrecision < 500 then
+          CLODPrecision := Round(CLODPrecision * 1.2);
+    '8':
+      with TerrainRenderer1 do
+        if QualityDistance > 40 then
+          QualityDistance := Round(QualityDistance * 0.8);
+    '9':
+      with TerrainRenderer1 do
+        if QualityDistance < 1000 then
+          QualityDistance := Round(QualityDistance * 1.2);
+  end;
+  Key := #0;
 end;
 
-procedure TForm1.GLCadencer1Progress(Sender: TObject; const deltaTime,
-  newTime: Double);
+procedure TForm1.GLCadencer1Progress(Sender: TObject;
+  const deltaTime, newTime: Double);
 var
-   speed : Single;
+  speed: Single;
 begin
   // handle keypresses
   if IsKeyDown(VK_SHIFT) then

+ 1 - 1
Demos/Delphi/meshes/terrain/Unit1.pas

@@ -33,7 +33,7 @@ uses
   GLS.Sound,
   GLSM.BASS,
   GLS.VectorGeometry,
-  GLLensFlare,
+  GLS.LensFlare,
   GLS.Material,
   GLS.Coordinates,
   GLS.BaseClasses,

+ 1 - 1
Demos/Delphi/physics/odeterrain/Unit1.pas

@@ -29,7 +29,7 @@ uses
   GLS.SkyDome,
   GLS.SceneViewer,
   GLS.VectorGeometry,
-  GLLensFlare,
+  GLS.LensFlare,
   Physics.ODEManager,
   GLS.Navigator,
   GLS.GeomObjects,

+ 1 - 1
Demos/Delphi/rendering/SkyBox/Unit1.pas

@@ -23,7 +23,7 @@ uses
   GLS.Navigator,
   GLS.SceneViewer,
   GLS.Keyboard,
-  GLLensFlare,
+  GLS.LensFlare,
   GLS.Objects,
   GLS.Material,
   GLS.Coordinates,

+ 1 - 4
Demos/Delphi/utilities/ArchiverDemo/main.pas

@@ -64,11 +64,8 @@ begin
 end;
 
 procedure TForm1.FormCreate(Sender: TObject);
-var
-  FileName: TFileName;
 begin
-  ///SetGLSceneMediaDir();
-  FileName := GetSceneMediaPath();
+  SetGLSceneMediaDir();
   GLMaterialLibrary1.TexturePaths := GetCurrentDir();
   with GLSArchiveManager1.Archives[0] do
   begin

BIN
Demos/media/ground.jpg


BIN
Demos/media/texture.jpg


+ 17 - 17
Packages/GLScene.groupproj

@@ -21,10 +21,10 @@
         <Projects Include="GLScene_Physics_DT.dproj">
             <Dependencies/>
         </Projects>
-        <Projects Include="GLScene_Parallel_RT.dproj">
+        <Projects Include="GLScene_GPU_RT.dproj">
             <Dependencies/>
         </Projects>
-        <Projects Include="GLScene_Parallel_DT.dproj">
+        <Projects Include="GLScene_GPU_DT.dproj">
             <Dependencies/>
         </Projects>
         <Projects Include="GLScene_Cg_RT.dproj">
@@ -95,23 +95,23 @@
     <Target Name="GLScene_Physics_DT:Make">
         <MSBuild Projects="GLScene_Physics_DT.dproj" Targets="Make"/>
     </Target>
-    <Target Name="GLScene_Parallel_RT">
-        <MSBuild Projects="GLScene_Parallel_RT.dproj"/>
+    <Target Name="GLScene_GPU_RT">
+        <MSBuild Projects="GLScene_GPU_RT.dproj"/>
     </Target>
-    <Target Name="GLScene_Parallel_RT:Clean">
-        <MSBuild Projects="GLScene_Parallel_RT.dproj" Targets="Clean"/>
+    <Target Name="GLScene_GPU_RT:Clean">
+        <MSBuild Projects="GLScene_GPU_RT.dproj" Targets="Clean"/>
     </Target>
-    <Target Name="GLScene_Parallel_RT:Make">
-        <MSBuild Projects="GLScene_Parallel_RT.dproj" Targets="Make"/>
+    <Target Name="GLScene_GPU_RT:Make">
+        <MSBuild Projects="GLScene_GPU_RT.dproj" Targets="Make"/>
     </Target>
-    <Target Name="GLScene_Parallel_DT">
-        <MSBuild Projects="GLScene_Parallel_DT.dproj"/>
+    <Target Name="GLScene_GPU_DT">
+        <MSBuild Projects="GLScene_GPU_DT.dproj"/>
     </Target>
-    <Target Name="GLScene_Parallel_DT:Clean">
-        <MSBuild Projects="GLScene_Parallel_DT.dproj" Targets="Clean"/>
+    <Target Name="GLScene_GPU_DT:Clean">
+        <MSBuild Projects="GLScene_GPU_DT.dproj" Targets="Clean"/>
     </Target>
-    <Target Name="GLScene_Parallel_DT:Make">
-        <MSBuild Projects="GLScene_Parallel_DT.dproj" Targets="Make"/>
+    <Target Name="GLScene_GPU_DT:Make">
+        <MSBuild Projects="GLScene_GPU_DT.dproj" Targets="Make"/>
     </Target>
     <Target Name="GLScene_Cg_RT">
         <MSBuild Projects="GLScene_Cg_RT.dproj"/>
@@ -132,13 +132,13 @@
         <MSBuild Projects="GLScene_Cg_DT.dproj" Targets="Make"/>
     </Target>
     <Target Name="Build">
-        <CallTarget Targets="GLScene_RT;GLScene_DT;GLScene_Sounds_RT;GLScene_Sounds_DT;GLScene_Physics_RT;GLScene_Physics_DT;GLScene_Parallel_RT;GLScene_Parallel_DT;GLScene_Cg_RT;GLScene_Cg_DT"/>
+        <CallTarget Targets="GLScene_RT;GLScene_DT;GLScene_Sounds_RT;GLScene_Sounds_DT;GLScene_Physics_RT;GLScene_Physics_DT;GLScene_GPU_RT;GLScene_GPU_DT;GLScene_Cg_RT;GLScene_Cg_DT"/>
     </Target>
     <Target Name="Clean">
-        <CallTarget Targets="GLScene_RT:Clean;GLScene_DT:Clean;GLScene_Sounds_RT:Clean;GLScene_Sounds_DT:Clean;GLScene_Physics_RT:Clean;GLScene_Physics_DT:Clean;GLScene_Parallel_RT:Clean;GLScene_Parallel_DT:Clean;GLScene_Cg_RT:Clean;GLScene_Cg_DT:Clean"/>
+        <CallTarget Targets="GLScene_RT:Clean;GLScene_DT:Clean;GLScene_Sounds_RT:Clean;GLScene_Sounds_DT:Clean;GLScene_Physics_RT:Clean;GLScene_Physics_DT:Clean;GLScene_GPU_RT:Clean;GLScene_GPU_DT:Clean;GLScene_Cg_RT:Clean;GLScene_Cg_DT:Clean"/>
     </Target>
     <Target Name="Make">
-        <CallTarget Targets="GLScene_RT:Make;GLScene_DT:Make;GLScene_Sounds_RT:Make;GLScene_Sounds_DT:Make;GLScene_Physics_RT:Make;GLScene_Physics_DT:Make;GLScene_Parallel_RT:Make;GLScene_Parallel_DT:Make;GLScene_Cg_RT:Make;GLScene_Cg_DT:Make"/>
+        <CallTarget Targets="GLScene_RT:Make;GLScene_DT:Make;GLScene_Sounds_RT:Make;GLScene_Sounds_DT:Make;GLScene_Physics_RT:Make;GLScene_Physics_DT:Make;GLScene_GPU_RT:Make;GLScene_GPU_DT:Make;GLScene_Cg_RT:Make;GLScene_Cg_DT:Make"/>
     </Target>
     <Import Project="$(BDS)\Bin\CodeGear.Group.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Group.Targets')"/>
 </Project>

+ 4 - 4
Packages/GLScene_Cg_DT.dproj

@@ -143,10 +143,10 @@
                     <Source Name="MainSource">GLScene_Cg_DT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 4
Packages/GLScene_Cg_RT.dproj

@@ -158,10 +158,10 @@
                     <Source Name="MainSource">GLScene_Cg_RT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 4
Packages/GLScene_DT.dproj

@@ -196,10 +196,10 @@
                     <Source Name="MainSource">GLScene_DT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 5 - 4
Packages/GLScene_Parallel_DT.dpk → Packages/GLScene_GPU_DT.dpk

@@ -1,7 +1,7 @@
-package GLScene_Parallel_DT;
+package GLScene_GPU_DT;
 
 {$R *.res}
-{$R 'GLSceneParallel.res'}
+{$R 'GLSceneGPU.res'}
 {$IFDEF IMPLICITBUILDING This IFDEF should not be used by users}
 {$ALIGN 8}
 {$ASSERTIONS ON}
@@ -36,10 +36,11 @@ requires
   vcl,
   VclSmp,
   GLScene_DT,
-  GLScene_Parallel_RT;
+  GLScene_GPU_RT;
 
 contains
   FCUDAEditor in '..\Source\FCUDAEditor.pas' {GLSCUDAEditorForm},
-  GLS.ParallelRegister in '..\Source\GLS.ParallelRegister.pas';
+  GPU.CUDAPropEditors in '..\Source\GPU.CUDAPropEditors.pas',
+  GPU.CUDARegister in '..\Source\GPU.CUDARegister.pas';
 
 end.

+ 17 - 16
Packages/GLScene_Parallel_DT.dproj → Packages/GLScene_GPU_DT.dproj

@@ -1,7 +1,7 @@
 <Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
     <PropertyGroup>
         <ProjectGuid>{AFDCEE2E-E581-458A-A3FE-13C7AC5A6891}</ProjectGuid>
-        <MainSource>GLScene_Parallel_DT.dpk</MainSource>
+        <MainSource>GLScene_GPU_DT.dpk</MainSource>
         <Base>True</Base>
         <Config Condition="'$(Config)'==''">Debug</Config>
         <TargetedPlatforms>1</TargetedPlatforms>
@@ -54,7 +54,7 @@
     <PropertyGroup Condition="'$(Base)'!=''">
         <DCC_CBuilderOutput>All</DCC_CBuilderOutput>
         <GenPackage>true</GenPackage>
-        <SanitizedProjectName>GLScene_Parallel_DT</SanitizedProjectName>
+        <SanitizedProjectName>GLScene_GPU_DT</SanitizedProjectName>
         <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
         <DCC_K>false</DCC_K>
         <GenDll>true</GenDll>
@@ -70,7 +70,7 @@
         <DCC_ImageBase>00400000</DCC_ImageBase>
     </PropertyGroup>
     <PropertyGroup Condition="'$(Base_Win32)'!=''">
-        <DCC_UsePackage>rtl;vcl;GLScene_Parallel_RT;VclSmp;GLScene_RT;$(DCC_UsePackage)</DCC_UsePackage>
+        <DCC_UsePackage>rtl;vcl;GLScene_Parallel_RT;VclSmp;GLScene_RT;GLScene_GPU_RT;$(DCC_UsePackage)</DCC_UsePackage>
         <DCC_Namespace>Data.Win;Datasnap.Win;Web.Win;Soap.Win;Xml.Win;$(DCC_Namespace)</DCC_Namespace>
         <DCC_DcuOutput>..\lib\$(Platform)</DCC_DcuOutput>
         <DCC_BpiOutput>..\lib\$(Platform)</DCC_BpiOutput>
@@ -80,7 +80,7 @@
     <PropertyGroup Condition="'$(Base_Win64)'!=''">
         <DCC_Namespace>Data.Win;Datasnap.Win;Web.Win;Soap.Win;Xml.Win;$(DCC_Namespace)</DCC_Namespace>
         <VerInfo_Keys>CompanyName=;FileDescription=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=;ProductVersion=1.0.0.0;Comments=</VerInfo_Keys>
-        <DCC_UsePackage>rtl;vcl;GLScene_Parallel_RT;VclSmp;$(DCC_UsePackage)</DCC_UsePackage>
+        <DCC_UsePackage>rtl;vcl;GLScene_Parallel_RT;VclSmp;GLScene_GPU_RT;$(DCC_UsePackage)</DCC_UsePackage>
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_1)'!=''">
         <DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
@@ -112,19 +112,20 @@
         <DelphiCompile Include="$(MainSource)">
             <MainSource>MainSource</MainSource>
         </DelphiCompile>
-        <DCCReference Include="GLSceneParallel.res"/>
+        <DCCReference Include="GLSceneGPU.res"/>
         <DCCReference Include="rtl.dcp"/>
         <DCCReference Include="designide.dcp"/>
         <DCCReference Include="vcl.dcp"/>
         <DCCReference Include="VclSmp.dcp"/>
         <DCCReference Include="GLScene_DT.dcp"/>
-        <DCCReference Include="GLScene_Parallel_RT.dcp"/>
+        <DCCReference Include="GLScene_GPU_RT.dcp"/>
         <DCCReference Include="..\Source\FCUDAEditor.pas">
             <Form>GLSCUDAEditorForm</Form>
         </DCCReference>
-        <DCCReference Include="..\Source\GLS.ParallelRegister.pas"/>
-        <RcCompile Include="..\Resources\GLSceneParallel.rc">
-            <Form>GLSceneParallel.res</Form>
+        <DCCReference Include="..\Source\GPU.CUDAPropEditors.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDARegister.pas"/>
+        <RcCompile Include="..\Resources\GLSceneGPU.rc">
+            <Form>GLSceneGPU.res</Form>
         </RcCompile>
         <BuildConfiguration Include="Debug">
             <Key>Cfg_2</Key>
@@ -144,13 +145,13 @@
         <BorlandProject>
             <Delphi.Personality>
                 <Source>
-                    <Source Name="MainSource">GLScene_Parallel_DT.dpk</Source>
+                    <Source Name="MainSource">GLScene_GPU_DT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>
@@ -158,9 +159,9 @@
                 <Platform value="Win64">False</Platform>
             </Platforms>
             <Deployment Version="3">
-                <DeployFile LocalName="C:\Users\Public\Documents\Embarcadero\Studio\21.0\Bpl\GLScene_Parallel_DT.bpl" Configuration="Debug" Class="ProjectOutput">
+                <DeployFile LocalName="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_GPU_DT.bpl" Configuration="Debug" Class="ProjectOutput">
                     <Platform Name="Win32">
-                        <RemoteName>GLScene_Parallel_DT.bpl</RemoteName>
+                        <RemoteName>GLScene_GPU_DT.bpl</RemoteName>
                         <Overwrite>true</Overwrite>
                     </Platform>
                 </DeployFile>

+ 14 - 13
Packages/GLScene_Parallel_RT.dpk → Packages/GLScene_GPU_RT.dpk

@@ -1,4 +1,4 @@
-package GLScene_Parallel_RT;
+package GLScene_GPU_RT;
 
 {$R *.res}
 {$IFDEF IMPLICITBUILDING This IFDEF should not be used by users}
@@ -37,18 +37,18 @@ requires
   GLScene_RT;
 
 contains
-  GLS.CUDARuntime in '..\Source\GLS.CUDARuntime.pas',
-  GLS.CUDAFourierTransform in '..\Source\GLS.CUDAFourierTransform.pas',
-  GLS.CUDAUtility in '..\Source\GLS.CUDAUtility.pas',
-  GLS.CUDACompiler in '..\Source\GLS.CUDACompiler.pas',
-  GLS.CUDAContext in '..\Source\GLS.CUDAContext.pas',
-  GLS.CUDAFFTPlan in '..\Source\GLS.CUDAFFTPlan.pas',
-  GLS.CUDAGraphics in '..\Source\GLS.CUDAGraphics.pas',
-  GLS.CUDAParser in '..\Source\GLS.CUDAParser.pas',
-  GLS.CUDADataAccess in '..\Source\GLS.CUDADataAccess.pas',
-  GLS.CUDAParallelPrimitives in '..\Source\GLS.CUDAParallelPrimitives.pas',
-  GLS.CUDA in '..\Source\GLS.CUDA.pas',
-  GLS.CUDAApi in '..\Source\GLS.CUDAApi.pas',
+  Import.CUDAApi in '..\Source\Import.CUDAApi.pas',
+  Import.CUDAUtility in '..\Source\Import.CUDAUtility.pas',
+  Import.CUDAParallelPrimitives in '..\Source\Import.CUDAParallelPrimitives.pas',
+  GPU.CUDARuntime in '..\Source\GPU.CUDARuntime.pas',
+  GPU.CUDAFourierTransform in '..\Source\GPU.CUDAFourierTransform.pas',
+  GPU.CUDACompiler in '..\Source\GPU.CUDACompiler.pas',
+  GPU.CUDAContext in '..\Source\GPU.CUDAContext.pas',
+  GPU.CUDAFFTPlan in '..\Source\GPU.CUDAFFTPlan.pas',
+  GPU.CUDAGraphics in '..\Source\GPU.CUDAGraphics.pas',
+  GPU.CUDAParser in '..\Source\GPU.CUDAParser.pas',
+  GPU.CUDADataAccess in '..\Source\GPU.CUDADataAccess.pas',
+  GPU.CUDA in '..\Source\GPU.CUDA.pas',
   GLS.FilePGM in '..\Source\GLS.FilePGM.pas',
   Import.OpenCL in '..\Source\Import.OpenCL.pas',
   Import.OpenCL_GL in '..\Source\Import.OpenCL_GL.pas',
@@ -56,3 +56,4 @@ contains
 
 end.
 
+

+ 21 - 21
Packages/GLScene_Parallel_RT.dproj → Packages/GLScene_GPU_RT.dproj

@@ -1,7 +1,7 @@
 <Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
     <PropertyGroup>
         <ProjectGuid>{F0792472-0CD2-4D29-A0F7-4D3BB36C2E02}</ProjectGuid>
-        <MainSource>GLScene_Parallel_RT.dpk</MainSource>
+        <MainSource>GLScene_GPU_RT.dpk</MainSource>
         <Base>True</Base>
         <Config Condition="'$(Config)'==''">Debug</Config>
         <TargetedPlatforms>3</TargetedPlatforms>
@@ -60,7 +60,7 @@
     <PropertyGroup Condition="'$(Base)'!=''">
         <DCC_CBuilderOutput>All</DCC_CBuilderOutput>
         <GenPackage>true</GenPackage>
-        <SanitizedProjectName>GLScene_Parallel_RT</SanitizedProjectName>
+        <SanitizedProjectName>GLScene_GPU_RT</SanitizedProjectName>
         <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
         <DCC_K>false</DCC_K>
         <GenDll>true</GenDll>
@@ -133,18 +133,18 @@
         <DCCReference Include="vcl.dcp"/>
         <DCCReference Include="xmlrtl.dcp"/>
         <DCCReference Include="GLScene_RT.dcp"/>
-        <DCCReference Include="..\Source\GLS.CUDARuntime.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAFourierTransform.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAUtility.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDACompiler.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAContext.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAFFTPlan.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAGraphics.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAParser.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDADataAccess.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAParallelPrimitives.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDA.pas"/>
-        <DCCReference Include="..\Source\GLS.CUDAApi.pas"/>
+        <DCCReference Include="..\Source\Import.CUDAApi.pas"/>
+        <DCCReference Include="..\Source\Import.CUDAUtility.pas"/>
+        <DCCReference Include="..\Source\Import.CUDAParallelPrimitives.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDARuntime.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDAFourierTransform.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDACompiler.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDAContext.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDAFFTPlan.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDAGraphics.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDAParser.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDADataAccess.pas"/>
+        <DCCReference Include="..\Source\GPU.CUDA.pas"/>
         <DCCReference Include="..\Source\GLS.FilePGM.pas"/>
         <DCCReference Include="..\Source\Import.OpenCL.pas"/>
         <DCCReference Include="..\Source\Import.OpenCL_GL.pas"/>
@@ -167,13 +167,13 @@
         <BorlandProject>
             <Delphi.Personality>
                 <Source>
-                    <Source Name="MainSource">GLScene_Parallel_RT.dpk</Source>
+                    <Source Name="MainSource">GLScene_GPU_RT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>
@@ -181,9 +181,9 @@
                 <Platform value="Win64">True</Platform>
             </Platforms>
             <Deployment Version="3">
-                <DeployFile LocalName="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_Parallel_RT.bpl" Configuration="Debug" Class="ProjectOutput">
+                <DeployFile LocalName="C:\Users\Public\Documents\Embarcadero\Studio\20.0\Bpl\GLScene_GPU_RT.bpl" Configuration="Debug" Class="ProjectOutput">
                     <Platform Name="Win32">
-                        <RemoteName>GLScene_Parallel_RT.bpl</RemoteName>
+                        <RemoteName>GLScene_GPU_RT.bpl</RemoteName>
                         <Overwrite>true</Overwrite>
                     </Platform>
                 </DeployFile>

+ 4 - 4
Packages/GLScene_Physics_DT.dproj

@@ -140,10 +140,10 @@
                     <Source Name="MainSource">GLScene_Physics_DT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 4
Packages/GLScene_Physics_RT.dproj

@@ -186,10 +186,10 @@
                     <Source Name="MainSource">GLScene_Physics_RT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 5 - 4
Packages/GLScene_RT.dproj

@@ -89,6 +89,7 @@
         <VerInfo_Keys>CompanyName=;FileDescription=$(MSBuildProjectName);FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProgramID=com.embarcadero.$(MSBuildProjectName);ProductName=$(MSBuildProjectName);ProductVersion=1.0.0.0;Comments=</VerInfo_Keys>
         <DCC_UsePackage>vclimg;vcl;rtl;VclSmp;$(DCC_UsePackage)</DCC_UsePackage>
         <BT_BuildType>Debug</BT_BuildType>
+        <DCC_DcuOutput>..\lib\$(Platform)</DCC_DcuOutput>
     </PropertyGroup>
     <PropertyGroup Condition="'$(Cfg_1)'!=''">
         <DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
@@ -391,10 +392,10 @@
                     <Source Name="MainSource">GLScene_RT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 4
Packages/GLScene_Sounds_DT.dproj

@@ -137,10 +137,10 @@
                     <Source Name="MainSource">GLScene_Sounds_DT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 4
Packages/GLScene_Sounds_RT.dproj

@@ -179,10 +179,10 @@
                     <Source Name="MainSource">GLScene_Sounds_RT.dpk</Source>
                 </Source>
                 <Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k270.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp270.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k270.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
-                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp270.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcboffice2k260.bpl">Embarcadero C++Builder Office 2000 Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\bcbofficexp260.bpl">Embarcadero C++Builder Office XP Servers Package</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dcloffice2k260.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
+                    <Excluded_Packages Name="$(BDSBIN)\dclofficexp260.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
                 </Excluded_Packages>
             </Delphi.Personality>
             <Platforms>

+ 4 - 0
Resources/GLSceneGPU.rc

@@ -0,0 +1,4 @@
+TGLCUDA BITMAP components\TGLCUDA.bmp
+TGLCUDADevice BITMAP components\TGLCUDADevice.bmp
+TGLCUDACompiler BITMAP components\TGLCUDACompiler.bmp
+TCUDAFeedBackMesh BITMAP objects\TCUDAFeedBackMesh.bmp

+ 0 - 4
Resources/GLSceneParallel.rc

@@ -1,4 +0,0 @@
-TGLSCUDA BITMAP components\TGLSCUDA.bmp
-TGLSCUDADevice BITMAP components\TGLSCUDADevice.bmp
-TGLSCUDACompiler BITMAP components\TGLSCUDACompiler.bmp
-TGLFeedBackMesh BITMAP objects\TGLFeedBackMesh.bmp

+ 0 - 0
Resources/components/TGLSCUDA.bmp → Resources/components/TGLCUDA.bmp


+ 0 - 0
Resources/components/TGLSCUDACompiler.bmp → Resources/components/TGLCUDACompiler.bmp


+ 0 - 0
Resources/components/TGLSCUDADevice.bmp → Resources/components/TGLCUDADevice.bmp


+ 0 - 0
Resources/objects/TGLFeedbackMesh.bmp → Resources/objects/TCUDAFeedbackMesh.bmp


+ 1 - 1
Source/FCUDAEditor.dfm

@@ -94,7 +94,7 @@ object GLSCUDAEditorForm: TGLSCUDAEditorForm
     Left = 24
     Top = 48
     Bitmap = {
-      494C010106000800BC0010001000FFFFFFFFFF10FFFFFFFFFFFFFFFF424D3600
+      494C010106000800040010001000FFFFFFFFFF10FFFFFFFFFFFFFFFF424D3600
       0000000000003600000028000000400000002000000001002000000000000020
       0000000000000000000000000000000000000000000000000000000000000000
       0000000000000000000000000000000000000000000000000000000000000000

+ 16 - 17
Source/FCUDAEditor.pas

@@ -14,24 +14,25 @@ uses
   Winapi.Windows, 
   Winapi.Messages,
   System.SysUtils, 
-  System.Variants, 
-  System.Classes, 
+  System.Variants,
+  System.Classes,
   System.Win.Registry,
   System.ImageList,
-  Vcl.Graphics, 
-  Vcl.Controls, 
-  Vcl.Forms, 
+  Vcl.Graphics,
+  Vcl.Controls,
+  Vcl.Forms,
   Vcl.Dialogs,
-  Vcl.ImgList, 
-  Vcl.StdCtrls, 
-  Vcl.ComCtrls, 
+  Vcl.ImgList,
+  Vcl.StdCtrls,
+  Vcl.ComCtrls,
   Vcl.ToolWin,
   DesignIntf,
   VCLEditors,
   GLS.Strings,
-  GLS.CUDA,
-  GLS.CUDAFFTPlan,
-  GLS.CUDAGraphics;
+
+  GPU.CUDA,
+  GPU.CUDAFFTPlan,
+  GPU.CUDAGraphics;
 
 type
   TGLSCUDAEditorForm = class(TForm)
@@ -51,16 +52,14 @@ type
     procedure FormDestroy(Sender: TObject);
   private
     FClassList: TList;
-    FCUDA: TGLSCUDA;
+    FCUDA: TGLCUDA;
     FCurrentDesigner: IDesigner;
   protected
     procedure Notification(AComponent: TComponent; Operation: TOperation);
       override;
     procedure OnCUDAComponentNameChanged(Sender : TObject);
   public
-    
-    procedure SetCUDAEditorClient(Client: TGLSCUDA; Designer: IDesigner);
-
+    procedure SetCUDAEditorClient(Client: TGLCUDA; Designer: IDesigner);
   end;
 
 function GLSCUDAEditorForm: TGLSCUDAEditorForm;
@@ -73,7 +72,7 @@ implementation
 {$R *.dfm}
 
 const
-  cRegistryKey = 'Software\GLScene\GLSCUDAEditor';
+  cRegistryKey = 'Software\GLScene\CUDAEditor';
 
 var
   vGLSCUDAEditorForm: TGLSCUDAEditorForm;
@@ -203,7 +202,7 @@ begin
     FCurrentDesigner.SelectComponent(obj);
 end;
 
-procedure TGLSCUDAEditorForm.SetCUDAEditorClient(Client: TGLSCUDA; Designer: IDesigner);
+procedure TGLSCUDAEditorForm.SetCUDAEditorClient(Client: TGLCUDA; Designer: IDesigner);
 var
   i: Integer;
   child: TCUDAComponent;

+ 2 - 2
Source/GLS.BaseMeshSilhouette.pas

@@ -18,7 +18,7 @@ uses
   GLS.Silhouette;
 
 type
-  TGLFaceGroupConnectivity = class(TConnectivity)
+  TGLFaceGroupConnectivity = class(TGLConnectivity)
   private
     FMeshObject: TMeshObject;
     FOwnsVertices: boolean;
@@ -33,7 +33,7 @@ type
     destructor Destroy; override;
   end;
 
-  TGLBaseMeshConnectivity = class(TBaseConnectivity)
+  TGLBaseMeshConnectivity = class(TGLBaseConnectivity)
   private
     FBaseMesh: TGLBaseMesh;
     FFaceGroupConnectivityList: TList;

+ 0 - 1
Source/GLS.Blur.pas

@@ -845,7 +845,6 @@ end;
 initialization
 // ------------------------------------------------------------------
 
-     // class registrations
   RegisterClass(TGLBlur);
   RegisterClass(TGLMotionBlur);
 

+ 1 - 1
Source/GLS.FilePGM.pas

@@ -22,7 +22,7 @@ uses
   GLS.Strings,
   GLS.ApplicationFileIO,
 
-  GLS.CUDAUtility;
+  Import.CUDAUtility;
 
 
 type

+ 62 - 62
Source/GLS.GeomObjects.pas

@@ -33,6 +33,7 @@ uses
   GLS.VectorTypes,
   GLS.GeometryBB,
   GLS.VectorFileObjects,
+  GLS.PipelineTransformation,
 
   GLS.Context,
   GLS.Objects,
@@ -164,20 +165,20 @@ type
       default [coSides, coBottom];
   end;
 
-  TCylinderPart = (cySides, cyBottom, cyTop);
-  TCylinderParts = set of TCylinderPart;
-  TCylinderAlignment = (caCenter, caTop, caBottom);
+  TGLCylinderPart = (cySides, cyBottom, cyTop);
+  TGLCylinderParts = set of TGLCylinderPart;
+  TGLCylinderAlignment = (caCenter, caTop, caBottom);
 
   //  Cylinder object, can also be used to make truncated cones
   TGLCylinder = class(TGLCylinderBase)
   private
-    FParts: TCylinderParts;
+    FParts: TGLCylinderParts;
     FTopRadius: Single;
-    FAlignment: TCylinderAlignment;
+    FAlignment: TGLCylinderAlignment;
   protected
     procedure SetTopRadius(const aValue: Single);
-    procedure SetParts(aValue: TCylinderParts);
-    procedure SetAlignment(val: TCylinderAlignment);
+    procedure SetParts(aValue: TGLCylinderParts);
+    procedure SetAlignment(val: TGLCylinderAlignment);
     function GetTopRadius: Single; override;
   public
     constructor Create(AOwner: TComponent); override;
@@ -192,28 +193,28 @@ type
     procedure Align(const startPoint, endPoint: TAffineVector); overload;
   published
     property TopRadius: Single read FTopRadius write SetTopRadius;
-    property Parts: TCylinderParts read FParts write SetParts
+    property Parts: TGLCylinderParts read FParts write SetParts
       default [cySides, cyBottom, cyTop];
-    property Alignment: TCylinderAlignment read FAlignment write SetAlignment
+    property Alignment: TGLCylinderAlignment read FAlignment write SetAlignment
       default caCenter;
   end;
 
   //  Capsule object, can also be used to make truncated cones 
   TGLCapsule = class(TGLSceneObject)
   private
-    FParts: TCylinderParts;
+    FParts: TGLCylinderParts;
     FRadius: Single;
     FSlices: Integer;
     FStacks: Integer;
     FHeight: Single;
-    FAlignment: TCylinderAlignment;
+    FAlignment: TGLCylinderAlignment;
   protected
     procedure SetHeight(const aValue: Single);
     procedure SetRadius(const aValue: Single);
     procedure SetSlices(const aValue: integer);
     procedure SetStacks(const aValue: integer);
-    procedure SetParts(aValue: TCylinderParts);
-    procedure SetAlignment(val: TCylinderAlignment);
+    procedure SetParts(aValue: TGLCylinderParts);
+    procedure SetAlignment(val: TGLCylinderAlignment);
   public
     constructor Create(AOwner: TComponent); override;
     procedure Assign(Source: TPersistent); override;
@@ -230,19 +231,19 @@ type
     property Slices: Integer read FSlices write SetSlices;
     property Stacks: Integer read FStacks write SetStacks;
     property Radius: Single read FRadius write SetRadius;
-    property Parts: TCylinderParts read FParts write SetParts
+    property Parts: TGLCylinderParts read FParts write SetParts
       default [cySides, cyBottom, cyTop];
-    property Alignment: TCylinderAlignment read FAlignment write SetAlignment
+    property Alignment: TGLCylinderAlignment read FAlignment write SetAlignment
       default caCenter;
   end;
 
-  TAnnulusPart = (anInnerSides, anOuterSides, anBottom, anTop);
-  TAnnulusParts = set of TAnnulusPart;
+  TGLAnnulusPart = (anInnerSides, anOuterSides, anBottom, anTop);
+  TGLAnnulusParts = set of TGLAnnulusPart;
 
   //  An annulus is a cylinder that can be made hollow (pipe-like) 
   TGLAnnulus = class(TGLCylinderBase)
   private
-    FParts: TAnnulusParts;
+    FParts: TGLAnnulusParts;
     FBottomInnerRadius: Single;
     FTopInnerRadius: Single;
     FTopRadius: Single;
@@ -250,7 +251,7 @@ type
     procedure SetTopRadius(const aValue: Single);
     procedure SetTopInnerRadius(const aValue: Single);
     procedure SetBottomInnerRadius(const aValue: Single);
-    procedure SetParts(aValue: TAnnulusParts);
+    procedure SetParts(aValue: TGLAnnulusParts);
   public
     constructor Create(AOwner: TComponent); override;
     procedure Assign(Source: TPersistent); override;
@@ -265,21 +266,21 @@ type
     property TopInnerRadius: Single read FTopInnerRadius
       write SetTopInnerRadius;
     property TopRadius: Single read FTopRadius write SetTopRadius;
-    property Parts: TAnnulusParts read FParts write SetParts
+    property Parts: TGLAnnulusParts read FParts write SetParts
       default [anInnerSides, anOuterSides, anBottom, anTop];
   end;
 
-  TTorusPart = (toSides, toStartDisk, toStopDisk);
-  TTorusParts = set of TTorusPart;
+  TGLTorusPart = (toSides, toStartDisk, toStopDisk);
+  TGLTorusParts = set of TGLTorusPart;
 
   //  A Torus object 
   TGLTorus = class(TGLSceneObject)
   private
-    FParts: TTorusParts;
+    FParts: TGLTorusParts;
     FRings, FSides: Cardinal;
     FStartAngle, FStopAngle: Single;
     FMinorRadius, FMajorRadius: Single;
-    FMesh: array of array of TVertexRec;
+    FMesh: array of array of TGLVertexRec;
   protected
     procedure SetMajorRadius(const aValue: Single);
     procedure SetMinorRadius(const aValue: Single);
@@ -287,7 +288,7 @@ type
     procedure SetSides(aValue: Cardinal);
     procedure SetStartAngle(const aValue: Single);
     procedure SetStopAngle(const aValue: Single);
-    procedure SetParts(aValue: TTorusParts);
+    procedure SetParts(aValue: TGLTorusParts);
   public
     constructor Create(AOwner: TComponent); override;
     procedure BuildList(var rci: TGLRenderContextInfo); override;
@@ -301,11 +302,11 @@ type
     property Sides: Cardinal read FSides write SetSides default 15;
     property StartAngle: Single read FStartAngle write SetStartAngle;
     property StopAngle: Single read FStopAngle write SetStopAngle;
-    property Parts: TTorusParts read FParts write SetParts default [toSides];
+    property Parts: TGLTorusParts read FParts write SetParts default [toSides];
   end;
 
-  TArrowLinePart = (alLine, alTopArrow, alBottomArrow);
-  TArrowLineParts = set of TArrowLinePart;
+  TGLArrowLinePart = (alLine, alTopArrow, alBottomArrow);
+  TGLArrowLineParts = set of TGLArrowLinePart;
 
   TGLArrowHeadStyle = (ahssStacked, ahssCentered, ahssIncluded);
 
@@ -317,7 +318,7 @@ type
     By default the bottom arrow is off *)
   TGLArrowLine = class(TGLCylinderBase)
   private
-    FParts: TArrowLineParts;
+    FParts: TGLArrowLineParts;
     FTopRadius: Single;
     fTopArrowHeadHeight: Single;
     fTopArrowHeadRadius: Single;
@@ -330,7 +331,7 @@ type
     procedure SetTopArrowHeadRadius(const aValue: Single);
     procedure SetBottomArrowHeadHeight(const aValue: Single);
     procedure SetBottomArrowHeadRadius(const aValue: Single);
-    procedure SetParts(aValue: TArrowLineParts);
+    procedure SetParts(aValue: TGLArrowLineParts);
     procedure SetHeadStackingStyle(const val: TGLArrowHeadStyle);
   public
     constructor Create(AOwner: TComponent); override;
@@ -340,7 +341,7 @@ type
     property TopRadius: Single read FTopRadius write SetTopRadius;
     property HeadStackingStyle: TGLArrowHeadStyle read FHeadStackingStyle
       write SetHeadStackingStyle default ahssStacked;
-    property Parts: TArrowLineParts read FParts write SetParts
+    property Parts: TGLArrowLineParts read FParts write SetParts
       default [alLine, alTopArrow];
     property TopArrowHeadHeight: Single read fTopArrowHeadHeight
       write SetTopArrowHeadHeight;
@@ -373,7 +374,7 @@ type
     fBottomArrowHeadHeight: Single;
     fBottomArrowHeadRadius: Single;
     FHeadStackingStyle: TGLArrowHeadStyle;
-    FMesh: array of array of TVertexRec;
+    FMesh: array of array of TGLVertexRec;
   protected
     procedure SetArcRadius(const aValue: Single);
     procedure SetStartAngle(const aValue: Single);
@@ -408,8 +409,8 @@ type
       write SetBottomArrowHeadRadius;
   end;
 
-  TPolygonPart = (ppTop, ppBottom);
-  TGLPolygonParts = set of TPolygonPart;
+  TGLPolygonPart = (ppTop, ppBottom);
+  TGLPolygonParts = set of TGLPolygonPart;
 
   (* A basic polygon object.
     The curve is described by the Nodes and SplineMode properties, should be
@@ -434,8 +435,8 @@ type
     property Parts: TGLPolygonParts read FParts write SetParts default [ppTop, ppBottom];
   end;
 
-  TFrustrumPart = (fpTop, fpBottom, fpFront, fpBack, fpLeft, fpRight);
-  TFrustrumParts = set of TFrustrumPart;
+  TGLFrustrumPart = (fpTop, fpBottom, fpFront, fpBack, fpLeft, fpRight);
+  TGLFrustrumParts = set of TGLFrustrumPart;
 
 const
   cAllFrustrumParts = [fpTop, fpBottom, fpFront, fpBack, fpLeft, fpRight];
@@ -449,13 +450,13 @@ type
   TGLFrustrum = class(TGLSceneObject)
   private
     FApexHeight, FBaseDepth, FBaseWidth, FHeight: Single;
-    FParts: TFrustrumParts;
+    FParts: TGLFrustrumParts;
     FNormalDirection: TGLNormalDirection;
     procedure SetApexHeight(const aValue: Single);
     procedure SetBaseDepth(const aValue: Single);
     procedure SetBaseWidth(const aValue: Single);
     procedure SetHeight(const aValue: Single);
-    procedure SetParts(aValue: TFrustrumParts);
+    procedure SetParts(aValue: TGLFrustrumParts);
     procedure SetNormalDirection(aValue: TGLNormalDirection);
   protected
     procedure DefineProperties(Filer: TFiler); override;
@@ -476,7 +477,7 @@ type
     property Height: Single read FHeight write SetHeight stored False;
     property NormalDirection: TGLNormalDirection read FNormalDirection
       write SetNormalDirection default ndOutside;
-    property Parts: TFrustrumParts read FParts write SetParts default cAllFrustrumParts;
+    property Parts: TGLFrustrumParts read FParts write SetParts default cAllFrustrumParts;
   end;
 
 //--------------------- TGLTeapot -------------------------
@@ -927,11 +928,10 @@ begin
   inherited Assign(Source);
 end;
 
-
 function TGLCylinderBase.GenerateSilhouette(const silhouetteParameters
   : TGLSilhouetteParameters): TGLSilhouette;
 var
-  connectivity: TConnectivity;
+  Connectivity: TGLConnectivity;
   sil: TGLSilhouette;
   ShadowSlices: integer;
   i: integer;
@@ -943,7 +943,7 @@ var
   HalfHeight: Single;
   ShadowTopRadius: Single;
 begin
-  connectivity := TConnectivity.Create(true);
+  Connectivity := TGLConnectivity.Create(true);
   ShadowSlices := FSlices div 1;
   if FSlices < 5 then
     FSlices := 5;
@@ -974,17 +974,17 @@ begin
     // This should be optimized to use AddIndexedFace, because this method
     // searches for each of the vertices and adds them or re-uses them.
     // Skin
-    connectivity.AddFace(p[2], p[1], p[0]);
-    connectivity.AddFace(p[3], p[2], p[0]);
+    Connectivity.AddFace(p[2], p[1], p[0]);
+    Connectivity.AddFace(p[3], p[2], p[0]);
     // Sides / caps
-    connectivity.AddFace(c1, p[0], p[1]);
-    connectivity.AddFace(p[2], p[3], c2);
+    Connectivity.AddFace(c1, p[0], p[1]);
+    Connectivity.AddFace(p[2], p[3], c2);
     a1 := a1 + PiDivSlices;
   end;
   sil := nil;
-  connectivity.CreateSilhouette(silhouetteParameters, sil, False);
+  Connectivity.CreateSilhouette(silhouetteParameters, sil, False);
   Result := sil;
-  connectivity.Free;
+  Connectivity.Free;
 end;
 
 // ------------------
@@ -1180,7 +1180,7 @@ begin
   Result := FTopRadius;
 end;
 
-procedure TGLCylinder.SetParts(aValue: TCylinderParts);
+procedure TGLCylinder.SetParts(aValue: TGLCylinderParts);
 begin
   if aValue <> FParts then
   begin
@@ -1189,7 +1189,7 @@ begin
   end;
 end;
 
-procedure TGLCylinder.SetAlignment(val: TCylinderAlignment);
+procedure TGLCylinder.SetAlignment(val: TGLCylinderAlignment);
 begin
   if val <> FAlignment then
   begin
@@ -1537,7 +1537,7 @@ begin
   end;
 end;
 
-procedure TGLCapsule.SetParts(aValue: TCylinderParts);
+procedure TGLCapsule.SetParts(aValue: TGLCylinderParts);
 begin
   if aValue <> FParts then
   begin
@@ -1546,7 +1546,7 @@ begin
   end;
 end;
 
-procedure TGLCapsule.SetAlignment(val: TCylinderAlignment);
+procedure TGLCapsule.SetAlignment(val: TGLCylinderAlignment);
 begin
   if val <> FAlignment then
   begin
@@ -1760,7 +1760,7 @@ begin
   end;
 end;
 
-procedure TGLAnnulus.SetParts(aValue: TAnnulusParts);
+procedure TGLAnnulus.SetParts(aValue: TGLAnnulusParts);
 begin
   if aValue <> FParts then
   begin
@@ -2015,7 +2015,7 @@ end;
 
 procedure TGLTorus.BuildList(var rci: TGLRenderContextInfo);
 
-  procedure EmitVertex(ptr: PVertexRec; L1, L2: integer);
+  procedure EmitVertex(ptr: PGLVertexRec; L1, L2: integer);
   begin
     XGL.TexCoord2fv(@ptr^.TexCoord);
     begin
@@ -2035,11 +2035,11 @@ var
   ringDelta, sideDelta: Single;
   ringDir: TAffineVector;
   iFact, jFact: Single;
-  pVertex: PVertexRec;
+  pVertex: PGLVertexRec;
   TanLoc, BinLoc: Integer;
   MeshSize: integer;
   MeshIndex: integer;
-  Vertex: TVertexRec;
+  Vertex: TGLVertexRec;
 begin
   if FMesh = nil then
   begin
@@ -2307,7 +2307,7 @@ begin
   end;
 end;
 
-procedure TGLTorus.SetParts(aValue: TTorusParts);
+procedure TGLTorus.SetParts(aValue: TGLTorusParts);
 begin
   if aValue <> FParts then
   begin
@@ -2449,7 +2449,7 @@ begin
   end;
 end;
 
-procedure TGLArrowLine.SetParts(aValue: TArrowLineParts);
+procedure TGLArrowLine.SetParts(aValue: TGLArrowLineParts);
 begin
   if aValue <> FParts then
   begin
@@ -2684,7 +2684,7 @@ begin
 end;
 
 procedure TGLArrowArc.BuildList(var rci: TGLRenderContextInfo);
-  procedure EmitVertex(ptr: PVertexRec; L1, L2: integer);
+  procedure EmitVertex(ptr: PGLVertexRec; L1, L2: integer);
   begin
     XGL.TexCoord2fv(@ptr^.TexCoord);
     gl.Normal3fv(@ptr^.Normal);
@@ -2702,11 +2702,11 @@ var
   ringDelta, sideDelta: Single;
   ringDir: TAffineVector;
   iFact, jFact: Single;
-  pVertex: PVertexRec;
+  pVertex: PGLVertexRec;
   TanLoc, BinLoc: Integer;
   MeshSize: integer;
   MeshIndex: integer;
-  ConeCenter: TVertexRec;
+  ConeCenter: TGLVertexRec;
   StartOffset, StopOffset: Single;
 begin
   if FMesh = nil then
@@ -3295,7 +3295,7 @@ begin
   end;
 end;
 
-procedure TGLFrustrum.SetParts(aValue: TFrustrumParts);
+procedure TGLFrustrum.SetParts(aValue: TGLFrustrumParts);
 begin
   if aValue <> FParts then
   begin

+ 18 - 18
Source/GLS.Isosurface.pas

@@ -94,8 +94,8 @@ type
     FStepZ: Single;
     VoxelData: PxVoxelData;
     procedure Process_cube;
-    { function test_face(face:byte):Boolean;
-      function test_interior(s:Byte):boolean }
+    (* function test_face(face:byte):Boolean;
+      function test_interior(s:Byte):boolean *)
     procedure Compute_Intersection_Points;
     procedure Add_Triangle(trig: array of Integer; N: Byte; v12: Integer = -1);
     function Add_x_vertex: Integer;
@@ -115,7 +115,7 @@ type
     procedure SetVoxelValue(i, j, k: Integer; HfValue: TxScalarValue);
     function GetVoxelData(i, j, k: Integer): TxVoxel;
     function Voxel(i, j, k: Integer): PxVoxel;
-    function calc_u(v1, v2: Single): Single; virtual;
+    function calc_u(v1, v2: Single): Extended; virtual;
   public
     ScalarField: TxScalarField;
     constructor Create; overload; virtual;
@@ -165,19 +165,19 @@ type
   end;
 
 // Sphere surface
-function SFSphere(X, Y, Z: Single): TxScalarValue;
+function SFSphere(X, Y, Z: Extended): TxScalarValue;
 // Minkowski space (http://mathworld.wolfram.com)
-function SFMinkowski(X, Y, Z: Single): TxScalarValue;
+function SFMinkowski(X, Y, Z: Extended): TxScalarValue;
 // Klein Bottle (http://mathworld.wolfram.com)
-function SFKleinBottle(X, Y, Z: Single): TxScalarValue;
+function SFKleinBottle(X, Y, Z: Extended): TxScalarValue;
 // Chmutov-surface-1 (http://mathworld.wolfram.com)
-function SFChmutov1(X, Y, Z: Single): TxScalarValue;
+function SFChmutov1(X, Y, Z: Extended): TxScalarValue;
 // Chmutov-surface-2 (http://mathworld.wolfram.com)
-function SFChmutov2(X, Y, Z: Single): TxScalarValue;
+function SFChmutov2(X, Y, Z: Extended): TxScalarValue;
 // Toroidal surface (phantasy!)
-function SFToroidal(X, Y, Z: Single): TxScalarValue;
+function SFToroidal(X, Y, Z: Extended): TxScalarValue;
 // Double torus Surface (phantasy!)
-function SFDoubleTorus(X, Y, Z: Single): TxScalarValue;
+function SFDoubleTorus(X, Y, Z: Extended): TxScalarValue;
 
 const
   DemoScalarField: array [0 .. 6] of
@@ -514,12 +514,12 @@ const
     (0, 2, 3, 6), (0, 3, 7, 6), (0, 7, 4, 6), (0, 4, 5, 6));
 
 // Test surface functions
-function SFSphere(X, Y, Z: Single): TxScalarValue;
+function SFSphere(X, Y, Z: Extended): TxScalarValue;
 begin
   Result := sqr(X) + sqr(Y) + sqr(Z)
 end;
 
-function SFToroidal(X, Y, Z: Single): TxScalarValue;
+function SFToroidal(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 7;
   a = 2.5;
@@ -532,7 +532,7 @@ begin
     (sqr(sqrt(sqr(Z) + sqr(X)) - a) + sqr(Y));
 end;
 
-function SFDoubleTorus(X, Y, Z: Single): TxScalarValue;
+function SFDoubleTorus(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 2.25;
 begin
@@ -544,7 +544,7 @@ begin
     PowerInteger(Y, 4) + sqr(Z)
 end;
 
-function SFChmutov1(X, Y, Z: Single): TxScalarValue;
+function SFChmutov1(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 2.5;
 begin
@@ -555,7 +555,7 @@ begin
     (PowerInteger(X, 4) + PowerInteger(Y, 4) + PowerInteger(Z, 4));
 end;
 
-function SFChmutov2(X, Y, Z: Single): TxScalarValue;
+function SFChmutov2(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 2.5;
 begin
@@ -566,7 +566,7 @@ begin
     sqr(Z) * sqr(3 - 4 * sqr(Z)));
 end;
 
-function SFKleinBottle(X, Y, Z: Single): TxScalarValue;
+function SFKleinBottle(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 7.5;
 begin
@@ -578,7 +578,7 @@ begin
     (sqr(X) + sqr(Y) + sqr(Z) - 2 * Y - 1);
 end;
 
-function SFMinkowski(X, Y, Z: Single): TxScalarValue;
+function SFMinkowski(X, Y, Z: Extended): TxScalarValue;
 const
   FScale = 7;
 begin
@@ -990,7 +990,7 @@ begin
   end
 end;
 
-function TGLMarchingCube.calc_u(v1, v2: Single): Single;
+function TGLMarchingCube.calc_u(v1, v2: Single): Extended;
 begin
   if (abs(FIsoValue - v1) >= 0.00001) then
     Result := 1

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 274 - 250
Source/GLS.Objects.pas


+ 1 - 3
Source/GLS.PersistentClasses.pas

@@ -356,9 +356,7 @@ begin
     Result := '';
     Exit;
   end;
-
   SetLength(Result, sLength); // creates enough room
-
   L := 1;
   T := 1;
   while L <= Cardinal(sLength) do
@@ -1342,7 +1340,7 @@ end;
 function TGLBinaryReader.ReadFloat: Extended;
 {$IFDEF WIN64}
 var
-   C  :TExtended80Rec; // Temporary variable to store 10 bytes floating point number in a Win64 application
+   C: TExtended80Rec; // Temporary variable to store 10 bytes floating point number in a Win64 application
 {$ENDIF}
 begin
   Result := 0.0;

+ 1 - 0
Source/GLS.ProxyObjects.pas

@@ -11,6 +11,7 @@ interface
 {$I GLScene.inc}
 
 uses
+  Winapi.OpenGL,
   System.Classes,
   System.SysUtils,
 

+ 6 - 6
Source/GLS.ROAMPatch.pas

@@ -179,7 +179,7 @@ procedure DrawContours(Vertices: TAffineVectorList; VertexIndices: TIntegerList;
   ContourInterval: Integer; ContourWidth: Integer; DecVal: Integer);
 var
   i: Integer;
-  Contours: TAffineVectorList;
+  Isolines: TAffineVectorList;
   CurColor: TVector;
 
 begin
@@ -188,12 +188,12 @@ begin
     gl.PolygonOffset(1, 1);
     gl.Enable(GL_POLYGON_OFFSET_FILL);
     i := VertexIndices.Count - 3;
-    Contours := TAffineVectorList.Create;
+    Isolines := TAffineVectorList.Create;
     while i >= 0 do
     begin
       TriangleElevationSegments(Vertices[VertexIndices[i]],
         Vertices[VertexIndices[i + 1]], Vertices[VertexIndices[i + 2]],
-        ContourInterval, Contours);
+        ContourInterval, Isolines);
       Dec(i, DecVal);
     end;
     gl.PushAttrib(GL_ENABLE_BIT or GL_CURRENT_BIT);
@@ -202,12 +202,12 @@ begin
     gl.GetFloatv(GL_CURRENT_COLOR, @CurColor);
     gl.Color4f(0, 0, 0, 1);
     gl.Begin_(GL_LINES);
-     for i := 0 to Contours.Count - 1 do
-       gl.Vertex3fv(@Contours.List[i]);
+     for i := 0 to Isolines.Count - 1 do
+       gl.Vertex3fv(@Isolines.List[i]);
     gl.End_;
     gl.Color4fv(@CurColor);
     gl.PopAttrib;
-    Contours.Free;
+    Isolines.Free;
   end;
 end;
 

+ 27 - 28
Source/GLS.SceneRegister.pas

@@ -1320,16 +1320,16 @@ end;
 // ******************************************************
 // Register Properties
 //*******************************************************
-procedure RegisterPropertiesInCategories;
+procedure GLSceneRegisterPropertiesInCategories;
 begin
   // property types
   // ScreenDepth in Win32FullScreenViewer
   RegisterPropertiesInCategory(strOpenGLCategoryName,
      [TypeInfo(TGLCamera), TypeInfo(TGLSceneBuffer),
      TypeInfo(TGLVSyncMode), TypeInfo(TGLScreenDepth)]);
-  // SceneViewer
+  // TGLSceneViewer
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLSceneViewer, ['*Render']);
-  // Scene
+  // GLScene
   RegisterPropertiesInCategory(strOpenGLCategoryName,
     [TypeInfo(TGLObjectsSorting), TypeInfo(TGLProgressEvent),
     TypeInfo(TGLBehaviours), TypeInfo(TGLEffects),
@@ -1350,11 +1350,11 @@ begin
   RegisterPropertiesInCategory(strVisualCategoryName, TGLCamera, ['DepthOfView', 'SceneScale']);
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLNonVisualViewer, ['*Render']);
 
-  // Objects
+  // GLObjects
   RegisterPropertiesInCategory(strOpenGLCategoryName,
     [TypeInfo(TGLLinesNodes), TypeInfo(TGLLineNodesAspect),
     TypeInfo(TGLLineSplineMode), TypeInfo(TGLLinesOptions)]);
-  // DummyCube
+  // GLDummyCube
   RegisterPropertiesInCategory(strLayoutCategoryName, TGLDummyCube, ['VisibleAtRunTime']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLDummyCube, ['CubeSize', 'VisibleAtRunTime']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPlane, ['*Offset', '*Tiles']);
@@ -1366,7 +1366,7 @@ begin
     ['Antialiased', 'Division', 'Line*', 'NodeSize']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLCube, ['Cube*']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLFrustrum, ['ApexHeight', 'Base*']);
-  // SpaceText
+  // GLSpaceText
   RegisterPropertiesInCategory(strLayoutCategoryName, [TypeInfo(TGLTextAdjust)]);
   RegisterPropertiesInCategory(strLocalizableCategoryName, [TypeInfo(TGLSpaceTextCharRange)]);
   RegisterPropertiesInCategory(strVisualCategoryName, [TypeInfo(TGLLineSplineMode),
@@ -1392,18 +1392,18 @@ begin
     ['Bottom*', 'Loops', 'Slices', 'Stacks', 'Top*']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPolygon, ['Division']);
 
-  // MultiPolygon
+  // GLMultiPolygon
   RegisterPropertiesInCategory(strVisualCategoryName, TGLContour, ['Division']);
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLContourNodes), TypeInfo(TGLContours)]);
 
-  // Extrusion
+  // GLExtrusion
   RegisterPropertiesInCategory(strVisualCategoryName, TGLExtrusionSolid, ['Stacks']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPipeNode, ['RadiusFactor']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPipe, ['Division', 'Radius', 'Slices']);
   RegisterPropertiesInCategory(strVisualCategoryName, [TypeInfo(TGLNodes), TypeInfo(TPipeNodesColorMode)]);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLRevolutionSolid, ['Division', 'Slices', 'YOffsetPerTurn']);
 
-  // VectorFileObjects
+  // GLVectorFileObjects
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLActorAnimationMode), TypeInfo(TGLActorAnimations),
     TypeInfo(TGLMeshAutoCenterings), TypeInfo(TGLActorFrameInterpolation),
     TypeInfo(TGLActorAnimationReference), TypeInfo(TGLActor)]);
@@ -1418,21 +1418,21 @@ begin
     ['*Frame*', 'Interval', 'OverlaySkeleton', 'UseMeshmaterials']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLActor,  ['OverlaySkeleton']);
 
-  // Mesh
+  // GLMesh 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLMeshMode), TypeInfo(TGLVertexMode)]);
 
-  // Graph
+  // GLGraph 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLHeightFieldOptions)]);
   RegisterPropertiesInCategory(strVisualCategoryName, [TypeInfo(TGLHeightFieldColorMode), TypeInfo(TGLSamplingScale),
     TypeInfo(TGLXYZGridLinesStyle), TypeInfo(TGLXYZGridParts)]);
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLXYZGrid, ['Antialiased']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLXYZGrid, ['Antialiased', 'Line*']);
 
-  // Particles
+  // GLParticles
   RegisterPropertiesInCategory(strLayoutCategoryName, TGLParticles, ['VisibleAtRunTime']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLParticles, ['*Size', 'VisibleAtRunTime']);
 
-  // Skydome
+  // GLSkydome
   RegisterPropertiesInCategory(strOpenGLCategoryName,
     [TypeInfo(TGLSkyDomeBands), TypeInfo(TGLSkyDomeOptions), TypeInfo(TGLSkyDomeStars)]);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLSkyDomeBand, ['Slices', 'Stacks', '*Angle']);
@@ -1440,11 +1440,11 @@ begin
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLEarthSkyDome,
     ['Slices', 'Stacks', 'SunElevation', 'Turbidity']);
 
-  // Mirror
+  // GLMirror
   RegisterPropertiesInCategory(strOpenGLCategoryName,
     [TypeInfo(TGLMirrorOptions), TypeInfo(TGLBaseSceneObject)]);
 
-  // ParticleFX
+  // GLParticleFX 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLBlendingMode)]);
   RegisterPropertiesInCategory(strVisualCategoryName,
     [TypeInfo(TGLBlendingMode), TypeInfo(TPFXLifeColors), TypeInfo(TSpriteColorMode)]);
@@ -1456,48 +1456,48 @@ begin
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPolygonPFXManager, ['NbSides']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLPointLightPFXManager, ['TexMapSize']);
 
-  // TerrainRenderer
+  // GLTerrainRenderer 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLHeightDataSource)]);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLTerrainRenderer, ['*CLOD*', 'QualityDistance', 'Tile*']);
 
-  // zBuffer
+  // GLzBuffer 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLMemoryViewer),
     TypeInfo(TGLSceneViewer), TypeInfo(TOptimise)]);
   RegisterPropertiesInCategory(strVisualCategoryName, [TypeInfo(TOptimise)]);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLZShadows, ['DepthFade', '*Shadow', 'Soft', 'Tolerance']);
 
-  // HUDObjects
+  // GLHUDObjects
   RegisterPropertiesInCategory(strLayoutCategoryName, [TypeInfo(TTextLayout)]);
   RegisterPropertiesInCategory(strVisualCategoryName, [TypeInfo(TGLBitmapFont), TypeInfo(TTextLayout)]);
   RegisterPropertiesInCategory(strLocalizableCategoryName,[TypeInfo(TGLBitmapFont)]);
 
-  // Texture
+  // GLTexture
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLMaterial), TypeInfo(TGLMaterialLibrary),
     TypeInfo(TGLLibMaterials), TypeInfo(TGLTextureNeededEvent)]);
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLLibMaterial, ['Texture2Name']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLLibMaterial, ['TextureOffset', 'TextureScale']);
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLMaterialLibrary, ['TexturePaths']);
 
-  // Cadencer
+  // GLCadencer
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLCadencer)]);
 
-  // Collision
+  // GLCollision
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TObjectCollisionEvent)]);
 
-  // FireFX
+  // GLFireFX
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLFireFXManager,
     ['MaxParticles', 'NoZWrite', 'Paused', 'UseInterval']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLFireFXManager,
     ['Fire*', 'InitialDir', 'NoZWrite', 'Particle*', 'Paused']);
 
-  // ThorFX
+  // GLThorFX
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TCalcPointEvent)]);
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLThorFXManager,
     ['Maxpoints', 'Paused']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLThorFXManager,
     ['Core', 'Glow*', 'Paused', 'Target', 'Vibrate', 'Wildness']);
 
-  // BitmapFont
+  // GLBitmapFont 
   RegisterPropertiesInCategory(strOpenGLCategoryName, [TypeInfo(TGLMagFilter), TypeInfo(TGLMinFilter)]);
   RegisterPropertiesInCategory(strLocalizableCategoryName, [TypeInfo(TGLBitmapFontRanges)]);
   RegisterPropertiesInCategory(strLocalizableCategoryName, TGLBitmapFontRange, ['*ASCII']);
@@ -1506,7 +1506,7 @@ begin
   RegisterPropertiesInCategory(strVisualCategoryName, TGLBitmapFont,
     ['Char*', '*Interval*', '*Space', 'Glyphs']);
 
-  // HeightData
+  // GLHeightData
   RegisterPropertiesInCategory(strOpenGLCategoryName, TGLBitmapHDS, ['MaxPoolSize']);
   RegisterPropertiesInCategory(strVisualCategoryName, TGLBitmapHDS, ['Picture']);
 end;
@@ -1555,7 +1555,7 @@ begin
   RegisterComponentEditor(TGLMaterialLibraryEx, TGLMaterialLibraryEditor);
   RegisterComponentEditor(TGLSArchiveManager, TGLSArchiveManagerEditor);
 
-  RegisterPropertiesInCategories;
+  GLSceneRegisterPropertiesInCategories;
 
   RegisterPropertyEditor(TypeInfo(TResolution), nil, '', TGLResolutionProperty);
   RegisterPropertyEditor(TypeInfo(TGLTexture), TGLMaterial, '', TGLTextureProperty);
@@ -1797,13 +1797,12 @@ begin
   RegisterSceneObject(TGLSLProjectedTextures, 'GLSL Projected Textures', strOCSpecialObjects, HInstance);
   RegisterSceneObject(TGLTextureEmitter, 'Texture Emitter', strOCSpecialObjects, HInstance);
   RegisterSceneObject(TGLProjectedTextures, 'Projected Textures', strOCSpecialObjects, HInstance);
-
   RegisterSceneObject(TGLBlur, 'Blur', strOCSpecialObjects, HInstance);
   RegisterSceneObject(TGLMotionBlur, 'MotionBlur', strOCSpecialObjects, HInstance);
 
   RegisterSceneObject(TGLSpaceText, 'SpaceText', strOCDoodad, HInstance);
 
-  RegisterSceneObject(TGLTrail, 'GLS.Trail', strOCSpecialObjects, HInstance);
+  RegisterSceneObject(TGLTrail, 'GLTrail', strOCSpecialObjects, HInstance);
   RegisterSceneObject(TGLPostEffect, 'PostEffect', strOCSpecialObjects, HInstance);
   RegisterSceneObject(TGLPostShaderHolder, 'PostShaderHolder', strOCSpecialObjects, HInstance);
 

+ 19 - 19
Source/GLS.Silhouette.pas

@@ -75,7 +75,7 @@ type
     procedure AddIndexedCapToSilhouette(const Vi0, Vi1, vi2: integer); inline;
   end;
 
-  TBaseConnectivity = class
+  TGLBaseConnectivity = class
   protected
     FPrecomputeFaceNormal: Boolean;
     function GetEdgeCount: integer; virtual;
@@ -89,7 +89,7 @@ type
     constructor Create(APrecomputeFaceNormal: Boolean); virtual;
   end;
 
-  TConnectivity = class(TBaseConnectivity)
+  TGLConnectivity = class(TGLBaseConnectivity)
   protected
     (* All storage of faces and edges is cut up into tiny pieces for a reason,
       it'd be nicer with Structs or classes, but it's actually faster this way.
@@ -247,35 +247,35 @@ begin
 end;
 
 // ------------------
-// ------------------ TBaseConnectivity ------------------
+// ------------------ TGLBaseConnectivity ------------------
 // ------------------
 
-constructor TBaseConnectivity.Create(APrecomputeFaceNormal: Boolean);
+constructor TGLBaseConnectivity.Create(APrecomputeFaceNormal: Boolean);
 begin
   FPrecomputeFaceNormal := APrecomputeFaceNormal;
 end;
 
-procedure TBaseConnectivity.CreateSilhouette(const ASilhouetteParameters: TGLSilhouetteParameters;
+procedure TGLBaseConnectivity.CreateSilhouette(const ASilhouetteParameters: TGLSilhouetteParameters;
   var ASilhouette: TGLSilhouette; AddToSilhouette: Boolean);
 begin
   // Purely virtual!
 end;
 
 // ------------------
-// ------------------ TConnectivity ------------------
+// ------------------ TGLConnectivity ------------------
 // ------------------
 
-function TBaseConnectivity.GetEdgeCount: integer;
+function TGLBaseConnectivity.GetEdgeCount: integer;
 begin
   result := 0;
 end;
 
-function TBaseConnectivity.GetFaceCount: integer;
+function TGLBaseConnectivity.GetFaceCount: integer;
 begin
   result := 0;
 end;
 
-constructor TConnectivity.Create(APrecomputeFaceNormal: Boolean);
+constructor TGLConnectivity.Create(APrecomputeFaceNormal: Boolean);
 begin
   FFaceVisible := TByteList.Create;
 
@@ -290,7 +290,7 @@ begin
   FVertices := TAffineVectorList.Create;
 end;
 
-destructor TConnectivity.Destroy;
+destructor TGLConnectivity.Destroy;
 begin
   Clear;
 
@@ -309,7 +309,7 @@ begin
   inherited;
 end;
 
-procedure TConnectivity.Clear;
+procedure TGLConnectivity.Clear;
 begin
   FEdgeVertices.Clear;
   FEdgeFaces.Clear;
@@ -322,7 +322,7 @@ begin
     FVertices.Clear;
 end;
 
-procedure TConnectivity.CreateSilhouette(const silhouetteParameters: TGLSilhouetteParameters; var ASilhouette: TGLSilhouette;
+procedure TGLConnectivity.CreateSilhouette(const silhouetteParameters: TGLSilhouetteParameters; var ASilhouette: TGLSilhouette;
   AddToSilhouette: Boolean);
 var
   i: integer;
@@ -391,17 +391,17 @@ begin
   end;
 end;
 
-function TConnectivity.GetEdgeCount: integer;
+function TGLConnectivity.GetEdgeCount: integer;
 begin
   result := FEdgeVertices.Count div 2;
 end;
 
-function TConnectivity.GetFaceCount: integer;
+function TGLConnectivity.GetFaceCount: integer;
 begin
   result := FFaceVisible.Count;
 end;
 
-function TConnectivity.ReuseOrFindVertexID(const SeenFrom: TAffineVector; ASilhouette: TGLSilhouette; index: integer): integer;
+function TGLConnectivity.ReuseOrFindVertexID(const SeenFrom: TAffineVector; ASilhouette: TGLSilhouette; index: integer): integer;
 var
   pMemIndex: PInteger;
   memIndex, i: integer;
@@ -431,7 +431,7 @@ begin
     result := pMemIndex^;
 end;
 
-function TConnectivity.AddIndexedEdge(vertexIndex0, vertexIndex1: integer; FaceID: integer): integer;
+function TGLConnectivity.AddIndexedEdge(vertexIndex0, vertexIndex1: integer; FaceID: integer): integer;
 var
   i: integer;
   edgesVertices: PIntegerArray;
@@ -460,7 +460,7 @@ begin
   result := EdgeCount - 1;
 end;
 
-function TConnectivity.AddIndexedFace(Vi0, Vi1, vi2: integer): integer;
+function TGLConnectivity.AddIndexedFace(Vi0, Vi1, vi2: integer): integer;
 var
   FaceID: integer;
 begin
@@ -476,7 +476,7 @@ begin
   result := FaceID;
 end;
 
-function TConnectivity.AddFace(const vertex0, vertex1, vertex2: TAffineVector): integer;
+function TGLConnectivity.AddFace(const vertex0, vertex1, vertex2: TAffineVector): integer;
 var
   Vi0, Vi1, vi2: integer;
 begin
@@ -487,7 +487,7 @@ begin
   result := AddIndexedFace(Vi0, Vi1, vi2);
 end;
 
-function TConnectivity.AddQuad(const vertex0, vertex1, vertex2, vertex3: TAffineVector): integer;
+function TGLConnectivity.AddQuad(const vertex0, vertex1, vertex2, vertex3: TAffineVector): integer;
 var
   Vi0, Vi1, vi2, Vi3: integer;
 begin

+ 8 - 15
Source/GLS.TerrainRenderer.pas

@@ -40,8 +40,7 @@ const
 
 type
   TGetTerrainBoundsEvent = procedure(var l, t, r, b: Single) of object;
-  TPatchPostRenderEvent = procedure(var rci: TGLRenderContextInfo;
-    const patches: TList) of object;
+  TPatchPostRenderEvent = procedure(var rci: TGLRenderContextInfo; const patches: TList) of object;
   TGLHeightDataPostRenderEvent = procedure(var rci: TGLRenderContextInfo;
     var HeightDatas: TList) of object;
   TMaxCLODTrianglesReachedEvent = procedure(var rci: TGLRenderContextInfo)
@@ -87,8 +86,8 @@ type
     procedure MarkAllTilesAsUnused;
     procedure ReleaseAllUnusedTiles;
     procedure MarkHashedTileAsUsed(const tilePos: TAffineVector);
-    function HashedTile(const tilePos: TAffineVector;
-      canAllocate: Boolean = True): TGLHeightData; overload;
+    function HashedTile(const tilePos: TAffineVector; canAllocate: Boolean = True)
+	  : TGLHeightData; overload;
     function HashedTile(const xLeft, yTop: Integer; canAllocate: Boolean = True)
       : TGLHeightData; overload;
     procedure SetHeightDataSource(const val: TGLHeightDataSource);
@@ -127,21 +126,18 @@ type
     //  Size of the terrain tiles. Must be a power of two.
     property TileSize: Integer read FTileSize write SetTileSize default 16;
     // Number of tiles required for a full texture map.
-    property TilesPerTexture: Single read FTilesPerTexture
-      write SetTilesPerTexture;
+    property TilesPerTexture: Single read FTilesPerTexture write SetTilesPerTexture;
     (* Link to the material library holding terrain materials.
       If unspecified, and for all terrain tiles with unspecified material,
       the terrain renderer's material is used. *)
-    property MaterialLibrary: TGLMaterialLibrary read FMaterialLibrary
-      write SetMaterialLibrary;
+    property MaterialLibrary: TGLMaterialLibrary read FMaterialLibrary write SetMaterialLibrary;
     (* Quality distance hint.
       This parameter gives a hint to the terrain renderer at which distance
       the terrain quality can be degraded to favor speed. The distance is
       expressed in absolute coordinates units.
       All tiles closer than this distance are rendered according to
       QualityStyle and with a static resolution. *)
-    property QualityDistance: Single read FQualityDistance
-      write FQualityDistance;
+    property QualityDistance: Single read FQualityDistance write FQualityDistance;
     (* Determines how high-res tiles (closer than QualityDistance) are rendered.
       hrsFullGeometry (default value) means that the high-res tiles are rendered
       with full-geometry, and no LOD of any kind, while hrsTesselated means
@@ -159,8 +155,7 @@ type
       The lower the value, the higher the precision and triangle count.
       Large values will result in coarse terrain.
       high-resolution tiles (closer than QualityDistance) ignore this setting. *)
-    property CLODPrecision: Integer read FCLODPrecision write SetCLODPrecision
-      default 100;
+    property CLODPrecision: Integer read FCLODPrecision write SetCLODPrecision default 100;
     (* Number of frames to skip for a tile when occlusion testing found it invisible.
       Occlusion testing can help reduce CPU, T&L and fillrate requirements
       when tiles are occluded, either by the terrain itself (tiles behind
@@ -187,8 +182,7 @@ type
       (this will mean increasing your triangle count though, so you'll
       trade CPU power against T&L power). *)
     property OcclusionTesselate: TTerrainOcclusionTesselate
-      read FOcclusionTesselate write FOcclusionTesselate
-      default totTesselateIfVisible;
+      read FOcclusionTesselate write FOcclusionTesselate default totTesselateIfVisible;
     (* Allows to specify terrain bounds.
       Default rendering bounds will reach depth of view in all directions,
       with this event you can choose to specify a smaller rendered terrain area *)
@@ -1064,7 +1058,6 @@ end;
 initialization
 // ------------------------------------------------------------------
 
-// class registrations
 RegisterClass(TGLTerrainRenderer);
 
 end.

+ 1 - 17
Source/GLS.Utils.pas

@@ -111,8 +111,6 @@ function SavePictureDialog(var aFileName: string; const aTitle: string = ''): Bo
 function OpenPictureDialog(var aFileName: string; const aTitle: string = ''): Boolean;
 
 procedure SetGLSceneMediaDir();
-function GetSceneMediaPath(): TFileName;
-
 
 //------------------ from CrossPlatform -----------------------
 
@@ -621,25 +619,11 @@ begin
    path := ParamStr(0);
    path := LowerCase(ExtractFilePath(path));
    p := Pos('demos', path);
-   Delete(path, p+6, Length(path));
+   Delete(path, p+5, Length(path));
    path := IncludeTrailingPathDelimiter(path) + 'media';
    SetCurrentDir(path);
 end;
 
-function GetSceneMediaPath: TFileName;
-var
-  path: String;
-  p: Integer;
-begin
-   path := ParamStr(0);
-   path := LowerCase(ExtractFilePath(path));
-   p := Pos('demos', path);
-   Delete(path, p+6, Length(path));
-   Result := IncludeTrailingPathDelimiter(path) + 'media';
-   SetCurrentDir(Result);
-end;
-
-
 //------------ from CrossPlatform -------------------
 
 

+ 68 - 67
Source/GLS.VectorRecTypes.pas

@@ -4,7 +4,9 @@
 
 unit GLS.VectorRecTypes;
 
-(* Defines common vector types as advanced records *)
+(* Defines common vector types as advanced records using
+   BigIntegers and BigDecimals by Rudy Velthuis:
+   https://github.com/rvelthuis *)
 
 interface
 
@@ -20,8 +22,8 @@ uses
 
 
 type
-  TAbstractVector = array of Extended;
-  TAbstractMatrix = array of array of Extended;
+  TxBigMatrix = array of Extended; // replace with BigDecimals
+  T_BigMatrix = array of array of Extended;  // replace with BigDecimals
 
   TxQuaternion = record
   private
@@ -29,11 +31,11 @@ type
     procedure SetElement(Index: Byte; Value: Extended);
     function GetElement(Index: Byte): Extended;
   public
-    constructor Create(Q: TAbstractVector);
+    constructor Create(Q: TxBigMatrix);
     class operator Multiply(Q1, Q2: TxQuaternion): TxQuaternion;
     class operator Multiply(Q: TxQuaternion; Sc: Extended): TxQuaternion;
     class operator Multiply(Scalar: Extended; Q: TxQuaternion): TxQuaternion;
-    class operator Implicit(V: TAbstractVector): TxQuaternion;
+    class operator Implicit(V: TxBigMatrix): TxQuaternion;
     function Inv: TxQuaternion;
     function TruncateSTI: TxQuaternion;
     property Element[index: Byte]: Extended read GetElement
@@ -43,14 +45,14 @@ type
   PxVector = ^TxVector;
   TxVector = record
   private
-    FData: TAbstractVector;
+    FData: TxBigMatrix;
     FCount: Word;
     procedure SetElement(Index: Word; Value: Extended);
     function GetElement(Index: Word): Extended;
     procedure CheckUnique;
   public
     constructor Create(ElementsCount: Word); overload;
-    constructor Create(V: TAbstractVector); overload;
+    constructor Create(V: TxBigMatrix); overload;
     class operator Add(V1, V2: TxVector): TxVector;
     class operator Add(V: TxVector; Scalar: Extended): TxVector;
     class operator Add(Scalar: Extended; V: TxVector): TxVector;
@@ -62,7 +64,7 @@ type
     class operator Multiply(Scalar: Extended; V: TxVector): TxVector;
     class operator Divide(V: TxVector; Scalar: Extended): TxVector;
     class operator Divide(V1, V2: TxVector): TxVector;
-    class operator Implicit(V: TAbstractVector): TxVector;
+    class operator Implicit(V: TxBigMatrix): TxVector;
     function Norm: Extended;
     function SumOfSquares: Extended;
     function SumOfElments: Extended;
@@ -78,7 +80,7 @@ type
   PxMatrix = ^TxMatrix;
   TxMatrix = record
   private
-    FData: TAbstractMatrix;
+    FData: T_BigMatrix;
     FRowsCount: Word;
     FColsCount: Word;
     procedure SetElement(Row, Col: Word; Value: Extended);
@@ -93,7 +95,7 @@ type
   public
     constructor Create(RowsCount, ColsCount: Word); overload;
     constructor CreateDiag(Dim: Word; Value: Extended = 1.0);
-    constructor Create(M: TAbstractMatrix); overload;
+    constructor Create(M: T_BigMatrix); overload;
     class operator Add(M1, M2: TxMatrix): TxMatrix;
     class operator Subtract(M1, M2: TxMatrix): TxMatrix;
     class operator Multiply(M1, M2: TxMatrix): TxMatrix;
@@ -102,7 +104,7 @@ type
     class operator Multiply(M: TxMatrix; Scalar: Extended): TxMatrix;
     class operator Multiply(Scalar: Extended; M: TxMatrix): TxMatrix;
     class operator Multiply(M: TxMatrix; Q: TxQuaternion): TxQuaternion;
-    class operator Implicit(M: TAbstractMatrix): TxMatrix;
+    class operator Implicit(M: T_BigMatrix): TxMatrix;
     function Transp: TxMatrix;
     function Inv: TxMatrix;
     function ToQuat: TxQuaternion;
@@ -136,10 +138,9 @@ type
     property ColCount: Integer read FColCount;
   end;
 
-  function TxVec(V: TAbstractVector): TxVector;
-  function TxMat(M: TAbstractMatrix): TxMatrix;
-  function TxQuat(Q: TAbstractVector): TxQuaternion;
-
+  function TxVec(V: TxBigMatrix): TxVector; // helper: returns a TxVector initialised with TxVector.Create(V)
+  function TxMat(M: T_BigMatrix): TxMatrix; // helper: returns a TxMatrix initialised with TxMatrix.Create(M)
+  function TxQuat(Q: TxBigMatrix): TxQuaternion; // helper: returns a TxQuaternion initialised with TxQuaternion.Create(Q)
   procedure Init(Obj, TypeInfoOfObj: Pointer; Offset: Integer = 0);
 
 
@@ -147,15 +148,15 @@ type
 // Point types
 //-----------------------
 type
-  TxScalarValue = Single;
-  TxScalarField = function(X, Y, Z: Single): TxScalarValue;
+  TxScalarValue = Extended;  // scalar value type (was Single); TODO(review): a move to BigDecimal was noted here - confirm whether it is still planned
+  TxScalarField = function(X, Y, Z: Extended): TxScalarValue; // function type mapping real (X, Y, Z) to a TxScalarValue
 
-  // If data are made on integer XYZ index
+  // Method-pointer ("of object") variant that takes integer XYZ indices; TODO(review): a move to BigInteger was noted here - confirm whether it is still planned
   TxScalarFieldInt = function(iX, iY, iZ: Integer): TxScalarValue of object;
 
   TxVertex = record
     P, N: TVector3f;  //Point and Normal
-    Density: Single;
+    Density: Extended; // scalar stored with the vertex (widened from Single; P and N remain TVector3f)
   end;
 
   TxFace = record
@@ -165,34 +166,34 @@ type
     V3: TVector3f; // vertex 3
     Padding: array [0 .. 1] of Byte;
   end;
-  
+
   PxPoint2D = ^TxPoint2D;
   TxPoint2D = record
-    X: Single;
-    Y: Single;
+    X: Extended; // X coordinate (widened from Single)
+    Y: Extended; // Y coordinate (widened from Single)
     public
-      function Create(X, Y : Single): TxPoint2D;
-      procedure SetPosition(const X, Y : Single);
+      function Create(X, Y: Extended): TxPoint2D; // plain function, not a constructor: fills and returns Result, Self is not modified
+      procedure SetPosition(const X, Y : Extended); // overwrites both coordinates of this point
       function Add(const APoint2D: TxPoint2D): TxPoint2D;
-      function Length: Single; //distance to origin
-      function Distance(const APoint2D : TxPoint2D) : Single;
+      function Length: Extended; //distance to origin
+      function Distance(const APoint2D : TxPoint2D) : Extended; // Euclidean distance between this point and APoint2D
       class function PointInCircle(const Point, Center: TxPoint2D;
         const Radius: Integer):Boolean; static; inline;
-      procedure Offset(const ADeltaX, ADeltaY : Single);
+      procedure Offset(const ADeltaX, ADeltaY : Extended); // adds the deltas to X and Y in place
   end;
 
   PxPoint3D = ^TxPoint3D;
   TxPoint3D = record
-    X: Single;
-    Y: Single;
-    Z: Single;
+    X: Extended; // X coordinate (widened from Single)
+    Y: Extended; // Y coordinate (widened from Single)
+    Z: Extended; // Z coordinate (widened from Single)
     public
-      function Create(X, Y, Z: Single): TxPoint3D;
-      procedure SetPosition(const X, Y, Z : Single);
+      function Create(X, Y, Z: Extended): TxPoint3D; // plain function, not a constructor: returns a point through Result
+      procedure SetPosition(const X, Y, Z: Extended); // overwrites the coordinates of this point
       function Add(const AGLPoint3D: TxPoint3D): TxPoint3D;
       function Length: Single; //distance to origin
-      function Distance(const APoint3D : TxPoint3D) : Single;
-      procedure Offset(const ADeltaX, ADeltaY, ADeltaZ : Single);
+      function Distance(const APoint3D : TxPoint3D) : Extended; // NOTE(review): Length above still returns Single although the fields are now Extended - confirm intended
+      procedure Offset(const ADeltaX, ADeltaY, ADeltaZ : Extended); // adds the deltas to X, Y and Z in place
   end;
 
 
@@ -217,22 +218,22 @@ type
 // Vector types
 //-----------------------
 
-  TxVector2DType = array [0..1] of Single;
-  TxVector3DType = array [0..2] of Single;
+  TxVector2DType = array [0..1] of Extended; // 2 elements; NOTE(review): TxVector2D overlays X, Y, W (3 fields) on this array - confirm the length
+  TxVector3DType = array [0..2] of Extended; // 3 elements; NOTE(review): TxVector3D overlays X, Y, Z, W (4 fields) on this array - confirm the length
 
   TxVector2D = record
       function Create(const AX, AY, AW : Single): TxVector2D;
       function Add(const AVector2D: TxVector2D): TxVector2D;
-      function Length: Single;
-      function Norm: Single;
+      function Length: Extended; // Sqrt(X*X + Y*Y); W does not contribute
+      function Norm: Extended; // Sqr(X) + Sqr(Y), i.e. the squared length (no square root); W does not contribute
       function Normalize: TxVector2D;
       function CrossProduct(const AVector: TxVector2D): TxVector2D;
-      function DotProduct(const AVector: TxVector2D): Single;
+      function DotProduct(const AVector: TxVector2D): Extended; // X*X' + Y*Y' + W*W' (W is included here)
     case Integer of
       0: (V: TxVector2DType;);
-      1: (X: Single;
-          Y: Single;
-          W: Single;)
+      1: (X: Extended; // named fields sharing storage with V
+          Y: Extended;
+          W: Extended;) // NOTE(review): Create above still takes Single arguments while these fields are Extended - confirm intended
   end;
 
   TxVector3D = record
@@ -245,10 +246,10 @@ type
       function DotProduct(const AVector3D: TVector3D): Single; inline;
     case Integer of
       0: (V: TxVector3DType;);
-      1: (X: Single;
-          Y: Single;
-          Z: Single;
-          W: Single;)
+      1: (X: Extended;
+          Y: Extended;
+          Z: Extended;
+          W: Extended;)
   end;
 
 // Vector Arrays
@@ -382,17 +383,17 @@ implementation
 //---------------------------------------------------------------
 
 
-function TxVec(V: TAbstractVector): TxVector;
+function TxVec(V: TxBigMatrix): TxVector; // Returns a TxVector initialised through TxVector.Create(V), which stores Length(V) and a Copy of V
 begin
   Result.Create(V);
 end;
 
-function TxMat(M: TAbstractMatrix): TxMatrix;
+function TxMat(M: T_BigMatrix): TxMatrix; // Returns a TxMatrix initialised through the TxMatrix.Create(M) overload
 begin
   Result.Create(M);
 end;
 
-function TxQuat(Q: TAbstractVector): TxQuaternion;
+function TxQuat(Q: TxBigMatrix): TxQuaternion; // Returns a TxQuaternion via TxQuaternion.Create(Q); that constructor raises EMathError(sWRONG_SIZE) unless Length(Q) = 4
 begin
   Result.Create(Q);
 end;
@@ -467,7 +468,7 @@ begin
   SetLength(FData, FRowsCount, FColsCount);
 end;
 
-constructor TxMatrix.Create(M: TAbstractMatrix);
+constructor TxMatrix.Create(M: T_BigMatrix);
 var
   I: Integer;
 begin
@@ -542,7 +543,7 @@ begin
     Result.FData[I] := FData[Row - 1, I];
 end;
 
-class operator TxMatrix.Implicit(M: TAbstractMatrix): TxMatrix;
+class operator TxMatrix.Implicit(M: T_BigMatrix): TxMatrix; // Implicit conversion T_BigMatrix -> TxMatrix; delegates to TxMatrix.Create(M)
 begin
   Result.Create(M);
 end;
@@ -779,7 +780,7 @@ end;
 // TxVector
 //-----------------------------
 
-constructor TxVector.Create(V: TAbstractVector);
+constructor TxVector.Create(V: TxBigMatrix);
 begin
   FCount := Length(V);
   FData := Copy(V);
@@ -839,7 +840,7 @@ begin
   Result := V * (1 / Scalar);
 end;
 
-class operator TxVector.Implicit(V: TAbstractVector): TxVector;
+class operator TxVector.Implicit(V: TxBigMatrix): TxVector; // Implicit conversion TxBigMatrix -> TxVector; delegates to TxVector.Create(V)
 begin
   Result.Create(V);
 end;
@@ -1100,19 +1101,19 @@ end;
 // TxPoint2D
 //-----------------------------
 
-function TxPoint2D.Create(X, Y : Single): TxPoint2D;
+function TxPoint2D.Create(X, Y : Extended): TxPoint2D; // Returns a point with the given coordinates; only Result is written, Self is left unchanged
 begin
   Result.X := X;
   Result.Y := Y;
 end;
 
-procedure TxPoint2D.SetPosition(const X, Y: Single);
+procedure TxPoint2D.SetPosition(const X, Y: Extended); // Overwrites Self.X and Self.Y; the parameters shadow the fields, hence the explicit Self
 begin
   Self.X := X;
   Self.Y := Y;
 end;
 
-function TxPoint2D.Length: Single;
+function TxPoint2D.Length: Extended; // Distance from the origin to this point: Sqrt(X*X + Y*Y)
 begin
   Result := Sqrt(Self.X * Self.X + Self.Y * Self.Y);
 end;
@@ -1122,12 +1123,12 @@ begin
   Result.SetPosition(Self.X + APoint2D.X, Self.Y + APoint2D.Y);
 end;
 
-function TxPoint2D.Distance(const APoint2D: TxPoint2D): Single;
+function TxPoint2D.Distance(const APoint2D: TxPoint2D): Extended; // Euclidean distance between Self and APoint2D
 begin
   Result := Sqrt(Sqr(Self.X - APoint2D.X) +  Sqr(Self.Y - APoint2D.Y));
 end;
 
-procedure TxPoint2D.Offset(const ADeltaX, ADeltaY: Single);
+procedure TxPoint2D.Offset(const ADeltaX, ADeltaY: Extended);
 begin
   Self.X := Self.X + ADeltaX;
   Self.Y := Self.Y + ADeltaY;
@@ -1143,7 +1144,7 @@ end;
 // TxPoint3D
 //-----------------------------
 
-function TxPoint3D.Create(X, Y, Z: Single): TxPoint3D;
+function TxPoint3D.Create(X, Y, Z: Extended): TxPoint3D;
 begin
   Result.X := X;
   Result.Y := Y;
@@ -1157,7 +1158,7 @@ begin
   Result.Z := Self.Z + AGLPoint3D.Z;
 end;
 
-function TxPoint3D.Distance(const APoint3D: TxPoint3D): Single;
+function TxPoint3D.Distance(const APoint3D: TxPoint3D): Extended; // Euclidean distance between Self and APoint3D, same formula as TxPoint2D.Distance plus Z; always >= 0
 begin
-  Result := Self.Length - APoint3D.Length;
+  Result := Sqrt(Sqr(Self.X - APoint3D.X) + Sqr(Self.Y - APoint3D.Y) + Sqr(Self.Z - APoint3D.Z));
 end;
@@ -1167,14 +1168,14 @@ begin
   Result := Sqrt(Self.X * Self.X + Self.Y * Self.Y + Self.Z * Self.Z);
 end;
 
-procedure TxPoint3D.Offset(const ADeltaX, ADeltaY, ADeltaZ: Single);
+procedure TxPoint3D.Offset(const ADeltaX, ADeltaY, ADeltaZ: Extended); // Translates this point in place by adding each delta to the matching coordinate
 begin
   Self.X := Self.X + ADeltaX;
   Self.Y := Self.Y + ADeltaY;
   Self.Z := Self.Z + ADeltaZ;
 end;
 
-procedure TxPoint3D.SetPosition(const X, Y, Z: Single);
+procedure TxPoint3D.SetPosition(const X, Y, Z: Extended);
 begin
   Self.X := X;
   Self.Y := Y;
@@ -1199,7 +1200,7 @@ begin
   Result.W := (Self.X * AVector.Y) - (Self.Y * AVector.X);
 end;
 
-function TxVector2D.DotProduct(const AVector: TxVector2D): Single;
+function TxVector2D.DotProduct(const AVector: TxVector2D): Extended; // Sum of the component-wise products of X, Y and W
 begin
   Result := (Self.X * AVector.X) + (Self.Y * AVector.Y) + (Self.W * AVector.W);
 end;
@@ -1211,12 +1212,12 @@ begin
   Result.W := 1.0;
 end;
 
-function TxVector2D.Length: Single;
+function TxVector2D.Length: Extended; // Sqrt(X*X + Y*Y); W is not part of the length
 begin
   Result := Sqrt((Self.X * Self.X) + (Self.Y * Self.Y));
 end;
 
-function TxVector2D.Norm: Single;
+function TxVector2D.Norm: Extended; // Squared length Sqr(X) + Sqr(Y); no square root is taken and W is ignored
 begin
   Result := Sqr(Self.X) + Sqr(Self.Y);
 end;
@@ -1311,7 +1312,7 @@ begin
   Result := FData[Index];
 end;
 
-class operator TxQuaternion.Implicit(V: TAbstractVector): TxQuaternion;
+class operator TxQuaternion.Implicit(V: TxBigMatrix): TxQuaternion;
 begin
   if (Length(V) <> 4) then
     raise EMathError.Create(sWRONG_SIZE);
@@ -1344,7 +1345,7 @@ begin
   Result := Mat * Q2;
 end;
 
-constructor TxQuaternion.Create(Q: TAbstractVector);
+constructor TxQuaternion.Create(Q: TxBigMatrix);
 begin
   if Length(Q) <> 4 then
     raise EMathError.Create(sWRONG_SIZE);

+ 1 - 1
Source/GLSL.TextureShaders.pas

@@ -880,6 +880,6 @@ initialization
 //================================================
 
   RegisterClasses([TGLTextureSharingShader, TGLTextureSharingShaderMaterials,
-                   TGLTextureSharingShaderMaterial, TGLTexCombineShader]);
+                   TGLTextureSharingShaderMaterial]);
 
 end.

+ 2944 - 2949
Source/GLS.CUDA.pas → Source/GPU.CUDA.pas

@@ -1,2949 +1,2944 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDA;
-
-(* CUDA routines implementation *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  System.Types,
-  System.Classes,
-  System.SysUtils,
-
-  GLS.PersistentClasses,
-  GLS.BaseClasses,
-  GLS.Context,
-  GLS.VectorGeometry,
-  GLS.VectorTypes,
-  GLS.VectorLists,
-  GLS.Graphics,
-  GLS.Strings,
-  GLS.Utils,
-
-  GLS.CUDAApi,
-  GLS.CUDARunTime,
-  GLS.CUDAParser,
-  GLS.CUDAFourierTransform,
-  GLS.CUDACompiler,
-  GLS.CUDAContext,
-  GLS.CUDADataAccess
-  {$IFDEF USE_LOGGING},GLS.Logger;{$ELSE};{$ENDIF}
-
-type
-  TCUDAChange = (cuchDevice, cuchContext, cuchSize, cuchAddresMode, cuchFlag,
-    cuchFilterMode, cuchArray, cuchFormat, cuchMapping);
-  TCUDAChanges = set of TCUDAChange;
-
-  TCuAddresMode = (amWrap, amClamp, amMirror);
-  TCuFilterMode = (fmPoint, fmLinear);
-
-  TCUDAChannelType = (ctUndefined, ctUInt8, ctUInt16, ctUInt32, ctInt8, ctInt16,
-    ctInt32, ctHalfFloat, ctFloat, ctDouble);
-
-type
-
-  TCUDAChannelNum = (cnOne, cnTwo, cnThree, cnFour);
-
-  TChannelTypeAndNum = record
-    F: TCUDAChannelType;
-    C: TCUDAChannelNum;
-  end;
-
-  TCUDAMapping = (grmDefault, grmReadOnly, grmWriteDiscard);
-
-  TCUDAComponent = class(TCUDAHandlesMaster)
-  private
-    FMaster: TCUDAComponent;
-    FItems: TPersistentObjectList;
-    procedure SetMaster(AMaster: TCUDAComponent);
-    function GetItem(const i: Integer): TCUDAComponent;
-    function GetItemsCount: Integer;
-  protected
-    FStatus: TCUresult;
-    FChanges: TCUDAChanges;
-    function GetContext: TCUDAContext; override;
-    procedure CollectStatus(AStatus: TCUresult);
-    procedure GetChildren(AProc: TGetChildProc; Root: TComponent); override;
-    procedure AddItem(AItem: TCUDAComponent);
-    procedure RemoveItem(AItem: TCUDAComponent);
-    procedure DeleteItems;
-    procedure SetName(const NewName: TComponentName); override;
-    function GetIsAllocated: Boolean; virtual; abstract;
-  public
-    destructor Destroy; override;
-    procedure CuNotifyChange(AChange: TCUDAChange); virtual;
-    function GetParentComponent: TComponent; override;
-    procedure SetParentComponent(Value: TComponent); override;
-    function HasParent: Boolean; override;
-    function GetItemByName(const name: string): TCUDAComponent;
-    function MakeUniqueName(const BaseName: string): string;
-    property Master: TCUDAComponent read FMaster write SetMaster;
-    property Context: TCUDAContext read GetContext;
-    property Items[const i: Integer]: TCUDAComponent read GetItem;
-    property ItemsCount: Integer read GetItemsCount;
-    property Status: TCUresult read FStatus;
-    // Return true if handle is allocated (i.e. component has device object)
-    property IsAllocated: Boolean read GetIsAllocated;
-  end;
-
-  TCUDAComponentClass = class of TCUDAComponent;
-
-  TCUDAMemData = class;
-  TCUDAFunction = class;
-  TCUDATexture = class;
-  TGLSCUDA = class;
-  TCUDAConstant = class;
-
-  TCUDAModule = class(TCUDAComponent)
-  private
-    FHandle: PCUmodule;
-    FCode: TStringList;
-    FCodeType: TGLSCUDACompilerOutput;
-    FCompiler: TGLSCUDACompiler;
-    procedure SetCode(const Value: TStringList);
-    procedure SetCompiler(const Value: TGLSCUDACompiler);
-    function GetKernelFunction(const AName: string): TCUDAFunction;
-    function GetKernelTexture(const AName: string): TCUDATexture;
-    function GetKernelConstant(const AName: string): TCUDAConstant;
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    procedure OnChangeCode(Sender: TObject);
-    procedure Loaded; override;
-    function GetContext: TCUDAContext; override;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure Assign(Source: TPersistent); override;
-    procedure LoadFromFile(const AFilename: string);
-    procedure LoadFromSource;
-    procedure Unload;
-    procedure LoadAndCompile;
-    property Context: TCUDAContext read GetContext;
-    property CodeType: TGLSCUDACompilerOutput read FCodeType;
-    property KernelFunction[const AName: string]: TCUDAFunction
-      read GetKernelFunction;
-    property KernelTexture[const AName: string]: TCUDATexture
-      read GetKernelTexture;
-    property KernelConstant[const AName: string]: TCUDAConstant
-      read GetKernelConstant;
-  published
-    property Code: TStringList read FCode write SetCode;
-    property Compiler: TGLSCUDACompiler read FCompiler write SetCompiler;
-  end;
-
-  TGLResourceType = (rtTexture, rtBuffer);
-
-  //  Abstract class of graphic resources.
-  TCUDAGraphicResource = class(TCUDAComponent)
-  protected
-    FHandle: array [0 .. 7] of PCUgraphicsResource;
-    FMapping: TCUDAMapping;
-    FResourceType: TGLResourceType;
-    FGLContextHandle: TGLVirtualHandle;
-    FMapCounter: Integer;
-    function GetIsAllocated: Boolean; override;
-    procedure OnGLHandleAllocate(Sender: TGLVirtualHandle;
-      var Handle: Cardinal);
-    procedure OnGLHandleDestroy(Sender: TGLVirtualHandle; var Handle: Cardinal);
-    procedure BindArrayToTexture(var cudaArray: TCUDAMemData;
-      ALeyer, ALevel: LongWord); virtual; abstract;
-    procedure SetArray(var AArray: TCUDAMemData; AHandle: PCUarray;
-      ForGLTexture, Volume: Boolean);
-    function GetAttributeArraySize(const Attr: string): LongWord; virtual; abstract;
-    function GetAttributeArrayAddress(const Attr: string): Pointer; virtual;
-      abstract;
-    function GetElementArrayDataSize: LongWord; virtual; abstract;
-    function GetElementArrayAddress: Pointer; virtual; abstract;
-    procedure SetMapping(const Value: TCUDAMapping); virtual;
-    property Mapping: TCUDAMapping read FMapping write SetMapping
-      default grmDefault;
-  public
-    procedure MapResources; virtual; abstract;
-    procedure UnMapResources; virtual; abstract;
-  end;
-
-  TCUDAMemType = (mtHost, mtDevice, mtArray);
-  TCUDAMemMapFlag =
-  (
-    mmfPortable, // Memory is shared between contexts
-    mmfFastWrite // Fast write, slow read
-  );
-  TCUDAMemMapFlags = set of TCUDAMemMapFlag;
-
-  TCUDAMemData = class(TCUDAComponent)
-  private
-    FData: TCUdeviceptr;
-    FMappedMemory: TCUdeviceptr;
-    FHandle: PCUarray;
-    FWidth: Integer;
-    FHeight: Integer;
-    FDepth: Integer;
-    FPitch: Cardinal;
-    FElementSize: Integer;
-    FDataSize: Integer;
-    FChannelsType: TCUDAChannelType;
-    fChannelsNum: TCUDAChannelNum;
-    FMemoryType: TCUDAMemType;
-    FTexture: TCUDATexture;
-    FOpenGLRefArray: Boolean;
-    FMapping: Boolean;
-    procedure SetMemoryType(const AType: TCUDAMemType);
-    procedure SetWidth(const Value: Integer);
-    procedure SetHeight(const Value: Integer);
-    procedure SetDepth(const Value: Integer);
-    procedure SetChannelType(const Value: TCUDAChannelType);
-    procedure SetChannelNum(const Value: TCUDAChannelNum);
-    function GetData: TCUdeviceptr;
-    function GetArrayHandle: PCUarray;
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure CuNotifyChange(AChange: TCUDAChange); override;
-    (* Map device and array memory to host or host memory to device.
-       Mapping is necessary for modifying device data.
-       When mapped host memory - it can be accessed in device side
-       via MappedHostAddress. *)
-    procedure Map(const AFlags: TCUDAMemMapFlags = []);
-    // Done mapping operation.
-    procedure UnMap;
-    function Data<EType>(X: Integer): GCUDAHostElementAccess<EType>; overload;
-    function Data<EType>(X, Y: Integer): GCUDAHostElementAccess<EType>; overload;
-    function Data<EType>(X, Y, Z: Integer): GCUDAHostElementAccess<EType>; overload;
-    //  Fill device data
-    procedure FillMem(const Value);
-    procedure CopyTo(const ADstMemData: TCUDAMemData); overload;
-    procedure CopyTo(const AGLImage: TGLImage); overload;
-    //  Copy data to Graphic resource.
-    procedure CopyTo(const AGLGraphic: TCUDAGraphicResource;
-      aAttr: string = ''); overload;
-    procedure CopyFrom(const ASrcMemData: TCUDAMemData); overload;
-    procedure CopyFrom(const AGLImage: TGLBitmap32); overload;
-    procedure CopyFrom(const AGLGraphic: TCUDAGraphicResource;
-      aAttr: string = ''); overload;
-    procedure SubCopyTo(const ADstMemData: TCUDAMemData;
-      ASrcXYZ, ADstXYZ, ASizes: IntElement.TVector3);
-    property ElementSize: Integer read FElementSize;
-    property DataSize: Integer read FDataSize;
-    property Pitch: Cardinal read fPitch;
-    property RawData: TCUdeviceptr read GetData;
-    property MappedMemoryAddress: TCUdeviceptr read FMappedMemory;
-    property ArrayHandle: PCUarray read GetArrayHandle;
-  published
-    property Width: Integer read fWidth write SetWidth default 256;
-    property Height: Integer read fHeight write SetHeight default 0;
-    property Depth: Integer read fDepth write SetDepth default 0;
-    property MemoryType: TCUDAMemType read FMemoryType write SetMemoryType
-      default mtHost;
-    property ChannelsType: TCUDAChannelType read fChannelsType
-      write SetChannelType default ctInt8;
-    property ChannelsNum: TCUDAChannelNum read fChannelsNum write SetChannelNum
-      default cnOne;
-  end;
-
-  TCUDAUniform = class(TCUDAComponent)
-  protected
-    FHandle: TCUdeviceptr;
-    FSize: Cardinal;
-    FKernelName: string;
-    FType: TCUDAType;
-    FCustomType: string;
-    FRef: Boolean;
-    FDefined: Boolean;
-    procedure SetKernelName(const AName: string);
-    procedure SetType(AValue: TCUDAType);
-    procedure SetCustomType(const AValue: string);
-    procedure SetSize(const AValue: Cardinal);
-    procedure SetRef(AValue: Boolean);
-    procedure SetDefined(AValue: Boolean);
-
-    property KernelName: string read FKernelName write SetKernelName;
-    property DataType: TCUDAType read FType write SetType;
-    property CustomType: string read FCustomType write SetCustomType;
-    property Size: Cardinal read FSize write SetSize;
-    property Reference: Boolean read FRef write SetRef;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    property IsValueDefined: Boolean read FDefined write SetDefined;
-  end;
-
-
-  TCUDAConstant = class(TCUDAUniform)
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    function GetDeviceAddress: TCUdeviceptr;
-  public
-    property DeviceAddress: TCUdeviceptr read GetDeviceAddress;
-  published
-    property KernelName;
-    property DataType;
-    property CustomType;
-    property Size;
-    property Reference;
-  end;
-
-  TCUDAFuncParam = class(TCUDAUniform)
-  private
-     
-  protected
-    { Protected declaration }
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-  published
-    property KernelName;
-    property DataType;
-    property CustomType;
-    property Size;
-    property Reference;
-  end;
-
-  TCUDAFunction = class(TCUDAComponent)
-  private
-    FKernelName: string;
-    FHandle: PCUfunction;
-    FAutoSync: Boolean;
-    FBlockShape: TCUDADimensions;
-    FGrid: TCUDADimensions;
-    ParamOffset: Integer;
-    FLaunching: Boolean;
-    FOnParameterSetup: TNotifyEvent;
-    procedure SetBlockShape(const AShape: TCUDADimensions);
-    procedure SetGrid(const AGrid: TCUDADimensions);
-    procedure SetKernelName(const AName: string);
-    function GetHandle: PCUfunction;
-    procedure SetSharedMemorySize(Value: Integer);
-    function GetSharedMemorySize: Integer;
-    function GetMaxThreadPerBlock: Integer;
-    function GetConstMemorySize: Integer;
-    function GetLocalMemorySize: Integer;
-    function GetNumRegisters: Integer;
-    function GetParameter(const AName: string): TCUDAFuncParam;
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure SetParam(Value: Integer); overload;
-    procedure SetParam(Value: Cardinal); overload;
-    procedure SetParam(Value: Single); overload;
-    procedure SetParam(Value: TVector2i); overload;
-    procedure SetParam(Value: TVector3i); overload;
-    procedure SetParam(Value: TVector4i); overload;
-    procedure SetParam(Value: TVector2f); overload;
-    procedure SetParam(Value: TVector3f); overload;
-    procedure SetParam(Value: TVector4f); overload;
-    procedure SetParam(MemData: TCUDAMemData); overload;
-    procedure SetParam(TexRef: TCUDATexture); overload;
-    procedure SetParam(Ptr: Pointer); overload;
-    property Parameters[const AName: string]: TCUDAFuncParam read GetParameter;
-    procedure Launch(Grided: Boolean = true);
-    property Handle: PCUfunction read GetHandle;
-    property SharedMemorySize: Integer read GetSharedMemorySize
-      write SetSharedMemorySize;
-    property MaxThreadPerBlock: Integer read GetMaxThreadPerBlock;
-    property ConstMemorySize: Integer read GetConstMemorySize;
-    property LocalMemorySize: Integer read GetLocalMemorySize;
-    property NumRegisters: Integer read GetNumRegisters;
-  published
-    property KernelName: string read FKernelName write SetKernelName;
-    property AutoSync: Boolean read FAutoSync write FAutoSync default true;
-    property BlockShape: TCUDADimensions read FBlockShape write SetBlockShape;
-    property Grid: TCUDADimensions read FGrid write SetGrid;
-    property OnParameterSetup: TNotifyEvent read FOnParameterSetup
-      write FOnParameterSetup;
-  end;
-
-  TCUDATexture = class(TCUDAComponent)
-  private
-    FKernelName: string;
-    FHandle: PCUtexref;
-    fArray: TCUDAMemData;
-    fAddressModeS, fAddressModeT, fAddressModeR: TCuAddresMode;
-    fNormalizedCoord: Boolean;
-    fReadAsInteger: Boolean;
-    fFilterMode: TCuFilterMode;
-    fFormat: TCUDAChannelType;
-    fChannelNum: TCUDAChannelNum;
-    procedure SetKernelName(const AName: string);
-    procedure SetAddressModeS(const AMode: TCuAddresMode);
-    procedure SetAddressModeT(const AMode: TCuAddresMode);
-    procedure SetAddressModeR(const AMode: TCuAddresMode);
-    procedure SetNormalizedCoord(const flag: Boolean);
-    procedure SetReadAsInteger(const flag: Boolean);
-    procedure SetFilterMode(const mode: TCuFilterMode);
-    procedure SetFormat(AValue: TCUDAChannelType);
-    procedure SetChannelNum(AValue: TCUDAChannelNum);
-    procedure SetArray(Value: TCUDAMemData);
-    function GetHandle: PCUtexref;
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    property Handle: PCUtexref read GetHandle;
-  published
-    property KernelName: string read FKernelName write SetKernelName;
-    property AddressModeS: TCuAddresMode read fAddressModeS
-      write SetAddressModeS default amClamp;
-    property AddressModeT: TCuAddresMode read fAddressModeT
-      write SetAddressModeT default amClamp;
-    property AddressModeR: TCuAddresMode read fAddressModeR
-      write SetAddressModeR default amClamp;
-    property NormalizedCoord: Boolean read fNormalizedCoord
-      write SetNormalizedCoord default true;
-    property ReadAsInteger: Boolean read fReadAsInteger write SetReadAsInteger
-      default false;
-    property FilterMode: TCuFilterMode read fFilterMode write SetFilterMode
-      default fmPoint;
-    property Format: TCUDAChannelType read fFormat write SetFormat;
-    property ChannelNum: TCUDAChannelNum read fChannelNum write SetChannelNum;
-    property MemDataArray: TCUDAMemData read fArray write SetArray;
-  end;
-
-  TGLSCUDA = class(TCUDAComponent)
-  private
-    fDevice: TGLSCUDADevice;
-    fContext: TCUDAContext;
-    FOnOpenGLInteropInit: TOnOpenGLInteropInit;
-    procedure SetDevice(const Value: TGLSCUDADevice);
-    procedure SetOnOpenGLInteropInit(AEvent: TOnOpenGLInteropInit);
-    function GetModule(const i: Integer): TCUDAModule;
-  protected
-    procedure Notification(AComponent: TComponent;
-      Operation: TOperation); override;
-    function GetContext: TCUDAContext; override;
-    function GetIsAllocated: Boolean; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    property Context: TCUDAContext read GetContext;
-    property Modules[const i: Integer]: TCUDAModule read GetModule;
-  published
-    
-    property ComputingDevice: TGLSCUDADevice read fDevice write SetDevice;
-    property OnOpenGLInteropInit: TOnOpenGLInteropInit read FOnOpenGLInteropInit
-      write SetOnOpenGLInteropInit;
-  end;
-
-function GetChannelTypeAndNum(AType: TCUDAType): TChannelTypeAndNum;
-procedure RegisterCUDAComponentNameChangeEvent(ANotifyEvent: TNotifyEvent);
-procedure DeRegisterCUDAComponentNameChangeEvent;
-
-//-----------------------------------------------------------------
-implementation
-//-----------------------------------------------------------------
-
-
-const
-  cAddressMode: array [TCuAddresMode] of TCUaddress_mode =
-    (CU_TR_ADDRESS_MODE_WRAP, CU_TR_ADDRESS_MODE_CLAMP,
-    CU_TR_ADDRESS_MODE_MIRROR);
-
-  cFilterMode: array [TCuFilterMode] of TCUfilter_mode =
-    (CU_TR_FILTER_MODE_POINT, CU_TR_FILTER_MODE_LINEAR);
-
-const
-  cCUDATypeToTexFormat: array [TCUDAType] of TChannelTypeAndNum =
-    ((F: ctUndefined; C: cnOne), (F: ctInt8; C: cnOne), (F: ctUInt8; C: cnOne),
-    (F: ctInt8; C: cnTwo), (F: ctUInt8; C: cnTwo), (F: ctInt8; C: cnThree),
-    (F: ctUInt8; C: cnThree), (F: ctInt8; C: cnFour), (F: ctUInt8; C: cnFour),
-    (F: ctInt16; C: cnOne), (F: ctUInt16; C: cnOne), (F: ctInt16; C: cnTwo),
-    (F: ctUInt16; C: cnTwo), (F: ctInt16; C: cnThree), (F: ctUInt16;
-    C: cnThree), (F: ctInt16; C: cnFour), (F: ctUInt16; C: cnFour), (F: ctInt32;
-    C: cnOne), (F: ctUInt32; C: cnOne), (F: ctInt32; C: cnTwo), (F: ctUInt32;
-    C: cnTwo), (F: ctInt32; C: cnThree), (F: ctUInt32; C: cnThree), (F: ctInt32;
-    C: cnFour), (F: ctUInt32; C: cnFour), (F: ctUndefined; C: cnOne),
-    (F: ctUndefined; C: cnOne), (F: ctUndefined; C: cnTwo), (F: ctUndefined;
-    C: cnTwo), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnThree),
-    (F: ctUndefined; C: cnFour), (F: ctUndefined; C: cnFour), (F: ctFloat;
-    C: cnOne), (F: ctFloat; C: cnTwo), (F: ctFloat; C: cnThree), (F: ctFloat;
-    C: cnFour), (F: ctUndefined; C: cnOne), (F: ctUndefined; C: cnOne),
-    (F: ctUndefined; C: cnTwo), (F: ctUndefined; C: cnTwo), (F: ctUndefined;
-    C: cnThree), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnFour),
-    (F: ctUndefined; C: cnFour), (F: ctUndefined; C: cnOne), (F: ctUndefined;
-    C: cnTwo), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnFour),
-    (F: ctInt8; C: cnOne), (F: ctInt16; C: cnOne), (F: ctInt32; C: cnOne),
-    (F: ctUInt8; C: cnOne), (F: ctUInt16; C: cnOne), (F: ctUInt32; C: cnOne));
-
-  cChannelTypeSize: array [TCUDAChannelType] of Integer =
-      (0, 1, 2, 4, 1, 2, 4, 2, 4, 8);
-
-var
-  GLVirtualHandleCounter: Cardinal = 1;
-  vCUDAComponentNameChangeEvent: TNotifyEvent;
-
-function GetChannelTypeAndNum(AType: TCUDAType): TChannelTypeAndNum;
-begin
-  Result := cCUDATypeToTexFormat[AType];
-end;
-
-procedure CUDAEnumToChannelDesc(const Fmt: TCUarray_format; const nCh: LongWord;
-  out oFormat: TCUDAChannelType; out oNum: TCUDAChannelNum);
-begin
-  case Fmt of
-    CU_AD_FORMAT_UNSIGNED_INT8:
-      oFormat := ctUInt8;
-    CU_AD_FORMAT_UNSIGNED_INT16:
-      oFormat := ctUInt16;
-    CU_AD_FORMAT_UNSIGNED_INT32:
-      oFormat := ctUInt32;
-    CU_AD_FORMAT_SIGNED_INT8:
-      oFormat := ctUInt8;
-    CU_AD_FORMAT_SIGNED_INT16:
-      oFormat := ctUInt16;
-    CU_AD_FORMAT_SIGNED_INT32:
-      oFormat := ctUInt32;
-    CU_AD_FORMAT_HALF:
-      oFormat := ctHalfFloat;
-    CU_AD_FORMAT_FLOAT:
-      oFormat := ctFloat;
-  end;
-  case nCh of
-    1: oNum := cnOne;
-    2: oNum := cnTwo;
-    3: oNum := cnThree;
-    4: oNum := cnFour;
-  end;
-end;
-
-procedure RegisterCUDAComponentNameChangeEvent(ANotifyEvent: TNotifyEvent);
-begin
-  vCUDAComponentNameChangeEvent := ANotifyEvent;
-end;
-
-procedure DeRegisterCUDAComponentNameChangeEvent;
-begin
-  vCUDAComponentNameChangeEvent := nil;
-end;
-
-// ------------------
-// ------------------ TGLSCUDA ------------------
-// ------------------
-
-constructor TGLSCUDA.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  fDevice := nil;
-  fContext := TCUDAContext.Create;
-  FChanges := [];
-end;
-
-destructor TGLSCUDA.Destroy;
-begin
-  ComputingDevice := nil;
-  fContext.Destroy;
-  inherited;
-end;
-
-procedure TGLSCUDA.Notification(AComponent: TComponent; Operation: TOperation);
-begin
-  if (Operation = opRemove) and (AComponent = fDevice) then
-    ComputingDevice := nil;
-  inherited;
-end;
-
-procedure TGLSCUDA.SetDevice(const Value: TGLSCUDADevice);
-begin
-  if Value <> fDevice then
-  begin
-    if Assigned(Value) and not Value.Suitable then
-      exit;
-    if Assigned(fDevice) then
-      fDevice.RemoveFreeNotification(Self);
-    fDevice := Value;
-    if Assigned(fDevice) then
-    begin
-      fDevice.FreeNotification(Self);
-      CuNotifyChange(cuchDevice);
-    end;
-  end;
-end;
-
-// Stores the OpenGL interop callback and marks the context as changed;
-// GetContext hands the callback to the context the next time it (re)creates it.
-procedure TGLSCUDA.SetOnOpenGLInteropInit(AEvent: TOnOpenGLInteropInit);
-begin
-  FOnOpenGLInteropInit := AEvent;
-  CuNotifyChange(cuchContext);
-end;
-
-// Returns the context object after applying any pending device / context
-// changes recorded in FChanges.
-function TGLSCUDA.GetContext: TCUDAContext;
-begin
-  // A pending device switch is pushed into the context first; it always
-  // implies that the context itself has to be rebuilt.
-  if cuchDevice in FChanges then
-  begin
-    if not Assigned(fDevice) then
-      fContext.Device := nil
-    else
-      fContext.Device := fDevice.Device;
-    FChanges := FChanges - [cuchDevice] + [cuchContext];
-  end;
-
-  // The context is only (re)created while a device is attached.
-  if Assigned(fDevice) and (cuchContext in FChanges) then
-  begin
-    // Getting OpenGL context to make interoperability
-    fContext.OnOpenGLInteropInit := FOnOpenGLInteropInit;
-    CUDAContextManager.CreateContext(fContext);
-    FChanges := FChanges - [cuchContext];
-  end;
-
-  Result := fContext;
-end;
-
-// Reports the IsValid state of the owned context object.
-function TGLSCUDA.GetIsAllocated: Boolean;
-begin
-  Result := FContext.IsValid;
-end;
-
-// Returns the i-th child that is a TCUDAModule (counting modules only, in
-// item order), or nil when there are fewer than i + 1 modules.
-function TGLSCUDA.GetModule(const i: Integer): TCUDAModule;
-var
-  j, k: Integer;
-  item: TCUDAComponent;
-begin
-  Result := nil;
-  k := 0;
-  // Go through ItemsCount / Items instead of FItems: the list is created
-  // lazily by AddItem, so FItems is nil while the component has no children
-  // and FItems.Count would raise an access violation.
-  for j := 0 to ItemsCount - 1 do
-  begin
-    item := Items[j];
-    if item is TCUDAModule then
-    begin
-      if k = i then
-        exit(TCUDAModule(item));
-      Inc(k);
-    end;
-  end;
-end;
-
- 
-// ------------------
-// ------------------ TCUDAModule ------------------
-// ------------------
-
-// Creates an unloaded module with an empty code list whose OnChange event is
-// wired to OnChangeCode.
-constructor TCUDAModule.Create(AOwner: TComponent);
-var
-  CodeLines: TStringList;
-begin
-  inherited Create(AOwner);
-  FHandle := nil;
-  CodeLines := TStringList.Create;
-  CodeLines.OnChange := OnChangeCode;
-  FCode := CodeLines;
-end;
-
-// Unloads the driver module, releases the compiler link and frees the code
-// list.
-destructor TCUDAModule.Destroy;
-begin
-  Unload;
-  // Detach from the compiler while FCode is still alive: SetCompiler stores
-  // FCode in FCompiler.Product, so destroying the list first would leave the
-  // compiler holding a dangling reference until it is cleared.
-  if Assigned(FCompiler) then
-    FCompiler.Product := nil;
-  FCode.Destroy;
-  inherited;
-end;
-
-// Copies code text and code type from another TCUDAModule and reloads the
-// handles; any Source is then passed on to the inherited Assign.
-// NOTE(review): inherited Assign is called even when Source was handled here.
-// If no ancestor overrides Assign this may end in TPersistent.Assign, which
-// raises EConvertError - confirm against the TCUDAComponent declaration.
-procedure TCUDAModule.Assign(Source: TPersistent);
-var
-  SrcModule: TCUDAModule;
-begin
-  if Source is TCUDAModule then
-  begin
-    SrcModule := TCUDAModule(Source);
-    DestroyHandles;
-    FCode.Assign(SrcModule.FCode);
-    FCodeType := SrcModule.FCodeType;
-    AllocateHandles;
-  end;
-  inherited Assign(Source);
-end;
-
-// Links the module to a compiler whose output (Product) is this module's code
-// list. A compiler that already has a Product is rejected.
-procedure TCUDAModule.SetCompiler(const Value: TGLSCUDACompiler);
-begin
-  if Value = FCompiler then
-    exit;
-  // A compiler must be used by only one module
-  if Assigned(Value) and Assigned(Value.Product) then
-    exit;
-
-  // Release the previous compiler; otherwise its Product keeps pointing at
-  // our code list and the check above makes it unusable for any other module.
-  if Assigned(FCompiler) then
-    FCompiler.Product := nil;
-
-  FCompiler := Value;
-  if Assigned(FCompiler) then
-    FCompiler.Product := FCode;
-end;
-
-// Returns the context of the owning TGLSCUDA. When the master is missing or
-// of another class the error is logged (USE_LOGGING builds) and Abort is
-// raised.
-function TCUDAModule.GetContext: TCUDAContext;
-begin
-  // "is" already yields False for nil, so no separate Assigned test is needed.
-  if FMaster is TGLSCUDA then
-    exit(TGLSCUDA(FMaster).Context);
-
-  Result := nil;
-  {$IFDEF USE_LOGGING}
-    LogErrorFmt('Invalid master of module "%s"', [Name]);
-  {$ENDIF}
-  Abort;
-end;
-
-// True while a module handle is held (FHandle <> nil).
-function TCUDAModule.GetIsAllocated: Boolean;
-begin
-  Result := Assigned(FHandle);
-end;
-
-// After streaming: loads the module from its code text, then lets every child
-// allocate its handles, walking the children from last to first.
-procedure TCUDAModule.Loaded;
-var
-  Idx: Integer;
-begin
-  inherited Loaded;
-  LoadFromSource;
-  Idx := ItemsCount;
-  while Idx > 0 do
-  begin
-    Dec(Idx);
-    Items[Idx].AllocateHandles;
-  end;
-end;
-
-// Reloads the module from its code text and, when a compiler is attached and
-// the component is not in design mode, synchronises the child components
-// (functions with their parameters, textures, constants) with the module
-// info reported by the compiler. Children that end up without a handle are
-// destroyed.
-procedure TCUDAModule.AllocateHandles;
-var
-  func: TCUDAFunction;
-  tex: TCUDATexture;
-  cnst: TCUDAConstant;
-  Param: TCUDAFuncParam;
-  i, j: Integer;
-  useless: array of TCUDAComponent;
-  info: TCUDAModuleInfo;
-  bFail: Boolean;
-begin
-  LoadFromSource;
-
-  if Assigned(FCompiler) then
-  begin
-    info := FCompiler.ModuleInfo;
-    info.Owner := Self;
-
-    // Runtime module deployment
-    // (the second Assigned(FCompiler) test repeats the enclosing condition)
-    if not(csDesigning in ComponentState) and Assigned(FCompiler) then
-    begin
-
-      // Redefine function and texture with same names
-      for i := 0 to High(info.func) do
-      begin
-        func := GetKernelFunction(info.func[i].Name);
-        if not Assigned(func) then
-        begin
-          func := TCUDAFunction.Create(Self);
-          func.Master := Self;
-          func.FKernelName := info.func[i].KernelName;
-          func.Name := MakeUniqueName(info.func[i].Name);
-        end
-        else
-          // existing function: drop its old children before re-creating the
-          // parameters below
-          func.DeleteItems;
-
-        // Reading Handle triggers TCUDAFunction.AllocateHandles, which may
-        // Abort; any exception is treated as "kernel not available".
-        try
-          bFail := func.Handle = nil;
-        except
-          bFail := True;
-        end;
-
-        if bFail then
-          func.Destroy
-        else
-        begin
-          // one TCUDAFuncParam child per kernel argument
-          for j := 0 to High(info.func[i].Args) do
-          begin
-            Param := TCUDAFuncParam.Create(func);
-            Param.Master := TCUDAComponent(func);
-            Param.FKernelName := info.func[i].Args[j].Name;
-            Param.Name := func.KernelName + '_' + Param.KernelName;
-            Param.FType := info.func[i].Args[j].DataType;
-            Param.FCustomType := info.func[i].Args[j].CustomType;
-            Param.FRef := info.func[i].Args[j].Ref;
-            // Lock properties
-            Param.AllocateHandles;
-          end;
-        end;
-
-      end;
-
-      // Texture references: create missing ones, drop those without a handle.
-      for i := 0 to High(info.TexRef) do
-      begin
-        tex := GetKernelTexture(info.TexRef[i].Name);
-        if not Assigned(tex) then
-        begin
-          tex := TCUDATexture.Create(Self);
-          tex.Master := Self;
-          tex.FKernelName := info.TexRef[i].Name;
-          tex.fReadAsInteger :=
-            (info.TexRef[i].ReadMode = cudaReadModeElementType);
-          tex.fFormat := cCUDATypeToTexFormat[info.TexRef[i].DataType].F;
-          tex.fChannelNum := cCUDATypeToTexFormat[info.TexRef[i].DataType].C;
-          tex.Name := MakeUniqueName(tex.FKernelName);
-        end;
-
-        try
-          bFail := tex.Handle = nil;
-        except
-          bFail := True;
-        end;
-
-        if bFail then
-          tex.Destroy;
-      end;
-
-      // Constants: same scheme, validity is probed through DeviceAddress.
-      for i := 0 to High(info.Constant) do
-      begin
-        cnst := GetKernelConstant(info.Constant[i].Name);
-        if not Assigned(cnst) then
-        begin
-          cnst := TCUDAConstant.Create(Self);
-          cnst.Master := Self;
-          cnst.FKernelName := info.Constant[i].Name;
-          cnst.FType := info.Constant[i].DataType;
-          cnst.FCustomType := info.Constant[i].CustomType;
-          cnst.Name := MakeUniqueName(cnst.FKernelName);
-          cnst.IsValueDefined := info.Constant[i].DefValue;
-        end;
-
-        try
-          bFail := cnst.DeviceAddress = nil;
-        except
-          bFail := True;
-        end;
-
-        if bFail then
-          cnst.Destroy;
-      end;
-
-      // Delete useless components
-      // Collect first, destroy afterwards: destroying a child removes it from
-      // the item list, which must not happen while the list is being indexed.
-      SetLength(useless, ItemsCount);
-      j := 0;
-      for i := 0 to ItemsCount - 1 do
-        if not Items[i].IsAllocated then
-          begin
-            useless[j] := Items[i];
-            Inc(j);
-          end;
-      for i := 0 to j - 1 do
-        useless[i].Destroy;
-    end;
-  end;
-end;
-
-// Asks every child component, in item order, to release its handles.
-procedure TCUDAModule.DestroyHandles;
-var
-  Idx, Last: Integer;
-begin
-  Last := ItemsCount - 1;
-  Idx := 0;
-  while Idx <= Last do
-  begin
-    TCUDAComponent(Items[Idx]).DestroyHandles;
-    Inc(Idx);
-  end;
-end;
-
-// Loads a module from a .ptx or .cubin file: the code type is derived from
-// the file extension, the current module is unloaded, the file is handed to
-// cuModuleLoad, its text is copied into FCode, the compiler link is cleared
-// and the handles are re-allocated. Other extensions and missing files are
-// only logged (USE_LOGGING builds). Raises Abort when cuModuleLoad fails.
-procedure TCUDAModule.LoadFromFile(const AFilename: string);
-var
-  Status: TCUresult;
-  ext: string;
-  AnsiFileName: AnsiString;
-begin
-  if FileExists(AFilename) then
-  begin
-    // extension without the leading dot, lower-cased
-    ext := ExtractFileExt(AFilename);
-    System.Delete(ext, 1, 1);
-    ext := AnsiLowerCase(ext);
-    FCodeType := codeUndefined;
-    if ext = 'ptx' then
-      FCodeType := codePtx;
-    if ext = 'cubin' then
-      FCodeType := codeCubin;
-    if ext = 'gpu' then
-      FCodeType := codeGpu;
-
-    // 'gpu' is recognised above but not loadable here
-    if (FCodeType = codePtx) or (FCodeType = codeCubin) then
-    begin
-      Unload;
-      Context.Requires;
-      AnsiFileName := AnsiString(AFilename);
-      Status := cuModuleLoad(FHandle, PAnsiChar(AnsiFileName));
-      Context.Release;
-      if Status <> CUDA_SUCCESS then
-        Abort;
-      FCode.LoadFromFile(AFilename);
-      Compiler := nil;
-      // NOTE(review): AllocateHandles starts with LoadFromSource, which loads
-      // the module again from FCode - confirm the handle obtained from
-      // cuModuleLoad above is not leaked by that second load.
-      AllocateHandles;
-    end
-    else
-      {$IFDEF USE_LOGGING}
-        LogErrorFmt('%s.LoadFromFile: file extension must be ptx or cubin', [Self.ClassName]);
-     {$ENDIF}
-  end
-  else
-   {$IFDEF USE_LOGGING}
-    LogErrorFmt(strFailedOpenFile, [AFilename]);
-   {$ENDIF}
-end;
-
-// (Re)loads the driver module from the text held in FCode. Does nothing when
-// the code is empty. Children lose their handles first because they belong
-// to the module being replaced. Raises Abort when the driver rejects the code.
-procedure TCUDAModule.LoadFromSource;
-var
-  Text: AnsiString;
-begin
-  Text := AnsiString(FCode.Text);
-  if Length(Text) = 0 then
-    exit;
-
-  DestroyHandles;
-
-  Text := Text + #00;
-  Context.Requires;
-  try
-    // cuModuleLoadData overwrites FHandle; unload the module it still refers
-    // to, otherwise every reload (Loaded, AllocateHandles, LoadFromFile)
-    // leaks the previously loaded module.
-    if Assigned(FHandle) then
-    begin
-      cuModuleUnload(FHandle);
-      FHandle := nil;
-    end;
-    FStatus := cuModuleLoadData(FHandle, PAnsiChar(Text));
-  finally
-    // keep Requires/Release balanced even if a driver call raises
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Delegates to AllocateHandles, which reloads the module from its code and
-// rebuilds the child components.
-procedure TCUDAModule.LoadAndCompile;
-begin
-  AllocateHandles;
-end;
-
-// Releases the driver module together with all child components.
-// No-op when nothing is loaded.
-procedure TCUDAModule.Unload;
-begin
-  if FHandle = nil then
-    exit;
-
-  // children first: their handles belong to the module that goes away
-  DestroyHandles;
-  DeleteItems;
-
-  Context.Requires;
-  FStatus := cuModuleUnload(FHandle);
-  Context.Release;
-  FHandle := nil;
-end;
-
-// Change handler of the code list. Handles are only re-allocated when the
-// sender is a TGLSCUDACompiler and the component is not being streamed in.
-procedure TCUDAModule.OnChangeCode(Sender: TObject);
-begin
-  if csLoading in ComponentState then
-    exit;
-  if Sender is TGLSCUDACompiler then
-    AllocateHandles;
-end;
-
-// Copies the given lines into the module's own code list; the list object
-// itself is never replaced.
-procedure TCUDAModule.SetCode(const Value: TStringList);
-begin
-  FCode.Assign(Value);
-end;
-
-// Finds the first child TCUDAFunction whose KernelName equals AName.
-// Returns nil when there is none.
-function TCUDAModule.GetKernelFunction(const AName: string): TCUDAFunction;
-var
-  Idx: Integer;
-  Candidate: TComponent;
-begin
-  for Idx := 0 to ItemsCount - 1 do
-  begin
-    Candidate := Items[Idx];
-    if (Candidate is TCUDAFunction) and
-      (TCUDAFunction(Candidate).KernelName = AName) then
-      exit(TCUDAFunction(Candidate));
-  end;
-  Result := nil;
-end;
-
-// Finds the first child TCUDATexture whose KernelName equals AName.
-// Returns nil when there is none.
-function TCUDAModule.GetKernelTexture(const AName: string): TCUDATexture;
-var
-  Idx: Integer;
-  Candidate: TComponent;
-begin
-  for Idx := 0 to ItemsCount - 1 do
-  begin
-    Candidate := Items[Idx];
-    if (Candidate is TCUDATexture) and
-      (TCUDATexture(Candidate).KernelName = AName) then
-      exit(TCUDATexture(Candidate));
-  end;
-  Result := nil;
-end;
-
-
-// Finds the first child TCUDAConstant whose KernelName equals AName.
-// Returns nil when there is none.
-function TCUDAModule.GetKernelConstant(const AName: string): TCUDAConstant;
-var
-  Idx: Integer;
-  Candidate: TComponent;
-begin
-  for Idx := 0 to ItemsCount - 1 do
-  begin
-    Candidate := Items[Idx];
-    if (Candidate is TCUDAConstant) and
-      (TCUDAConstant(Candidate).KernelName = AName) then
-      exit(TCUDAConstant(Candidate));
-  end;
-  Result := nil;
-end;
-
- 
-// ------------------
-// ------------------ TCUDAComponent ------------------
-// ------------------
-
-// Unlinks the component from its master, frees all children and then the
-// child list itself.
-destructor TCUDAComponent.Destroy;
-begin
-  if FMaster <> nil then
-    FMaster.RemoveItem(Self);
-  if FItems <> nil then
-  begin
-    DeleteItems;
-    FItems.Free;
-  end;
-  inherited Destroy;
-end;
-
-// Records AChange in the set of pending changes.
-procedure TCUDAComponent.CuNotifyChange(AChange: TCUDAChange);
-begin
-  FChanges := FChanges + [AChange];
-end;
-
-// Returns the context of this component (when it is the TGLSCUDA root) or of
-// its master.
-// NOTE(review): FMaster is hard-cast to TGLSCUDA without a type or nil check,
-// although a master can also be e.g. a TCUDAModule - confirm this relies on
-// GetContext being virtual.
-function TCUDAComponent.GetContext: TCUDAContext;
-begin
-  if not (Self is TGLSCUDA) then
-    Result := TGLSCUDA(FMaster).Context
-  else
-    Result := TGLSCUDA(Self).Context;
-end;
-
-// Stores AStatus in FStatus unless it is CUDA_SUCCESS, so an earlier error is
-// never overwritten by a later success.
-procedure TCUDAComponent.CollectStatus(AStatus: TCUresult);
-begin
-  if AStatus = CUDA_SUCCESS then
-    exit;
-  FStatus := AStatus;
-end;
-
-// Streaming support: calls AProc for every child for which IsSubComponent
-// returns False.
-procedure TCUDAComponent.GetChildren(AProc: TGetChildProc; Root: TComponent);
-var
-  Idx: Integer;
-  Child: TComponent;
-begin
-  if not Assigned(FItems) then
-    exit;
-  for Idx := 0 to FItems.Count - 1 do
-  begin
-    Child := TComponent(FItems.List^[Idx]);
-    if not IsSubComponent(Child) then
-      AProc(Child);
-  end;
-end;
-
-// Streaming support: makes the parent read from the stream the master of
-// this component. The TGLSCUDA root never takes a master.
-procedure TCUDAComponent.SetParentComponent(Value: TComponent);
-begin
-  inherited;
-  if not (Self is TGLSCUDA) and (Value <> FMaster) then
-    Master := TCUDAComponent(Value);
-end;
-
-// Streaming support: the master acts as the parent component.
-function TCUDAComponent.GetParentComponent: TComponent;
-begin
-  Result := FMaster;
-end;
-
-// True when the component has a master.
-function TCUDAComponent.HasParent: Boolean;
-begin
-  Result := FMaster <> nil;
-end;
-
-// Moves the component from its current master (if any) to AMaster (if any).
-procedure TCUDAComponent.SetMaster(AMaster: TCUDAComponent);
-begin
-  // RemoveItem also resets FMaster to nil on success
-  if Assigned(FMaster) then
-    FMaster.RemoveItem(Self);
-  FMaster := AMaster;
-  if Assigned(FMaster) then
-    FMaster.AddItem(Self);
-end;
-
-// Renames the component and fires the unit-level name-change handler (when
-// one is registered). Setting the current name again does nothing.
-procedure TCUDAComponent.SetName(const NewName: TComponentName);
-begin
-  if Name = NewName then
-    exit;
-  inherited SetName(NewName);
-  if Assigned(vCUDAComponentNameChangeEvent) then
-    vCUDAComponentNameChangeEvent(Self);
-end;
-
-// Appends AItem to the child list, creating the list on first use.
-procedure TCUDAComponent.AddItem(AItem: TCUDAComponent);
-begin
-  if FItems = nil then
-    FItems := TPersistentObjectList.Create;
-  FItems.Add(AItem);
-end;
-
-// Detaches AItem from this component, provided this component is its master:
-// drops VCL ownership when this component also owns it, removes it from the
-// child list and clears its master reference.
-procedure TCUDAComponent.RemoveItem(AItem: TCUDAComponent);
-begin
-  if not Assigned(FItems) or (AItem.FMaster <> Self) then
-    exit;
-  if AItem.Owner = Self then
-    RemoveComponent(AItem);
-  FItems.Remove(AItem);
-  AItem.FMaster := nil;
-end;
-
-// Frees all children, popping them off the list one at a time until the list
-// is empty.
-procedure TCUDAComponent.DeleteItems;
-begin
-  if not Assigned(FItems) then
-    exit;
-  while FItems.Count > 0 do
-    TCUDAComponent(FItems.Pop).Free;
-end;
-
-// Returns the i-th child, or nil when there is no child list or i lies
-// outside 0 .. Count - 1.
-function TCUDAComponent.GetItem(const i: Integer): TCUDAComponent;
-begin
-  // The lower bound is checked too, so a negative index yields nil like any
-  // other out-of-range index instead of reaching the list accessor.
-  if Assigned(FItems) and (i >= 0) and (i < FItems.Count) then
-    Result := TCUDAComponent(FItems[i])
-  else
-    Result := nil;
-end;
-
-// Number of children; 0 while the child list has not been created.
-function TCUDAComponent.GetItemsCount: Integer;
-begin
-  Result := 0;
-  if FItems <> nil then
-    Result := FItems.Count;
-end;
-
-// Returns the first child whose Name equals the given name, or nil.
-function TCUDAComponent.GetItemByName(const name: string): TCUDAComponent;
-var
-  Idx: Integer;
-begin
-  for Idx := 0 to GetItemsCount - 1 do
-    if Items[Idx].Name = name then
-      exit(Items[Idx]);
-  Result := nil;
-end;
-
-// Builds BaseName + N with the smallest N >= 1 that is not yet used as the
-// Name of a child of this component.
-function TCUDAComponent.MakeUniqueName(const BaseName: string): string;
-var
-  Suffix: Integer;
-begin
-  Suffix := 1;
-  repeat
-    Result := BaseName + IntToStr(Suffix);
-    Inc(Suffix);
-  until GetItemByName(Result) = nil;
-end;
-
- 
-// ------------------
-// ------------------ TCUDAFunction ------------------
-// ------------------
-
-// Creates an unbound kernel function: no handle, auto-synchronisation on,
-// not launching, with fresh block-shape and grid dimension objects.
-constructor TCUDAFunction.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  FHandle := nil;
-  FLaunching := false;
-  FAutoSync := true;
-  FBlockShape := TCUDADimensions.Create(Self);
-  FGrid := TCUDADimensions.Create(Self);
-end;
-
- 
-// Destroys the two dimension objects, releases the function handle (and the
-// handles of the parameter children) and then runs the inherited destructor.
-destructor TCUDAFunction.Destroy;
-begin
-  FBlockShape.Destroy;
-  FGrid.Destroy;
-  DestroyHandles;
-  inherited Destroy;
-end;
-
-// Looks the kernel up in the master module and stores its handle.
-// Raises Abort when the master is not a TCUDAModule or the lookup fails.
-// Returns without a handle when no kernel name is set or the module itself
-// is not loaded.
-procedure TCUDAFunction.AllocateHandles;
-var
-  LModule: TCUDAModule;
-  ansiname: AnsiString;
-  pFunc: PCUfunction;
-begin
-  DestroyHandles;
-
-  if not(FMaster is TCUDAModule) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strModuleAbsent);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if FKernelName = '' then
-    exit;
-
-  LModule := TCUDAModule(FMaster);
-  if LModule.FHandle = nil then
-    exit;
-
-  // Cap block shape and grid at the limits reported by the device.
-  with LModule.Context.Device do
-  begin
-    FBlockShape.MaxSizeX := MaxThreadsDim.SizeX;
-    FBlockShape.MaxSizeY := MaxThreadsDim.SizeY;
-    FBlockShape.MaxSizeZ := MaxThreadsDim.SizeZ;
-    FGrid.MaxSizeX := MaxGridSize.SizeX;
-    FGrid.MaxSizeY := MaxGridSize.SizeY;
-    FGrid.MaxSizeZ := MaxGridSize.SizeZ;
-  end;
-
-  ansiname := AnsiString(FKernelName);
-  Context.Requires;
-  FStatus := cuModuleGetFunction(pFunc, LModule.FHandle, PAnsiChar(ansiname));
-  Context.Release;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-  FHandle := pFunc;
-  inherited;
-end;
-
-// Forgets the function handle after the TCUDAFuncParam children released
-// theirs. No-op (inherited is not called either) while no handle is held.
-procedure TCUDAFunction.DestroyHandles;
-var
-  Idx: Integer;
-  Child: TComponent;
-begin
-  if FHandle = nil then
-    exit;
-
-  for Idx := 0 to ItemsCount - 1 do
-  begin
-    Child := Items[Idx];
-    if Child is TCUDAFuncParam then
-      TCUDAFuncParam(Child).DestroyHandles;
-  end;
-  FHandle := nil;
-  inherited;
-end;
-
-// Copies the values of AShape into the function's own block-shape object.
-procedure TCUDAFunction.SetBlockShape(const AShape: TCUDADimensions);
-begin
-  FBlockShape.Assign(AShape);
-end;
-
-// Copies the values of AGrid into the function's own grid object.
-procedure TCUDAFunction.SetGrid(const AGrid: TCUDADimensions);
-begin
-  FGrid.Assign(AGrid);
-end;
-
-// Sets the kernel name. While streaming the name is only stored. Otherwise
-// it is accepted only while no handle is held, and the handle is then looked
-// up immediately. With a handle present the call is ignored.
-procedure TCUDAFunction.SetKernelName(const AName: string);
-var
-  Streaming: Boolean;
-begin
-  Streaming := csLoading in ComponentState;
-  if not Streaming and Assigned(FHandle) then
-    exit;
-  FKernelName := AName;
-  if not Streaming then
-    AllocateHandles;
-end;
-
-// Appends a 32-bit signed integer to the kernel parameter block.
-// Only valid while Launch is running the OnParameterSetup handler (FLaunching
-// is set); raises Abort otherwise or when the driver call fails.
-procedure TCUDAFunction.SetParam(Value: Integer);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  // the bit pattern of Value is passed on unchanged as an unsigned value
-  FStatus := cuParamSeti(FHandle, ParamOffset, PCardinal(@Value)^);
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(Cardinal))
-  else
-    Abort;
-end;
-
-// Appends a 32-bit unsigned integer to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: Cardinal);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSeti(FHandle, ParamOffset, Value);
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(Cardinal))
-  else
-    Abort;
-end;
-
-// Appends a single-precision float to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: Single);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetf(FHandle, ParamOffset, Value);
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(Single))
-  else
-    Abort;
-end;
-
-// Appends a TVector2i to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector2i);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector2i));
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(TVector2i))
-  else
-    Abort;
-end;
-
-// Appends a TVector3i to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector3i);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector3i));
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(TVector3i))
-  else
-    Abort;
-end;
-
-// Appends a TVector4i to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector4i);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector4i));
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(TVector4i))
-  else
-    Abort;
-end;
-
-// Appends a TVector2f to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector2f);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector2f));
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(TVector2f))
-  else
-    Abort;
-end;
-
-// Appends a TVector3f to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector3f);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector3f));
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-  // Advance by the size that was actually written. The offset used to move
-  // by SizeOf(TVector4f), which shifted every following parameter and
-  // disagreed with the TVector3i overload.
-  Inc(ParamOffset, SizeOf(TVector3f));
-end;
-
-// Appends a TVector4f to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Value: TVector4f);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector4f));
-  if FStatus = CUDA_SUCCESS then
-    Inc(ParamOffset, SizeOf(TVector4f))
-  else
-    Abort;
-end;
-
-// Appends the raw data pointer of MemData to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(MemData: TCUDAMemData);
-var
-  DevPtr: TCUdeviceptr;
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  DevPtr := MemData.RawData;
-  // Pass the pointer at its native width. Casting it to Cardinal cut it down
-  // to 32 bits, which corrupts the address on 64-bit targets. The offset is
-  // rounded up to a multiple of the pointer size first; on 32-bit targets
-  // this is a no-op because every parameter size is a multiple of four.
-  ParamOffset := ((ParamOffset + SizeOf(DevPtr) - 1) div SizeOf(DevPtr)) *
-    SizeOf(DevPtr);
-  FStatus := cuParamSetv(FHandle, ParamOffset, DevPtr, SizeOf(DevPtr));
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-  Inc(ParamOffset, SizeOf(DevPtr));
-end;
-
-// Makes the texture reference of TexRef available to the kernel. This does
-// not consume space in the parameter block, so ParamOffset is left alone.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(TexRef: TCUDATexture);
-var
-  RefHandle: PCUtexref;
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  RefHandle := TexRef.Handle;
-  FStatus := cuParamSetTexRef(FHandle, CU_PARAM_TR_DEFAULT, RefHandle);
-  if FStatus = CUDA_SUCCESS then
-    exit;
-  Abort;
-end;
-
-// Appends a raw pointer value to the kernel parameter block.
-// Only valid while FLaunching is set; raises Abort otherwise or when the
-// driver call fails.
-procedure TCUDAFunction.SetParam(Ptr: Pointer);
-begin
-  if not FLaunching then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strWrongParamSetup);
-    {$ENDIF}
-    Abort;
-  end;
-  // Pass the pointer at its native width. Casting it to Cardinal cut it down
-  // to 32 bits, which corrupts the address on 64-bit targets. The offset is
-  // rounded up to a multiple of the pointer size first; on 32-bit targets
-  // this is a no-op because every parameter size is a multiple of four.
-  ParamOffset := ((ParamOffset + SizeOf(Pointer) - 1) div SizeOf(Pointer)) *
-    SizeOf(Pointer);
-  FStatus := cuParamSetv(FHandle, ParamOffset, Ptr, SizeOf(Pointer));
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-  Inc(ParamOffset, SizeOf(Pointer));
-end;
-
-// Runs the kernel: lets OnParameterSetup fill the parameter block through
-// the SetParam overloads, fixes the block size and block shape, then starts
-// the kernel either on the FGrid.SizeX x FGrid.SizeY grid (Grided = True) or
-// as a single launch. With FAutoSync set it waits for the context to finish.
-// Raises Abort when the master is not a module, no handle is held or any
-// driver call reports an error. A nested call while the parameter setup is
-// running returns immediately.
-procedure TCUDAFunction.Launch(Grided: Boolean = true);
-begin
-  if not(FMaster is TCUDAModule) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strModuleAbsent);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if not Assigned(FHandle) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogErrorFmt(strFuncNotConnected, [Self.ClassName]);
-    {$ENDIF}
-    Abort;
-  end;
-
-  // re-entrancy guard: FLaunching is only set during OnParameterSetup
-  if FLaunching then
-    exit;
-
-  ParamOffset := 0;
-
-  Context.Requires;
-  // SetParam is only accepted while FLaunching is set
-  FLaunching := true;
-  if Assigned(FOnParameterSetup) then
-    try
-      FOnParameterSetup(Self);
-    except
-      // restore state before passing the exception on
-      FLaunching := false;
-      Context.Release;
-      raise;
-    end;
-  FLaunching := false;
-
-  // ParamOffset now equals the total size of the parameter block
-  FStatus := cuParamSetSize(FHandle, ParamOffset);
-  CollectStatus(cuFuncSetBlockShape(FHandle, FBlockShape.SizeX,
-    FBlockShape.SizeY, FBlockShape.SizeZ));
-
-  if FStatus = CUDA_SUCCESS then
-  begin
-    // execute the kernel
-    if Grided then
-      FStatus := cuLaunchGrid(FHandle, FGrid.SizeX, FGrid.SizeY)
-    else
-      FStatus := cuLaunch(FHandle);
-    if FAutoSync then
-      CollectStatus(cuCtxSynchronize);
-  end;
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogErrorFmt(strLaunchFailed, [Self.Name]);
-    {$ENDIF}
-    Abort;
-  end;
-end;
-
-// Returns the function handle, trying to allocate it first when none is held.
-// The result may still be nil (see AllocateHandles).
-function TCUDAFunction.GetHandle: PCUfunction;
-begin
-  if not Assigned(FHandle) then
-    AllocateHandles;
-  Result := FHandle;
-end;
-
-// True while a function handle is held.
-function TCUDAFunction.GetIsAllocated: Boolean;
-begin
-  Result := FHandle <> nil;
-end;
-
-// Queries CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK for this kernel.
-// Raises Abort when the query fails.
-function TCUDAFunction.GetMaxThreadPerBlock: Integer;
-begin
-  Context.Requires;
-  try
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncGetAttribute(Result,
-      CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Handle);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Queries CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES for this kernel.
-// Raises Abort when the query fails.
-function TCUDAFunction.GetSharedMemorySize: Integer;
-begin
-  Context.Requires;
-  try
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncGetAttribute(Result,
-      CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, Handle);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Sets the shared memory size of the kernel. Value is clamped to the range
-// 0 .. SharedMemPerBlock of the device. Raises Abort when the driver call
-// fails.
-procedure TCUDAFunction.SetSharedMemorySize(Value: Integer);
-var
-  MemPerBlock: NativeUInt;
-begin
-  Context.Requires;
-  try
-    // NOTE(review): hard casts - assumes the master is a TCUDAModule whose
-    // own master is a TGLSCUDA with a device attached; confirm.
-    MemPerBlock := TGLSCUDA(TCUDAModule(FMaster).FMaster)
-      .fDevice.Device.SharedMemPerBlock;
-    if Value < 0 then
-      Value := 0
-    else if Value > Integer(MemPerBlock) then
-      Value := Integer(MemPerBlock);
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncSetSharedSize(Handle, Value);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Queries CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES for this kernel.
-// Raises Abort when the query fails.
-function TCUDAFunction.GetConstMemorySize: Integer;
-begin
-  Context.Requires;
-  try
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncGetAttribute(Result,
-      CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, Handle);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Queries CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES for this kernel.
-// Raises Abort when the query fails.
-function TCUDAFunction.GetLocalMemorySize: Integer;
-begin
-  Context.Requires;
-  try
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncGetAttribute(Result,
-      CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, Handle);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Queries CU_FUNC_ATTRIBUTE_NUM_REGS for this kernel.
-// Raises Abort when the query fails.
-function TCUDAFunction.GetNumRegisters: Integer;
-begin
-  Context.Requires;
-  try
-    // Reading Handle may run AllocateHandles, which can raise Abort; the
-    // finally block keeps Requires/Release balanced in that case.
-    FStatus := cuFuncGetAttribute(Result, CU_FUNC_ATTRIBUTE_NUM_REGS, Handle);
-  finally
-    Context.Release;
-  end;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Finds the first child TCUDAFuncParam whose KernelName equals AName.
-// Returns nil when there is none.
-function TCUDAFunction.GetParameter(const AName: string): TCUDAFuncParam;
-var
-  Idx: Integer;
-  Candidate: TComponent;
-begin
-  for Idx := 0 to ItemsCount - 1 do
-  begin
-    Candidate := Items[Idx];
-    if (Candidate is TCUDAFuncParam) and
-      (TCUDAFuncParam(Candidate).KernelName = AName) then
-      exit(TCUDAFuncParam(Candidate));
-  end;
-  Result := nil;
-end;
-
- 
-// ------------------
-// ------------------ TCUDAMemData ------------------
-// ------------------
-
-// Creates an unallocated memory block description: host memory, 256 x 0 x 0
-// elements, one ctInt8 channel, no pitch, not an OpenGL reference and not
-// mapped.
-constructor TCUDAMemData.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  // nothing allocated yet
-  fData := nil;
-  FHandle := nil;
-  fPitch := 0;
-  // default layout
-  FMemoryType := mtHost;
-  fWidth := 256;
-  fHeight := 0;
-  fDepth := 0;
-  fChannelsType := ctInt8;
-  fChannelsNum := cnOne;
-  // state flags
-  FOpenGLRefArray := False;
-  FMapping := False;
-end;
-
-// Selects element X of a host (or currently mapped) block for element access.
-// Raises Abort when the block is neither host memory nor mapped, or when the
-// element does not lie completely inside the block.
-// NOTE(review): Result is never assigned here; presumably the address handed
-// to SetElementAccessAddress is what GCUDAHostElementAccess<EType> works on -
-// confirm against its declaration.
-function TCUDAMemData.Data<EType>(X: Integer): GCUDAHostElementAccess<EType>;
-var
-  ptr: PByte;
-  size: Integer;
-begin
-  if (FMemoryType <> mtHost) and not FMapping then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOnlyHostData);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if FMapping then
-    ptr := PByte(FMappedMemory)
-  else
-    ptr := PByte(GetData);
-  size := ElementSize * X;
-  // The whole element must fit. The former "size > DataSize" test let the
-  // offset one past the end (size = DataSize) and negative offsets through.
-  if (size < 0) or (size + ElementSize > DataSize) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOutOfRange);
-    {$ENDIF}
-    Abort;
-  end;
-  Inc(ptr, size);
-  SetElementAccessAddress(ptr, ElementSize);
-end;
-
-// Selects element (X, Y) of a host (or currently mapped) block for element
-// access; rows are fWidth elements long.
-// Raises Abort when the block is neither host memory nor mapped, or when the
-// element does not lie completely inside the block.
-// NOTE(review): Result is never assigned here; presumably the address handed
-// to SetElementAccessAddress is what GCUDAHostElementAccess<EType> works on -
-// confirm against its declaration.
-function TCUDAMemData.Data<EType>(X, Y: Integer): GCUDAHostElementAccess<EType>;
-var
-  ptr: PByte;
-  size: Integer;
-begin
-  if (FMemoryType <> mtHost) and not FMapping then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOnlyHostData);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if FMapping then
-    ptr := PByte(FMappedMemory)
-  else
-    ptr := PByte(GetData);
-  size := ElementSize * (X + fWidth*Y);
-  // The whole element must fit. The former "size > DataSize" test let the
-  // offset one past the end (size = DataSize) and negative offsets through.
-  if (size < 0) or (size + ElementSize > DataSize) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOutOfRange);
-    {$ENDIF}
-    Abort;
-  end;
-  Inc(ptr, size);
-  SetElementAccessAddress(ptr, ElementSize);
-end;
-
-// Selects element (X, Y, Z) of a host (or currently mapped) block for element
-// access; rows are fWidth elements long and slices fHeight rows high.
-// Raises Abort when the block is neither host memory nor mapped, or when the
-// element does not lie completely inside the block.
-// NOTE(review): Result is never assigned here; presumably the address handed
-// to SetElementAccessAddress is what GCUDAHostElementAccess<EType> works on -
-// confirm against its declaration.
-function TCUDAMemData.Data<EType>(X, Y, Z: Integer): GCUDAHostElementAccess<EType>;
-var
-  ptr: PByte;
-  size: Integer;
-begin
-  if (FMemoryType <> mtHost) and not FMapping then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOnlyHostData);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if FMapping then
-    ptr := PByte(FMappedMemory)
-  else
-    ptr := PByte(GetData);
-  size := ElementSize * (X + fWidth*(Y  + Z * fHeight));
-  // The whole element must fit. The former "size > DataSize" test let the
-  // offset one past the end (size = DataSize) and negative offsets through.
-  if (size < 0) or (size + ElementSize > DataSize) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strOutOfRange);
-    {$ENDIF}
-    Abort;
-  end;
-  Inc(ptr, size);
-  SetElementAccessAddress(ptr, ElementSize);
-end;
-
-// Detaches the block from the texture that uses it (if any) and releases the
-// allocated memory.
-destructor TCUDAMemData.Destroy;
-begin
-  if fTexture <> nil then
-    fTexture.MemDataArray := nil;
-  DestroyHandles;
-  inherited Destroy;
-end;
-
-// Records the change and tells the attached texture (if any) that its array
-// changed.
-procedure TCUDAMemData.CuNotifyChange(AChange: TCUDAChange);
-begin
-  inherited CuNotifyChange(AChange);
-  if fTexture = nil then
-    exit;
-  fTexture.CuNotifyChange(cuchArray);
-end;
-
-// Switches the memory type. A ctDouble block that becomes an array falls
-// back to ctFloat, because SetChannelType refuses ctDouble for arrays.
-procedure TCUDAMemData.SetMemoryType(const AType: TCUDAMemType);
-begin
-  if FMemoryType = AType then
-    exit;
-  FMemoryType := AType;
-  if (fChannelsType = ctDouble) and (AType = mtArray) then
-    SetChannelType(ctFloat);
-  CuNotifyChange(cuchArray);
-end;
-
-// Sets the width in elements (asserted to be positive) and flags a size
-// change when the value differs.
-procedure TCUDAMemData.SetWidth(const Value: Integer);
-begin
-  Assert(Value > 0);
-  if Value = fWidth then
-    exit;
-  fWidth := Value;
-  CuNotifyChange(cuchSize);
-end;
-
-// Ends a mapping: for device and array memory the mapped host buffer is
-// copied back (DataSize bytes) and then freed; for host memory nothing has
-// to be copied. Raises Abort when the block is not mapped or a driver call
-// fails; in the failure case the mapping state is left untouched.
-procedure TCUDAMemData.UnMap;
-begin
-  if not FMapping then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogErrorFmt(strFailUnmap, [Name]);
-    {$ENDIF}
-    Abort;
-  end;
-
-  Context.Requires;
-
-  case FMemoryType of
-    mtHost:
-      begin
-        FStatus := CUDA_SUCCESS;
-      end;
-    mtDevice:
-      begin
-        FStatus := cuMemcpyHtoD(GetData, FMappedMemory, DataSize);
-        // the buffer is only freed after a successful copy
-        if FStatus = CUDA_SUCCESS then
-          FStatus := cuMemFreeHost(FMappedMemory);
-      end;
-    mtArray:
-      begin
-        FStatus := cuMemcpyHtoA(GetArrayHandle, 0, FMappedMemory, DataSize);
-        if FStatus = CUDA_SUCCESS then
-          FStatus := cuMemFreeHost(FMappedMemory);
-      end;
-  end;
-
-  Context.Release;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  FMapping := False;
-  FMappedMemory := nil;
-end;
-
-// Sets the height in elements (asserted to be non-negative) and flags a size
-// change when the value differs.
-procedure TCUDAMemData.SetHeight(const Value: Integer);
-begin
-  Assert(Value >= 0);
-  if Value = fHeight then
-    exit;
-  fHeight := Value;
-  CuNotifyChange(cuchSize);
-end;
-
-// Sets the depth in elements (asserted to be non-negative) and flags a size
-// change when the value differs.
-procedure TCUDAMemData.SetDepth(const Value: Integer);
-begin
-  Assert(Value >= 0);
-  if Value = fDepth then
-    exit;
-  fDepth := Value;
-  CuNotifyChange(cuchSize);
-end;
-
-// Sets the channel data type (ctUndefined is asserted against) and flags a
-// format change. ctDouble is silently refused for array memory.
-procedure TCUDAMemData.SetChannelType(const Value: TCUDAChannelType);
-begin
-  Assert(Value <> ctUndefined);
-  if (Value = ctDouble) and (FMemoryType = mtArray) then
-    exit;
-  if Value = fChannelsType then
-    exit;
-  fChannelsType := Value;
-  CuNotifyChange(cuchFormat);
-end;
-
-// Sets the number of channels per element and flags a format change when
-// the value differs.
-procedure TCUDAMemData.SetChannelNum(const Value: TCUDAChannelNum);
-begin
-  if Value = fChannelsNum then
-    exit;
-  fChannelsNum := Value;
-  CuNotifyChange(cuchFormat);
-end;
-
-// Returns the raw data pointer. Memory is allocated on demand, but only when
-// nothing is allocated yet and at least one change is pending.
-function TCUDAMemData.GetData: TCUdeviceptr;
-begin
-  if (fData = nil) and (FChanges <> []) then
-    AllocateHandles;
-  Result := fData;
-end;
-
-// Returns the array handle. The array is created on demand, but only when
-// no handle is held yet and at least one change is pending.
-function TCUDAMemData.GetArrayHandle: PCUarray;
-begin
-  if (FHandle = nil) and (FChanges <> []) then
-    AllocateHandles;
-  Result := FHandle;
-end;
-
-// (Re)allocates the block according to the current memory type, size and
-// format: host memory, linear or pitched device memory, or a CUDA array.
-// Clears the pending changes on success and raises Abort when the driver
-// call fails.
-procedure TCUDAMemData.AllocateHandles;
-const
-  cArrayFormat: array [ctUInt8 .. ctFloat] of TCUarray_format =
-    (CU_AD_FORMAT_UNSIGNED_INT8, CU_AD_FORMAT_UNSIGNED_INT16,
-    CU_AD_FORMAT_UNSIGNED_INT32, CU_AD_FORMAT_SIGNED_INT8,
-    CU_AD_FORMAT_SIGNED_INT16, CU_AD_FORMAT_SIGNED_INT32, CU_AD_FORMAT_HALF,
-    CU_AD_FORMAT_FLOAT);
-var
-  h, d: Integer;
-  Array2DDesc: TCUDA_ARRAY_DESCRIPTOR;
-  // Array3DDesc: TCUDA_ARRAY3D_DESCRIPTOR;
-  AlignedSize: Integer;
-begin
-  DestroyHandles;
-
-  // Always recompute the element size. DestroyHandles resets FElementSize to
-  // 0 when it released memory, so computing it only while cuchFormat is
-  // pending made a re-allocation after e.g. a pure size change work with a
-  // data size of 0.
-  FElementSize := cChannelTypeSize[fChannelsType] * (Ord(fChannelsNum) + 1);
-
-  // unused dimensions (0) count as 1 for the total size
-  h := Height;
-  if h = 0 then
-    h := 1;
-  d := Depth;
-  if d = 0 then
-    d := 1;
-  FDataSize := Width * h * d * ElementSize;
-
-  FStatus := CUDA_SUCCESS;
-  Context.Requires;
-  case FMemoryType of
-    mtHost:
-      FStatus := cuMemAllocHost(fData, DataSize);
-    mtDevice:
-      begin
-        if fHeight > 1 then
-        begin
-          // pitched allocation; the element size passed to the driver is
-          // rounded up to a power of two and clamped to 4 .. 16
-          AlignedSize := RoundUpToPowerOf2(ElementSize);
-          if AlignedSize < 4 then
-            AlignedSize := 4;
-          if AlignedSize > 16 then
-            AlignedSize := 16;
-          FStatus := cuMemAllocPitch(TCUdeviceptr(fData), fPitch,
-            Width * ElementSize, fHeight, AlignedSize);
-        end
-        else
-          FStatus := cuMemAlloc(TCUdeviceptr(fData), DataSize);
-      end;
-    mtArray:
-      begin
-        Array2DDesc.Width := fWidth;
-        Array2DDesc.Height := fHeight;
-        Array2DDesc.Format := cArrayFormat[fChannelsType];
-        Array2DDesc.NumChannels := Ord(fChannelsNum) + 1;
-        FStatus := cuArrayCreate(FHandle, Array2DDesc);
-      end;
-  end;
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  FChanges := [];
-  inherited;
-end;
-
-// Releases the allocated memory or array and resets the derived size
-// information. Does nothing when nothing is allocated for the current
-// memory type. Memory flagged as OpenGL reference (FOpenGLRefArray) is not
-// freed here, only forgotten.
-procedure TCUDAMemData.DestroyHandles;
-begin
-  // early out: nothing allocated for the current memory type
-  case FMemoryType of
-    mtHost, mtDevice:
-      if fData = nil then
-        exit;
-    mtArray:
-      if FHandle = nil then
-        exit;
-  end;
-
-  inherited;
-
-  if not FOpenGLRefArray then
-  begin
-    Context.Requires;
-    case FMemoryType of
-      mtHost:
-        if Assigned(fData) then
-          cuMemFreeHost(fData);
-
-      mtDevice:
-        if Assigned(fData) then
-          cuMemFree(fData);
-
-      mtArray:
-        if Assigned(FHandle) then
-        begin
-          // detach the texture before its array goes away
-          if Assigned(fTexture) then
-            fTexture.MemDataArray := nil;
-          cuArrayDestroy(FHandle);
-        end;
-    end;
-    Context.Release;
-  end;
-  FHandle := nil;
-  fData := nil;
-  fPitch := 0;
-  FDataSize := 0;
-  FElementSize := 0;
-  FOpenGLRefArray := False;
-end;
-
-// Fills device memory with the value passed in the untyped parameter, read
-// as Byte, Word or DWord according to the channel type. Blocks with
-// fHeight = 0 use the 1D memset calls, all others the 2D (pitched) ones.
-// Other memory types are ignored, and channel types not listed in the case
-// statements leave the memory untouched. Raises Abort when the driver call
-// fails.
-procedure TCUDAMemData.FillMem(const Value);
-var
-  Ptr: TCUdeviceptr;
-  RowSize: Integer;
-begin
-  if FMemoryType = mtDevice then
-  begin
-    Ptr := GetData;
-    FStatus := CUDA_SUCCESS;
-    Context.Requires;
-    // 1D memory set
-    if fHeight = 0 then
-    begin
-      // the count is DataSize divided by the size of the unit being written
-      case fChannelsType of
-        ctUInt8, ctInt8:
-          FStatus := cuMemsetD8(Ptr, Byte(Value), DataSize);
-        ctUInt16, ctInt16, ctHalfFloat:
-          FStatus := cuMemsetD16(Ptr, Word(Value), DataSize div SizeOf(Word));
-        ctUInt32, ctInt32, ctFloat:
-          FStatus := cuMemsetD32(Ptr, DWord(Value), DataSize div SizeOf(DWord));
-      end;
-    end
-    // 2D memory set
-    else
-    begin
-      // row length in channel values: channels per element times width
-      RowSize := (1 + Ord(fChannelsNum)) * fWidth;
-      case fChannelsType of
-        ctUInt8, ctInt8:
-          FStatus := cuMemsetD2D8(Ptr, fPitch, Byte(Value), RowSize, fHeight);
-        ctUInt16, ctInt16, ctHalfFloat:
-          FStatus := cuMemsetD2D16(Ptr, fPitch, Word(Value), RowSize,
-            fHeight);
-        ctUInt32, ctInt32, ctFloat:
-          FStatus := cuMemsetD2D32(Ptr, fPitch, DWord(Value),
-            RowSize, fHeight);
-      end;
-    end;
-    Context.Release;
-    if FStatus <> CUDA_SUCCESS then
-      Abort
-  end;
-end;
-
-// Copies the contents of this buffer into ADstMemData.
-// When both buffers are 1-D (Height = 0) a linear copy of the smaller
-// DataSize is issued, picking the cuMemcpy* variant from the two memory
-// types. Otherwise a cuMemcpy2D call copies the overlapping rectangle.
-// A nil destination is ignored; a CUDA error ends in Abort.
-// NOTE(review): when only one side is 1-D the 2-D path passes Height = 0 to
-// cuMemcpy2D - confirm that this is intended.
-procedure TCUDAMemData.CopyTo(const ADstMemData: TCUDAMemData);
-
-  // Linear (1-D) copy of as many bytes as fit into both buffers.
-  procedure CopyLinear;
-  var
-    ByteCount: Integer;
-  begin
-    ByteCount := MinInteger(DataSize, ADstMemData.DataSize);
-    Context.Requires;
-    case MemoryType of
-      mtHost:
-        case ADstMemData.MemoryType of
-          mtHost:
-            Move(RawData^, ADstMemData.RawData^, ByteCount);
-          mtDevice:
-            FStatus := cuMemcpyHtoD(ADstMemData.RawData, RawData, ByteCount);
-          mtArray:
-            FStatus := cuMemcpyHtoA(ADstMemData.ArrayHandle, 0, RawData,
-              ByteCount);
-        end;
-
-      mtDevice:
-        case ADstMemData.MemoryType of
-          mtHost:
-            FStatus := cuMemcpyDtoH(ADstMemData.RawData, RawData, ByteCount);
-          mtDevice:
-            FStatus := cuMemcpyDtoD(ADstMemData.RawData, RawData, ByteCount);
-          mtArray:
-            FStatus := cuMemcpyDtoA(ADstMemData.ArrayHandle, 0, RawData,
-              ByteCount);
-        end;
-
-      mtArray:
-        case ADstMemData.MemoryType of
-          mtHost:
-            FStatus := cuMemcpyAtoH(ADstMemData.RawData, ArrayHandle, 0,
-              ByteCount);
-          mtDevice:
-            FStatus := cuMemcpyAtoD(ADstMemData.RawData, ArrayHandle, 0,
-              ByteCount);
-          mtArray:
-            FStatus := cuMemcpyAtoA(ADstMemData.ArrayHandle, 0,
-              ArrayHandle, 0, ByteCount);
-        end;
-    end;
-    Context.Release;
-  end;
-
-  // Rectangle (2-D) copy described by a zero-initialised TCUDA_MEMCPY2D.
-  procedure CopyPitched;
-  var
-    Params: TCUDA_MEMCPY2D;
-  begin
-    FillChar(Params, SizeOf(Params), 0);
-
-    // Source side
-    case MemoryType of
-      mtHost:
-        begin
-          Params.srcMemoryType := CU_MEMORYTYPE_HOST;
-          Params.srcHost := TCUdeviceptr(RawData);
-        end;
-      mtDevice:
-        begin
-          Params.srcMemoryType := CU_MEMORYTYPE_DEVICE;
-          Params.srcDevice := TCUdeviceptr(RawData);
-        end;
-      mtArray:
-        begin
-          Params.srcMemoryType := CU_MEMORYTYPE_ARRAY;
-          Params.srcArray := ArrayHandle;
-        end;
-    end;
-    Params.srcPitch := fPitch;
-
-    // Destination side
-    case ADstMemData.FMemoryType of
-      mtHost:
-        begin
-          Params.dstMemoryType := CU_MEMORYTYPE_HOST;
-          Params.dstHost := TCUdeviceptr(ADstMemData.RawData);
-        end;
-      mtDevice:
-        begin
-          Params.dstMemoryType := CU_MEMORYTYPE_DEVICE;
-          Params.dstDevice := TCUdeviceptr(ADstMemData.RawData);
-        end;
-      mtArray:
-        begin
-          Params.dstMemoryType := CU_MEMORYTYPE_ARRAY;
-          Params.dstArray := ADstMemData.ArrayHandle;
-        end;
-    end;
-    Params.dstPitch := ADstMemData.fPitch;
-
-    // Copy only the rectangle that exists in both buffers.
-    Params.WidthInBytes := Cardinal(MinInteger(ElementSize * Width,
-      ADstMemData.ElementSize * ADstMemData.Width));
-    Params.Height := MinInteger(fHeight, ADstMemData.Height);
-
-    Context.Requires;
-    FStatus := cuMemcpy2D(@Params);
-    Context.Release;
-  end;
-
-begin
-  if ADstMemData = nil then
-    exit;
-
-  Assert((fDepth = 0) and (ADstMemData.Depth = 0),
-    'Volume copying not yet implemented');
-
-  FStatus := CUDA_SUCCESS;
-
-  if (Height = 0) and (ADstMemData.Height = 0) then
-    CopyLinear
-  else
-    CopyPitched;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Copies a sub-rectangle of this buffer into ADstMemData.
-//   ASrcXYZ - origin of the region in this buffer, in elements
-//   ADstXYZ - origin of the region in the destination, in elements
-//   ASizes  - requested extent of the region, in elements
-// Origins are clamped into the buffers. Sizes are clamped to what remains
-// after the origin in BOTH buffers, so origin + size never leaves either
-// buffer. Previously sizes were clamped against the full extents only.
-// A nil destination is ignored; a CUDA error ends in Abort.
-procedure TCUDAMemData.SubCopyTo(const ADstMemData: TCUDAMemData;
-  ASrcXYZ, ADstXYZ, ASizes: IntElement.TVector3);
-var
-  copyParam2D: TCUDA_MEMCPY2D;
-begin
-  if not Assigned(ADstMemData) then
-    exit;
-
-  // Clamp origins into [0, extent - 1]; an unused dimension (extent 0) gives 0.
-  ASrcXYZ[0] := MaxInteger(MinInteger(ASrcXYZ[0], Width - 1), 0);
-  ASrcXYZ[1] := MaxInteger(MinInteger(ASrcXYZ[1], Height - 1), 0);
-  ASrcXYZ[2] := MaxInteger(MinInteger(ASrcXYZ[2], Depth - 1), 0);
-
-  ADstXYZ[0] := MaxInteger(MinInteger(ADstXYZ[0], ADstMemData.Width - 1), 0);
-  ADstXYZ[1] := MaxInteger(MinInteger(ADstXYZ[1], ADstMemData.Height - 1), 0);
-  ADstXYZ[2] := MaxInteger(MinInteger(ADstXYZ[2], ADstMemData.Depth - 1), 0);
-
-  // Clamp sizes to the space left behind each origin, never below zero.
-  ASizes[0] := MaxInteger(MinInteger(ASizes[0], Width - ASrcXYZ[0],
-    ADstMemData.Width - ADstXYZ[0]), 0);
-  ASizes[1] := MaxInteger(MinInteger(ASizes[1], Height - ASrcXYZ[1],
-    ADstMemData.Height - ADstXYZ[1]), 0);
-  ASizes[2] := MaxInteger(MinInteger(ASizes[2], Depth - ASrcXYZ[2],
-    ADstMemData.Depth - ADstXYZ[2]), 0);
-
-  Assert(ASizes[2] = 0, 'Volume copying not yet implemented');
-
-  FStatus := CUDA_SUCCESS;
-
-  // Kept as a run-time check too, for builds where Assert is compiled out.
-  if ASizes[2] = 0 then
-  begin
-    // 2D copying
-    FillChar(copyParam2D, SizeOf(copyParam2D), 0);
-    // Setup source copy parameters
-    case MemoryType of
-      mtHost:
-        begin
-          copyParam2D.srcMemoryType := CU_MEMORYTYPE_HOST;
-          copyParam2D.srcHost := TCUdeviceptr(RawData);
-        end;
-      mtDevice:
-        begin
-          copyParam2D.srcMemoryType := CU_MEMORYTYPE_DEVICE;
-          copyParam2D.srcDevice := TCUdeviceptr(RawData);
-        end;
-      mtArray:
-        begin
-          copyParam2D.srcMemoryType := CU_MEMORYTYPE_ARRAY;
-          copyParam2D.srcArray := ArrayHandle;
-        end;
-    end;
-    copyParam2D.srcXInBytes := ASrcXYZ[0] * FElementSize;
-    copyParam2D.srcY := ASrcXYZ[1];
-    copyParam2D.srcPitch := fPitch;
-    // Setup destination copy parameters
-    case ADstMemData.FMemoryType of
-      mtHost:
-        begin
-          copyParam2D.dstMemoryType := CU_MEMORYTYPE_HOST;
-          copyParam2D.dstHost := TCUdeviceptr(ADstMemData.RawData);
-        end;
-      mtDevice:
-        begin
-          copyParam2D.dstMemoryType := CU_MEMORYTYPE_DEVICE;
-          copyParam2D.dstDevice := TCUdeviceptr(ADstMemData.RawData);
-        end;
-      mtArray:
-        begin
-          copyParam2D.dstMemoryType := CU_MEMORYTYPE_ARRAY;
-          copyParam2D.dstArray := ADstMemData.ArrayHandle;
-        end;
-    end;
-    copyParam2D.dstXInBytes := ADstXYZ[0] * ADstMemData.FElementSize;
-    copyParam2D.dstY := ADstXYZ[1];
-    copyParam2D.dstPitch := ADstMemData.fPitch;
-
-    // Use the narrower element size so neither row is overrun.
-    copyParam2D.WidthInBytes := Cardinal(MinInteger(ElementSize * ASizes[0],
-      ADstMemData.ElementSize * ASizes[0]));
-    // A 1-D buffer is copied as a single row.
-    copyParam2D.Height := MaxInteger(ASizes[1], 1);
-
-    Context.Requires;
-    FStatus := cuMemcpy2D(@copyParam2D);
-    Context.Release;
-  end;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort
-end;
-
-// Copies this buffer into the host pixel data of AGLImage with cuMemcpy2D.
-// The destination is treated as tightly packed rows (ElementSize * Width).
-// Only the overlapping rectangle is transferred.
-// A nil image is ignored; a CUDA error ends in Abort.
-procedure TCUDAMemData.CopyTo(const AGLImage: TGLBitmap32);
-var
-  Params: TCUDA_MEMCPY2D;
-begin
-  if AGLImage = nil then
-    exit;
-
-  Assert((fDepth = 0) and (AGLImage.Depth = 0),
-    'Volume copying not yet implemented');
-
-  FillChar(Params, SizeOf(Params), 0);
-
-  // Source side: this buffer
-  if FMemoryType = mtHost then
-  begin
-    Params.srcMemoryType := CU_MEMORYTYPE_HOST;
-    Params.srcHost := TCUdeviceptr(RawData);
-  end
-  else if FMemoryType = mtDevice then
-  begin
-    Params.srcMemoryType := CU_MEMORYTYPE_DEVICE;
-    Params.srcDevice := TCUdeviceptr(RawData);
-  end
-  else if FMemoryType = mtArray then
-  begin
-    Params.srcMemoryType := CU_MEMORYTYPE_ARRAY;
-    Params.srcArray := ArrayHandle;
-  end;
-  Params.srcPitch := fPitch;
-
-  // Destination side: the image's host memory
-  Params.dstMemoryType := CU_MEMORYTYPE_HOST;
-  Params.dstHost := AGLImage.Data;
-  Params.dstPitch := AGLImage.ElementSize * AGLImage.Width;
-
-  Params.WidthInBytes :=
-    MinInteger(Cardinal(ElementSize * Width), Params.dstPitch);
-  Params.Height := MinInteger(Height, AGLImage.Height);
-
-  Context.Requires;
-  FStatus := cuMemcpy2D(@Params);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Copies this buffer into a mapped graphic resource.
-//   AGLGraphic - target resource; ignored when nil or when its first handle
-//                is not assigned
-//   aAttr      - attribute array name; an empty string selects the element
-//                array
-// Only buffer resources (rtBuffer) are handled; other resource types are
-// mapped and unmapped without copying. The byte count is the smaller of
-// DataSize and the mapped size. The resource is unmapped and the context
-// released even if one of the calls raises. A CUDA error ends in Abort.
-procedure TCUDAMemData.CopyTo(const AGLGraphic: TCUDAGraphicResource;
-  aAttr: string);
-var
-  pMap: TCUdeviceptr;
-  mapSize: Integer;
-begin
-  // Guard against a nil resource before touching its handle table.
-  if (not Assigned(AGLGraphic)) or (not Assigned(AGLGraphic.FHandle[0])) then
-    exit;
-
-  Context.Requires;
-  try
-    AGLGraphic.MapResources;
-    try
-      // TODO: image copying
-      if AGLGraphic.FResourceType <> rtBuffer then
-        exit;
-
-      if Length(aAttr) = 0 then
-      begin
-        mapSize := AGLGraphic.GetElementArrayDataSize;
-        pMap := AGLGraphic.GetElementArrayAddress;
-      end
-      else
-      begin
-        mapSize := AGLGraphic.GetAttributeArraySize(aAttr);
-        pMap := AGLGraphic.GetAttributeArrayAddress(aAttr);
-      end;
-
-      FStatus := CUDA_SUCCESS;
-
-      case FMemoryType of
-        mtHost:
-          FStatus := cuMemcpyHtoD(pMap, RawData, MinInteger(DataSize, mapSize));
-        mtDevice:
-          FStatus := cuMemcpyDtoD(pMap, RawData, MinInteger(DataSize, mapSize));
-        mtArray:
-          FStatus := cuMemcpyAtoD(pMap, ArrayHandle, 0,
-            MinInteger(DataSize, mapSize));
-      end;
-    finally
-      AGLGraphic.UnMapResources;
-    end;
-  finally
-    Context.Release;
-  end;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Copies the contents of ASrcMemData into this buffer by delegating to the
-// source's CopyTo. A nil source is ignored, matching the other copy
-// routines, instead of being dereferenced.
-procedure TCUDAMemData.CopyFrom(const ASrcMemData: TCUDAMemData);
-begin
-  if Assigned(ASrcMemData) then
-    ASrcMemData.CopyTo(Self);
-end;
-
-// Loads this buffer from the host pixel data of AGLImage with cuMemcpy2D.
-// The source is treated as tightly packed rows (ElementSize * Width).
-// Only the overlapping rectangle is transferred.
-// A nil image is ignored; a CUDA error ends in Abort.
-procedure TCUDAMemData.CopyFrom(const AGLImage: TGLBitmap32);
-var
-  Params: TCUDA_MEMCPY2D;
-begin
-  if AGLImage = nil then
-    exit;
-
-  Assert((fDepth = 0) and (AGLImage.Depth = 0),
-    'Volume copying not yet implemented');
-
-  FillChar(Params, SizeOf(Params), 0);
-
-  // Destination side: this buffer
-  if FMemoryType = mtHost then
-  begin
-    Params.dstMemoryType := CU_MEMORYTYPE_HOST;
-    Params.dstHost := TCUdeviceptr(RawData);
-  end
-  else if FMemoryType = mtDevice then
-  begin
-    Params.dstMemoryType := CU_MEMORYTYPE_DEVICE;
-    Params.dstDevice := TCUdeviceptr(RawData);
-  end
-  else if FMemoryType = mtArray then
-  begin
-    Params.dstMemoryType := CU_MEMORYTYPE_ARRAY;
-    Params.dstArray := ArrayHandle;
-  end;
-  Params.dstPitch := fPitch;
-
-  // Source side: the image's host memory
-  Params.srcMemoryType := CU_MEMORYTYPE_HOST;
-  Params.srcHost := AGLImage.Data;
-  Params.srcPitch := AGLImage.ElementSize * AGLImage.Width;
-
-  Params.WidthInBytes := MinInteger(
-    Cardinal(ElementSize * fWidth), Params.srcPitch);
-  Params.Height := MinInteger(fHeight, AGLImage.Height);
-
-  Context.Requires;
-  FStatus := cuMemcpy2D(@Params);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// Loads this buffer from a mapped graphic resource.
-//   AGLGraphic - source resource; ignored when nil or when its first handle
-//                is not assigned
-//   aAttr      - attribute array name; an empty string selects the element
-//                array
-// Only buffer resources (rtBuffer) are handled; other resource types are
-// mapped and unmapped without copying. The byte count is the smaller of
-// DataSize and the mapped size. The resource is unmapped and the context
-// released even if one of the calls raises. A CUDA error ends in Abort.
-procedure TCUDAMemData.CopyFrom(const AGLGraphic: TCUDAGraphicResource;
-  aAttr: string);
-var
-  pMap: TCUdeviceptr;
-  mapSize: Integer;
-begin
-  // Guard against a nil resource before touching its handle table.
-  if (not Assigned(AGLGraphic)) or (not Assigned(AGLGraphic.FHandle[0])) then
-    exit;
-
-  Assert(fDepth = 0, 'Volume copying not yet implemented');
-
-  Context.Requires;
-  try
-    AGLGraphic.MapResources;
-    try
-      // TODO: image copying
-      if AGLGraphic.fResourceType <> rtBuffer then
-        exit;
-
-      if Length(aAttr) = 0 then
-      begin
-        mapSize := AGLGraphic.GetElementArrayDataSize;
-        pMap := AGLGraphic.GetElementArrayAddress;
-      end
-      else
-      begin
-        mapSize := AGLGraphic.GetAttributeArraySize(aAttr);
-        pMap := AGLGraphic.GetAttributeArrayAddress(aAttr);
-      end;
-
-      FStatus := CUDA_SUCCESS;
-
-      case FMemoryType of
-        mtHost:
-          FStatus := cuMemcpyDtoH(RawData, pMap,
-            Cardinal(MinInteger(DataSize, mapSize)));
-        mtDevice:
-          FStatus := cuMemcpyDtoD(RawData, pMap,
-            Cardinal(MinInteger(DataSize, mapSize)));
-        mtArray:
-          FStatus := cuMemcpyDtoA(ArrayHandle, 0, pMap,
-            Cardinal(MinInteger(DataSize, mapSize)));
-      end;
-    finally
-      AGLGraphic.UnMapResources;
-    end;
-  finally
-    Context.Release;
-  end;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-end;
-
-// True when the storage matching the current memory type exists: the data
-// pointer for host/device memory, the array handle for array memory.
-function TCUDAMemData.GetIsAllocated: Boolean;
-begin
-  if FMemoryType = mtArray then
-    Result := FHandle <> nil
-  else if FMemoryType in [mtHost, mtDevice] then
-    Result := FData <> nil
-  else
-    Result := False;
-end;
-
-// Makes the buffer contents reachable through FMappedMemory.
-//   host memory         - asks CUDA for the device pointer of the host block
-//   device/array memory - allocates a host staging buffer with
-//                         cuMemHostAlloc and downloads the data into it
-// AFlags selects the cuMemHostAlloc flags (portable / write-combined).
-// Aborts when the object is already mapped or when a CUDA call fails.
-// If the download fails, the staging buffer is freed again so it does not
-// leak.
-procedure TCUDAMemData.Map(const AFlags: TCUDAMemMapFlags);
-var
-  LFlag: Cardinal;
-
-  // Gives back the staging buffer after a failed download. FStatus keeps the
-  // error of the download, so the result of the free call is not recorded.
-  procedure DiscardStagingBuffer;
-  begin
-    cuMemFreeHost(FMappedMemory);
-    FMappedMemory := nil;
-  end;
-
-begin
-  if FMapping then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogErrorFmt(strFailMap, [Name]);
-    {$ENDIF}
-    Abort;
-  end;
-
-  LFlag := 0;
-  if mmfPortable in AFlags then
-    LFlag := LFlag or CU_MEMHOSTALLOC_PORTABLE;
-  if mmfFastWrite in AFlags then
-    LFlag := LFlag or CU_MEMHOSTALLOC_WRITECOMBINED;
-
-  Context.Requires;
-  // Call the getter once up front, before the per-type work below.
-  GetData;
-
-  case FMemoryType of
-    mtHost:
-      begin
-        FStatus := cuMemHostGetDevicePointer(
-          FMappedMemory, GetData, 0);
-      end;
-    mtDevice:
-      begin
-        FStatus := cuMemHostAlloc(
-          FMappedMemory, DataSize, LFlag);
-        if FStatus = CUDA_SUCCESS then
-        begin
-          FStatus := cuMemcpyDtoH(
-            FMappedMemory, GetData, DataSize);
-          if FStatus <> CUDA_SUCCESS then
-            DiscardStagingBuffer;
-        end;
-      end;
-    mtArray:
-      begin
-        FStatus := cuMemHostAlloc(
-          FMappedMemory, DataSize, LFlag);
-        if FStatus = CUDA_SUCCESS then
-        begin
-          FStatus := cuMemcpyAtoH(
-            FMappedMemory, GetArrayHandle, 0, DataSize);
-          if FStatus <> CUDA_SUCCESS then
-            DiscardStagingBuffer;
-        end;
-      end;
-  end;
-
-  Context.Release;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  FMapping := True;
-end;
-
-// ------------------
-// ------------------ TCUDATexture ------------------
-// ------------------
-
-constructor TCUDATexture.Create(AOwner: TComponent); // Creates a texture with no handle, no bound array and default sampling state.
-begin
-  inherited Create(AOwner);
-  FHandle := nil; // no texture reference looked up yet
-  fArray := nil; // no memory array bound yet
-  AddressModeS := amClamp; // the defaults below go through the property setters
-  AddressModeT := amClamp;
-  AddressModeR := amClamp;
-  NormalizedCoord := true;
-  ReadAsInteger := false;
-  FilterMode := fmPoint;
-  fFormat := ctUndefined; // written directly to the fields, bypassing the setters
-  fChannelNum := cnOne;
-end;
-
-
-// Unlinks the bound array's back-reference to this texture, then drops the
-// handle and destroys the component.
-destructor TCUDATexture.Destroy;
-begin
-  if fArray <> nil then
-    fArray.fTexture := nil;
-  DestroyHandles;
-  inherited;
-end;
-
-// Returns the texture reference, (re)running AllocateHandles first when the
-// handle is missing or changes are pending.
-function TCUDATexture.GetHandle: PCUtexref;
-begin
-  if (FHandle = nil) or (FChanges <> []) then
-    AllocateHandles;
-  Result := FHandle;
-end;
-
-// True once a texture reference handle has been obtained.
-function TCUDATexture.GetIsAllocated: Boolean;
-begin
-  Result := FHandle <> nil;
-end;
-
-procedure TCUDATexture.AllocateHandles; // Looks up the texture reference in the master module and applies the pending changes.
-var
-  pTex: PCUtexref;
-  LName: AnsiString;
-  LModule: TCUDAModule;
-  LFlag: Cardinal;
-  LFormat: TCUarray_format;
-  LChanels: Integer;
-begin
-  if not(FMaster is TCUDAModule) then // a texture must be owned by a module
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strModuleAbsent);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if Length(FKernelName) = 0 then // without a name there is nothing to look up
-    exit;
-
-  LModule := TCUDAModule(FMaster);
-
-  LName := AnsiString(FKernelName);
-  Context.Requires;
-  FStatus := cuModuleGetTexRef(pTex, LModule.FHandle, PAnsiChar(LName));
-  Context.Release;
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-  FHandle := pTex;
-
-  Context.Requires;
-  // Apply changes
-  if (cuchArray in FChanges) and Assigned(fArray) then
-  begin
-    CollectStatus(cuTexRefSetArray(FHandle, fArray.ArrayHandle,
-      CU_TRSA_OVERRIDE_FORMAT));
-    fArray.fTexture := Self; // keep the array's back-reference pointing at this texture
-    // Update format
-    if cuTexRefGetFormat(LFormat, LChanels, FHandle) = CUDA_SUCCESS then
-      CUDAEnumToChannelDesc(LFormat, LChanels, fFormat, fChannelNum);
-  end;
-
-  if cuchAddresMode in FChanges then
-  begin
-    CollectStatus(cuTexRefSetAddressMode(FHandle, 0,
-      cAddressMode[fAddressModeS])); // dimension 0 uses the S mode
-    CollectStatus(cuTexRefSetAddressMode(FHandle, 1,
-      cAddressMode[fAddressModeT])); // dimension 1 uses the T mode
-    CollectStatus(cuTexRefSetAddressMode(FHandle, 2,
-      cAddressMode[fAddressModeR])); // dimension 2 uses the R mode
-  end;
-
-  if cuchFlag in FChanges then
-  begin
-    LFlag := 0;
-    if fNormalizedCoord then
-      LFlag := LFlag or CU_TRSF_NORMALIZED_COORDINATES;
-    if fReadAsInteger then
-      LFlag := LFlag or CU_TRSF_READ_AS_INTEGER;
-    CollectStatus(cuTexRefSetFlags(FHandle, LFlag));
-  end;
-
-  if cuchFilterMode in FChanges then
-    CollectStatus(cuTexRefSetFilterMode(FHandle, cFilterMode[fFilterMode]));
-
-  Context.Release;
-  if FStatus <> CUDA_SUCCESS then // FStatus is updated through CollectStatus above
-    Abort;
-
-  FChanges := []; // every pending change has been applied
-  inherited;
-end;
-
-// Forgets the texture reference handle; does nothing when none is held.
-procedure TCUDATexture.DestroyHandles;
-begin
-  if not Assigned(FHandle) then
-    exit;
-  FHandle := nil;
-  inherited;
-end;
-
-// Stores the texture's name inside the kernel module. While the component is
-// loading the name is only stored. Otherwise it is accepted only while no
-// handle exists, and the handle is then allocated immediately.
-procedure TCUDATexture.SetKernelName(const AName: string);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FKernelName := AName;
-  if not IsLoading then
-    AllocateHandles;
-end;
-
-// SetAddressModeS
-//
-
-// Stores the S address mode and raises cuchAddresMode when it differs from
-// the current value.
-procedure TCUDATexture.SetAddressModeS(const AMode: TCuAddresMode);
-begin
-  if fAddressModeS = AMode then
-    exit;
-  fAddressModeS := AMode;
-  CuNotifyChange(cuchAddresMode);
-end;
-
-// Stores the T address mode and raises cuchAddresMode when it differs from
-// the current value.
-procedure TCUDATexture.SetAddressModeT(const AMode: TCuAddresMode);
-begin
-  if fAddressModeT = AMode then
-    exit;
-  fAddressModeT := AMode;
-  CuNotifyChange(cuchAddresMode);
-end;
-
-// Stores the R address mode and raises cuchAddresMode when it differs from
-// the current value.
-procedure TCUDATexture.SetAddressModeR(const AMode: TCuAddresMode);
-begin
-  if fAddressModeR = AMode then
-    exit;
-  fAddressModeR := AMode;
-  CuNotifyChange(cuchAddresMode);
-end;
-
-// Stores the normalized-coordinates switch and raises cuchFlag when it
-// differs from the current value.
-procedure TCUDATexture.SetNormalizedCoord(const flag: Boolean);
-begin
-  if fNormalizedCoord = flag then
-    exit;
-  fNormalizedCoord := flag;
-  CuNotifyChange(cuchFlag);
-end;
-
-// Stores the read-as-integer switch and raises cuchFlag when it differs from
-// the current value.
-procedure TCUDATexture.SetReadAsInteger(const flag: Boolean);
-begin
-  if fReadAsInteger = flag then
-    exit;
-  fReadAsInteger := flag;
-  CuNotifyChange(cuchFlag);
-end;
-
-// Stores the filter mode and raises cuchFilterMode when it differs from the
-// current value.
-procedure TCUDATexture.SetFilterMode(const mode: TCuFilterMode);
-begin
-  if fFilterMode = mode then
-    exit;
-  fFilterMode := mode;
-  CuNotifyChange(cuchFilterMode);
-end;
-
-// Stores the channel format. While loading the value is only stored.
-// Otherwise it is accepted only while no handle exists and raises cuchFormat.
-procedure TCUDATexture.SetFormat(AValue: TCUDAChannelType);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  fFormat := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchFormat);
-end;
-
-// Binds a memory object to this texture, or unbinds with nil.
-// Only array memory (mtArray) can be bound; any other memory type is treated
-// as nil. Binding adopts the array's channel type and count, and steals the
-// array from any texture it was bound to. Unbinding resets the format to
-// ctUndefined / cnOne. This now also happens when a non-array value is
-// rejected; previously the old array's format was left behind.
-// Raises cuchArray whenever the bound object changes.
-procedure TCUDATexture.SetArray(Value: TCUDAMemData);
-begin
-  if Value <> fArray then
-  begin
-    // Drop the previous array's back-reference to this texture.
-    if Assigned(fArray) then
-      fArray.fTexture := nil;
-
-    // Reject memory that is not a CUDA array.
-    if Assigned(Value) and (Value.MemoryType <> mtArray) then
-      Value := nil;
-
-    if Assigned(Value) then
-    begin
-      fFormat := Value.fChannelsType;
-      fChannelNum := Value.fChannelsNum;
-      // An array serves one texture at a time: detach it from its old owner.
-      if Assigned(Value.fTexture) then
-        Value.fTexture.MemDataArray := nil;
-      Value.fTexture := Self;
-    end
-    else
-    begin
-      fFormat := ctUndefined;
-      fChannelNum := cnOne;
-    end;
-    fArray := Value;
-    CuNotifyChange(cuchArray);
-  end;
-end;
-
-// Stores the channel count. While loading the value is only stored.
-// Otherwise it is accepted only while no handle exists and raises cuchFormat.
-procedure TCUDATexture.SetChannelNum(AValue: TCUDAChannelNum);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  fChannelNum := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchFormat);
-end;
-
- 
-// ------------------
-// ------------------ TCUDAGraphicResource ------------------
-// ------------------
-
-// Stores the mapping mode and raises cuchMapping when it differs from the
-// current value.
-procedure TCUDAGraphicResource.SetMapping(const Value: TCUDAMapping);
-begin
-  if fMapping = Value then
-    exit;
-  fMapping := Value;
-  CuNotifyChange(cuchMapping);
-end;
-
-// True when at least one slot of the handle table is assigned.
-function TCUDAGraphicResource.GetIsAllocated: Boolean;
-var
-  Slot: Integer;
-begin
-  Result := False;
-  for Slot := 0 to High(FHandle) do
-    if FHandle[Slot] <> nil then
-    begin
-      Result := True;
-      Break;
-    end;
-end;
-
-// Allocation callback of the virtual GL handle: hands out the current value
-// of the unit-wide counter and advances the counter by one.
-procedure TCUDAGraphicResource.OnGLHandleAllocate(Sender: TGLVirtualHandle;
-  var Handle: Cardinal);
-begin
-  Handle := GLVirtualHandleCounter;
-  GLVirtualHandleCounter := GLVirtualHandleCounter + 1;
-end;
-
-procedure TCUDAGraphicResource.OnGLHandleDestroy(Sender: TGLVirtualHandle; // Destruction callback of the virtual GL handle.
-  var Handle: Cardinal);
-begin
-  DestroyHandles; // release this resource's handles; Sender and Handle are not used
-end;
-
-procedure TCUDAGraphicResource.SetArray(var AArray: TCUDAMemData; // Wraps the CUDA array AHandle in AArray (created on demand) and copies the array's descriptor into it.
-  AHandle: PCUarray; ForGLTexture, Volume: Boolean); // Volume selects the 3D descriptor; ForGLTexture is stored in FOpenGLRefArray
-var
-  Desc2D: TCUDA_ARRAY_DESCRIPTOR;
-  Desc3D: TCUDA_ARRAY3D_DESCRIPTOR;
-begin
-  Context.Requires;
-  // Get array descriptor
-  if Volume then
-    FStatus := cuArray3DGetDescriptor(Desc3D, AHandle)
-  else
-    FStatus := cuArrayGetDescriptor(Desc2D, AHandle);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  // Set array parameters
-  if not Assigned(AArray) then
-    AArray := TCUDAMemData.Create(Owner); // the new wrapper gets the same Owner as this resource
-
-  with AArray do // NOTE(review): inside this block FHandle and DestroyHandles presumably resolve to AArray's members, not Self's - confirm
-  begin
-    if FHandle <> AHandle then
-    begin
-      DestroyHandles;
-      FHandle := AHandle;
-    end;
-    FOpenGLRefArray := ForGLTexture;
-    FMemoryType := mtArray;
-    FPitch := 0;
-    if Volume then
-    begin
-      fWidth := Desc3D.Width;
-      fHeight := Desc3D.Height;
-      fDepth := Desc3D.Depth;
-      CUDAEnumToChannelDesc(Desc3D.Format, Desc3D.NumChannels, fChannelsType,
-        fChannelsNum);
-    end
-    else
-    begin
-      fWidth := Desc2D.Width;
-      fHeight := Desc2D.Height;
-      fDepth := 0;
-      CUDAEnumToChannelDesc(Desc2D.Format, Desc2D.NumChannels, fChannelsType,
-        fChannelsNum);
-    end;
-    FElementSize := cChannelTypeSize[fChannelsType] * (Ord(fChannelsNum) + 1); // bytes per element = scalar size * channel count
-  end;
-end;
-
- 
-// ------------------
-// ------------------ TCUDAUniform ------------------
-// ------------------
-
-constructor TCUDAUniform.Create(AOwner: TComponent); // Creates a uniform with no handle, zero size and a custom type.
-begin
-  inherited Create(AOwner);
-  FHandle := nil; // no handle yet
-  FSize := 0;
-  FType := TCUDAType.CustomType;
-  FDefined := false;
-end;
-
-destructor TCUDAUniform.Destroy; // Drops the handle before the inherited destructor runs.
-begin
-  DestroyHandles;
-  inherited;
-end;
-
-// True once the uniform holds a handle.
-function TCUDAUniform.GetIsAllocated: Boolean;
-begin
-  Result := FHandle <> nil;
-end;
-
-// Stores the custom type name. While loading the value is only stored.
-// Otherwise it is accepted only while no handle exists and raises cuchSize.
-procedure TCUDAUniform.SetCustomType(const AValue: string);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FCustomType := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchSize);
-end;
-
-// Stores the Defined flag; ignored once a handle exists.
-procedure TCUDAUniform.SetDefined(AValue: Boolean);
-begin
-  if Assigned(FHandle) then
-    exit;
-  FDefined := AValue;
-end;
-
-// Stores the uniform's name inside the kernel module. While loading the value
-// is only stored. Otherwise it is accepted only while no handle exists and
-// raises cuchSize.
-procedure TCUDAUniform.SetKernelName(const AName: string);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FKernelName := AName;
-  if not IsLoading then
-    CuNotifyChange(cuchSize);
-end;
-
-// Stores the size. While loading the value is only stored. Otherwise it is
-// accepted only while no handle exists and raises cuchSize.
-procedure TCUDAUniform.SetSize(const AValue: Cardinal);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FSize := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchSize);
-end;
-
-// Stores the data type. While loading the value is only stored. Otherwise it
-// is accepted only while no handle exists and raises cuchSize.
-procedure TCUDAUniform.SetType(AValue: TCUDAType);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FType := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchSize);
-end;
-
-// Stores the by-reference flag. While loading the value is only stored.
-// Otherwise it is accepted only while no handle exists and raises cuchSize.
-procedure TCUDAUniform.SetRef(AValue: Boolean);
-var
-  IsLoading: Boolean;
-begin
-  IsLoading := csLoading in ComponentState;
-  if (not IsLoading) and Assigned(FHandle) then
-    exit;
-  FRef := AValue;
-  if not IsLoading then
-    CuNotifyChange(cuchSize);
-end;
- 
-
-// ------------------
-// ------------------ TCUDAConstant ------------------
-// ------------------
-
-// Resolves the constant's device address and size from the master module with
-// cuModuleGetGlobal. Aborts when the master is not a module or the lookup
-// fails; returns silently when no kernel name is set.
-procedure TCUDAConstant.AllocateHandles;
-var
-  SymbolName: AnsiString;
-  OwnerModule: TCUDAModule;
-begin
-  if not(FMaster is TCUDAModule) then
-  begin
-    {$IFDEF USE_LOGGING}
-      LogError(strModuleAbsent);
-    {$ENDIF}
-    Abort;
-  end;
-
-  if FKernelName = '' then
-    exit;
-
-  OwnerModule := TCUDAModule(FMaster);
-  SymbolName := AnsiString(FKernelName);
-
-  // Drop the previous handle before looking the symbol up again.
-  DestroyHandles;
-
-  Context.Requires;
-  FStatus := cuModuleGetGlobal(FHandle, FSize, OwnerModule.FHandle,
-    PAnsiChar(SymbolName));
-  Context.Release;
-
-  if FStatus = CUDA_SUCCESS then
-  begin
-    FChanges := [];
-    inherited;
-  end
-  else
-    Abort;
-end;
-
-// Forgets the resolved device address; does nothing when none is held.
-procedure TCUDAConstant.DestroyHandles;
-begin
-  if not Assigned(FHandle) then
-    exit;
-  FHandle := nil;
-  inherited;
-end;
-
-// Returns the constant's device address, resolving it first when no handle
-// is held or changes are pending.
-function TCUDAConstant.GetDeviceAddress: TCUdeviceptr;
-begin
-  if (FHandle = nil) or (FChanges <> []) then
-    AllocateHandles;
-  Result := FHandle;
-end;
-
- 
-
-// ------------------
-// ------------------ TCUDAFuncParam ------------------
-// ------------------
-
-// Takes over the handle of the owning function. Nothing happens when the
-// master is missing or is not a TCUDAFunction. The inherited step runs only
-// when that handle is assigned.
-procedure TCUDAFuncParam.AllocateHandles;
-begin
-  // "is" already yields False for a nil master
-  if not (Master is TCUDAFunction) then
-    exit;
-  FHandle := TCUDAFunction(Master).FHandle;
-  if FHandle <> nil then
-    inherited;
-end;
-
-constructor TCUDAFuncParam.Create(AOwner: TComponent); // Creates a parameter with no handle that is not passed by reference.
-begin
-  inherited;
-  FHandle := nil; // the handle is copied from the owning function in AllocateHandles
-  FRef := false;
-end;
-
-// Forgets the handle taken from the owning function; does nothing when none
-// is held.
-procedure TCUDAFuncParam.DestroyHandles;
-begin
-  if not Assigned(FHandle) then
-    exit;
-  FHandle := nil;
-  inherited;
-end;
-
- 
-// ------------------------------------------------------------------
-initialization
-// ------------------------------------------------------------------
-
-  RegisterClasses([TGLSCUDA, TGLSCUDACompiler, TCUDAModule, TCUDAFunction, // registers the unit's component classes; presumably needed for streaming them - TODO confirm
-    TCUDATexture, TCUDAMemData, TCUDAConstant, TCUDAFuncParam]);
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDA;
+
+(* CUDA routines implementation *)
+
+interface
+
+{$I GLScene.inc}
+
+uses
+  System.Types,
+  System.Classes,
+  System.SysUtils,
+
+  GLS.PersistentClasses,
+  GLS.BaseClasses,
+  GLS.Context,
+  GLS.VectorGeometry,
+  GLS.VectorTypes,
+  GLS.VectorLists,
+  GLS.Graphics,
+  GLS.Strings,
+  GLS.Utils,
+
+  Import.CUDAApi,
+  Import.CUDARunTime,
+  GPU.CUDAParser,
+  GPU.CUDAFourierTransform,
+  GPU.CUDACompiler,
+  GPU.CUDAContext,
+  GPU.CUDADataAccess;
+type
+  TCUDAChange = (cuchDevice, cuchContext, cuchSize, cuchAddresMode, cuchFlag, // kinds of pending change a component can be notified about (see CuNotifyChange)
+    cuchFilterMode, cuchArray, cuchFormat, cuchMapping);
+  TCUDAChanges = set of TCUDAChange; // set of pending changes, kept in TCUDAComponent.FChanges
+
+  TCuAddresMode = (amWrap, amClamp, amMirror); // texture address modes
+  TCuFilterMode = (fmPoint, fmLinear); // texture filter modes
+
+  TCUDAChannelType = (ctUndefined, ctUInt8, ctUInt16, ctUInt32, ctInt8, ctInt16, // scalar type of one channel
+    ctInt32, ctHalfFloat, ctFloat, ctDouble);
+
+type
+
+  TCUDAChannelNum = (cnOne, cnTwo, cnThree, cnFour); // channels per element; presumably Ord(value) + 1 is the count - TODO confirm
+
+  TChannelTypeAndNum = record // pairs a channel type with a channel count
+    F: TCUDAChannelType;
+    C: TCUDAChannelNum;
+  end;
+
+  TCUDAMapping = (grmDefault, grmReadOnly, grmWriteDiscard); // mapping modes used by TCUDAGraphicResource.Mapping
+
+  TCUDAComponent = class(TCUDAHandlesMaster) // Base class of the CUDA components in this unit: has a master, a list of items, a status and pending changes.
+  private
+    FMaster: TCUDAComponent; // component this one belongs to (see the Master property)
+    FItems: TPersistentObjectList; // backing list for Items / ItemsCount
+    procedure SetMaster(AMaster: TCUDAComponent);
+    function GetItem(const i: Integer): TCUDAComponent;
+    function GetItemsCount: Integer;
+  protected
+    FStatus: TCUresult; // result code exposed through the Status property
+    FChanges: TCUDAChanges; // pending changes
+    function GetContext: TCUDAContext; override;
+    procedure CollectStatus(AStatus: TCUresult); // presumably folds AStatus into FStatus - TODO confirm
+    procedure GetChildren(AProc: TGetChildProc; Root: TComponent); override;
+    procedure AddItem(AItem: TCUDAComponent);
+    procedure RemoveItem(AItem: TCUDAComponent);
+    procedure DeleteItems;
+    procedure SetName(const NewName: TComponentName); override;
+    function GetIsAllocated: Boolean; virtual; abstract; // each descendant reports whether it holds a handle
+  public
+    destructor Destroy; override;
+    procedure CuNotifyChange(AChange: TCUDAChange); virtual; // reports a change of the given kind to the component
+    function GetParentComponent: TComponent; override;
+    procedure SetParentComponent(Value: TComponent); override;
+    function HasParent: Boolean; override;
+    function GetItemByName(const name: string): TCUDAComponent;
+    function MakeUniqueName(const BaseName: string): string;
+    property Master: TCUDAComponent read FMaster write SetMaster;
+    property Context: TCUDAContext read GetContext;
+    property Items[const i: Integer]: TCUDAComponent read GetItem;
+    property ItemsCount: Integer read GetItemsCount;
+    property Status: TCUresult read FStatus;
+    // Return true if handle is allocated (i.e. component has device object)
+    property IsAllocated: Boolean read GetIsAllocated;
+  end;
+
+  TCUDAComponentClass = class of TCUDAComponent;
+
+  TCUDAMemData = class;
+  TCUDAFunction = class;
+  TCUDATexture = class;
+  TGLCUDA = class;
+  TCUDAConstant = class;
+
+  TCUDAModule = class(TCUDAComponent) // CUDA module component: holds kernel code and gives access to its functions, textures and constants by name.
+  private
+    FHandle: PCUmodule; // module handle
+    FCode: TStringList; // code text published as Code
+    FCodeType: TGLSCUDACompilerOutput; // read-only through the CodeType property
+    FCompiler: TGLCUDACompiler; // compiler component published as Compiler
+    procedure SetCode(const Value: TStringList);
+    procedure SetCompiler(const Value: TGLCUDACompiler);
+    function GetKernelFunction(const AName: string): TCUDAFunction;
+    function GetKernelTexture(const AName: string): TCUDATexture;
+    function GetKernelConstant(const AName: string): TCUDAConstant;
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    procedure OnChangeCode(Sender: TObject); // presumably the change handler of FCode - TODO confirm
+    procedure Loaded; override;
+    function GetContext: TCUDAContext; override;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure Assign(Source: TPersistent); override;
+    procedure LoadFromFile(const AFilename: string);
+    procedure LoadFromSource;
+    procedure Unload;
+    procedure LoadAndCompile;
+    property Context: TCUDAContext read GetContext;
+    property CodeType: TGLSCUDACompilerOutput read FCodeType;
+    property KernelFunction[const AName: string]: TCUDAFunction // looked up by name
+      read GetKernelFunction;
+    property KernelTexture[const AName: string]: TCUDATexture // looked up by name
+      read GetKernelTexture;
+    property KernelConstant[const AName: string]: TCUDAConstant // looked up by name
+      read GetKernelConstant;
+  published
+    property Code: TStringList read FCode write SetCode;
+    property Compiler: TGLCUDACompiler read FCompiler write SetCompiler;
+  end;
+
+  TGLResourceType = (rtTexture, rtBuffer); // kind of graphic resource a TCUDAGraphicResource wraps
+
+  //  Abstract class of graphic resources.
+  TCUDAGraphicResource = class(TCUDAComponent)
+  protected
+    FHandle: array [0 .. 7] of PCUgraphicsResource; // up to eight graphics resource handles
+    FMapping: TCUDAMapping; // mapping mode, see the Mapping property
+    FResourceType: TGLResourceType;
+    FGLContextHandle: TGLVirtualHandle; // presumably the source of the OnGLHandle* callbacks below - TODO confirm
+    FMapCounter: Integer; // presumably counts nested MapResources calls - TODO confirm
+    function GetIsAllocated: Boolean; override;
+    procedure OnGLHandleAllocate(Sender: TGLVirtualHandle;
+      var Handle: Cardinal);
+    procedure OnGLHandleDestroy(Sender: TGLVirtualHandle; var Handle: Cardinal);
+    procedure BindArrayToTexture(var cudaArray: TCUDAMemData;
+      ALeyer, ALevel: LongWord); virtual; abstract;
+    procedure SetArray(var AArray: TCUDAMemData; AHandle: PCUarray;
+      ForGLTexture, Volume: Boolean);
+    function GetAttributeArraySize(const Attr: string): LongWord; virtual; abstract; // size of the named attribute array
+    function GetAttributeArrayAddress(const Attr: string): Pointer; virtual; // address of the named attribute array
+      abstract;
+    function GetElementArrayDataSize: LongWord; virtual; abstract; // size of the element array data
+    function GetElementArrayAddress: Pointer; virtual; abstract; // address of the element array
+    procedure SetMapping(const Value: TCUDAMapping); virtual;
+    property Mapping: TCUDAMapping read FMapping write SetMapping
+      default grmDefault;
+  public
+    procedure MapResources; virtual; abstract; // implemented by descendants
+    procedure UnMapResources; virtual; abstract; // implemented by descendants
+  end;
+
+  TCUDAMemType = (mtHost, mtDevice, mtArray); // where a TCUDAMemData keeps its storage: host memory, device memory or a CUDA array
+  TCUDAMemMapFlag =
+  (
+    mmfPortable, // Memory is shared between contexts
+    mmfFastWrite // Fast write, slow read
+  );
+  TCUDAMemMapFlags = set of TCUDAMemMapFlag; // flag set accepted by TCUDAMemData.Map
+
+  // CUDA component describing a block of data: its extent (Width, Height,
+  // Depth), its per-element layout (ChannelsType, ChannelsNum) and the kind of
+  // memory it lives in (MemoryType). Offers mapping, filling and copy helpers.
+  TCUDAMemData = class(TCUDAComponent)
+  private
+    FData: TCUdeviceptr;
+    FMappedMemory: TCUdeviceptr;
+    FHandle: PCUarray;
+    FWidth: Integer;
+    FHeight: Integer;
+    FDepth: Integer;
+    FPitch: Cardinal;
+    FElementSize: Integer;
+    FDataSize: Integer;
+    FChannelsType: TCUDAChannelType;
+    fChannelsNum: TCUDAChannelNum;
+    FMemoryType: TCUDAMemType;
+    FTexture: TCUDATexture;
+    FOpenGLRefArray: Boolean;
+    FMapping: Boolean;
+    procedure SetMemoryType(const AType: TCUDAMemType);
+    procedure SetWidth(const Value: Integer);
+    procedure SetHeight(const Value: Integer);
+    procedure SetDepth(const Value: Integer);
+    procedure SetChannelType(const Value: TCUDAChannelType);
+    procedure SetChannelNum(const Value: TCUDAChannelNum);
+    function GetData: TCUdeviceptr;
+    function GetArrayHandle: PCUarray;
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure CuNotifyChange(AChange: TCUDAChange); override;
+    (* Map device and array memory to host, or host memory to device.
+       Mapping is necessary for modifying device data.
+       When host memory is mapped it can be accessed on the device side
+       via MappedHostAddress. *)
+    procedure Map(const AFlags: TCUDAMemMapFlags = []);
+    // Ends a mapping operation started with Map.
+    procedure UnMap;
+    // Typed element accessors for one-, two- and three-dimensional indexing.
+    function Data<EType>(X: Integer): GCUDAHostElementAccess<EType>; overload;
+    function Data<EType>(X, Y: Integer): GCUDAHostElementAccess<EType>; overload;
+    function Data<EType>(X, Y, Z: Integer): GCUDAHostElementAccess<EType>; overload;
+    //  Fill device data
+    procedure FillMem(const Value);
+    // Copy this data to another memory block or to an image.
+    procedure CopyTo(const ADstMemData: TCUDAMemData); overload;
+    procedure CopyTo(const AGLImage: TGLImage); overload;
+    //  Copy data to Graphic resource.
+    procedure CopyTo(const AGLGraphic: TCUDAGraphicResource;
+      aAttr: string = ''); overload;
+    // Copy into this data from another memory block, an image or a graphic
+    // resource.
+    procedure CopyFrom(const ASrcMemData: TCUDAMemData); overload;
+    procedure CopyFrom(const AGLImage: TGLBitmap32); overload;
+    procedure CopyFrom(const AGLGraphic: TCUDAGraphicResource;
+      aAttr: string = ''); overload;
+    // Partial copy described by source offset, destination offset and sizes.
+    procedure SubCopyTo(const ADstMemData: TCUDAMemData;
+      ASrcXYZ, ADstXYZ, ASizes: IntElement.TVector3);
+    // Read-only views of the private state fields.
+    property ElementSize: Integer read FElementSize;
+    property DataSize: Integer read FDataSize;
+    property Pitch: Cardinal read fPitch;
+    property RawData: TCUdeviceptr read GetData;
+    property MappedMemoryAddress: TCUdeviceptr read FMappedMemory;
+    property ArrayHandle: PCUarray read GetArrayHandle;
+  published
+    property Width: Integer read fWidth write SetWidth default 256;
+    property Height: Integer read fHeight write SetHeight default 0;
+    property Depth: Integer read fDepth write SetDepth default 0;
+    property MemoryType: TCUDAMemType read FMemoryType write SetMemoryType
+      default mtHost;
+    property ChannelsType: TCUDAChannelType read fChannelsType
+      write SetChannelType default ctInt8;
+    property ChannelsNum: TCUDAChannelNum read fChannelsNum write SetChannelNum
+      default cnOne;
+  end;
+
+  // Common base of TCUDAConstant and TCUDAFuncParam. It keeps the kernel-side
+  // name, the data type description, the size and the reference flag, and
+  // declares them as protected properties that the descendants publish.
+  TCUDAUniform = class(TCUDAComponent)
+  protected
+    FHandle: TCUdeviceptr;
+    FSize: Cardinal;
+    FKernelName: string;
+    FType: TCUDAType;
+    FCustomType: string;
+    FRef: Boolean;
+    FDefined: Boolean;
+    procedure SetKernelName(const AName: string);
+    procedure SetType(AValue: TCUDAType);
+    procedure SetCustomType(const AValue: string);
+    procedure SetSize(const AValue: Cardinal);
+    procedure SetRef(AValue: Boolean);
+    procedure SetDefined(AValue: Boolean);
+
+    // Protected on purpose: republished by TCUDAConstant and TCUDAFuncParam.
+    property KernelName: string read FKernelName write SetKernelName;
+    property DataType: TCUDAType read FType write SetType;
+    property CustomType: string read FCustomType write SetCustomType;
+    property Size: Cardinal read FSize write SetSize;
+    property Reference: Boolean read FRef write SetRef;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    // Backed by FDefined; TCUDAModule.AllocateHandles sets it from the
+    // compiler's DefValue information for constants.
+    property IsValueDefined: Boolean read FDefined write SetDefined;
+  end;
+
+
+  // Uniform that exposes a device address (DeviceAddress) and publishes the
+  // properties inherited from TCUDAUniform.
+  TCUDAConstant = class(TCUDAUniform)
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    function GetDeviceAddress: TCUdeviceptr;
+  public
+    // Read through GetDeviceAddress; TCUDAModule.AllocateHandles treats a nil
+    // result as "constant not available".
+    property DeviceAddress: TCUdeviceptr read GetDeviceAddress;
+  published
+    property KernelName;
+    property DataType;
+    property CustomType;
+    property Size;
+    property Reference;
+  end;
+
+  // Uniform describing one argument of a kernel function. Instances are
+  // created by TCUDAModule.AllocateHandles with the TCUDAFunction as master.
+  TCUDAFuncParam = class(TCUDAUniform)
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+  published
+    property KernelName;
+    property DataType;
+    property CustomType;
+    property Size;
+    property Reference;
+  end;
+
+  // Component wrapping a kernel function handle (PCUfunction) looked up by
+  // KernelName in the master TCUDAModule. Holds the block shape and grid
+  // dimensions and offers the SetParam overloads used to push arguments.
+  TCUDAFunction = class(TCUDAComponent)
+  private
+    FKernelName: string;
+    FHandle: PCUfunction;
+    FAutoSync: Boolean;
+    FBlockShape: TCUDADimensions;
+    FGrid: TCUDADimensions;
+    // Running byte offset advanced by every SetParam call.
+    ParamOffset: Integer;
+    // SetParam aborts unless this flag is set.
+    FLaunching: Boolean;
+    FOnParameterSetup: TNotifyEvent;
+    procedure SetBlockShape(const AShape: TCUDADimensions);
+    procedure SetGrid(const AGrid: TCUDADimensions);
+    procedure SetKernelName(const AName: string);
+    function GetHandle: PCUfunction;
+    procedure SetSharedMemorySize(Value: Integer);
+    function GetSharedMemorySize: Integer;
+    function GetMaxThreadPerBlock: Integer;
+    function GetConstMemorySize: Integer;
+    function GetLocalMemorySize: Integer;
+    function GetNumRegisters: Integer;
+    function GetParameter(const AName: string): TCUDAFuncParam;
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    // Each overload pushes one argument at ParamOffset and advances the offset.
+    procedure SetParam(Value: Integer); overload;
+    procedure SetParam(Value: Cardinal); overload;
+    procedure SetParam(Value: Single); overload;
+    procedure SetParam(Value: TVector2i); overload;
+    procedure SetParam(Value: TVector3i); overload;
+    procedure SetParam(Value: TVector4i); overload;
+    procedure SetParam(Value: TVector2f); overload;
+    procedure SetParam(Value: TVector3f); overload;
+    procedure SetParam(Value: TVector4f); overload;
+    procedure SetParam(MemData: TCUDAMemData); overload;
+    procedure SetParam(TexRef: TCUDATexture); overload;
+    procedure SetParam(Ptr: Pointer); overload;
+    property Parameters[const AName: string]: TCUDAFuncParam read GetParameter;
+    procedure Launch(Grided: Boolean = true);
+    property Handle: PCUfunction read GetHandle;
+    property SharedMemorySize: Integer read GetSharedMemorySize
+      write SetSharedMemorySize;
+    property MaxThreadPerBlock: Integer read GetMaxThreadPerBlock;
+    property ConstMemorySize: Integer read GetConstMemorySize;
+    property LocalMemorySize: Integer read GetLocalMemorySize;
+    property NumRegisters: Integer read GetNumRegisters;
+  published
+    property KernelName: string read FKernelName write SetKernelName;
+    property AutoSync: Boolean read FAutoSync write FAutoSync default true;
+    property BlockShape: TCUDADimensions read FBlockShape write SetBlockShape;
+    property Grid: TCUDADimensions read FGrid write SetGrid;
+    property OnParameterSetup: TNotifyEvent read FOnParameterSetup
+      write FOnParameterSetup;
+  end;
+
+  // Component wrapping a texture reference handle (PCUtexref) identified by
+  // KernelName. Carries addressing, filtering and format settings and an
+  // optional TCUDAMemData (MemDataArray) associated with the texture.
+  TCUDATexture = class(TCUDAComponent)
+  private
+    FKernelName: string;
+    FHandle: PCUtexref;
+    fArray: TCUDAMemData;
+    fAddressModeS, fAddressModeT, fAddressModeR: TCuAddresMode;
+    fNormalizedCoord: Boolean;
+    fReadAsInteger: Boolean;
+    fFilterMode: TCuFilterMode;
+    fFormat: TCUDAChannelType;
+    fChannelNum: TCUDAChannelNum;
+    procedure SetKernelName(const AName: string);
+    procedure SetAddressModeS(const AMode: TCuAddresMode);
+    procedure SetAddressModeT(const AMode: TCuAddresMode);
+    procedure SetAddressModeR(const AMode: TCuAddresMode);
+    procedure SetNormalizedCoord(const flag: Boolean);
+    procedure SetReadAsInteger(const flag: Boolean);
+    procedure SetFilterMode(const mode: TCuFilterMode);
+    procedure SetFormat(AValue: TCUDAChannelType);
+    procedure SetChannelNum(AValue: TCUDAChannelNum);
+    procedure SetArray(Value: TCUDAMemData);
+    function GetHandle: PCUtexref;
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    property Handle: PCUtexref read GetHandle;
+  published
+    property KernelName: string read FKernelName write SetKernelName;
+    // Addressing mode per texture coordinate (S, T, R); all default to amClamp.
+    property AddressModeS: TCuAddresMode read fAddressModeS
+      write SetAddressModeS default amClamp;
+    property AddressModeT: TCuAddresMode read fAddressModeT
+      write SetAddressModeT default amClamp;
+    property AddressModeR: TCuAddresMode read fAddressModeR
+      write SetAddressModeR default amClamp;
+    property NormalizedCoord: Boolean read fNormalizedCoord
+      write SetNormalizedCoord default true;
+    property ReadAsInteger: Boolean read fReadAsInteger write SetReadAsInteger
+      default false;
+    property FilterMode: TCuFilterMode read fFilterMode write SetFilterMode
+      default fmPoint;
+    property Format: TCUDAChannelType read fFormat write SetFormat;
+    property ChannelNum: TCUDAChannelNum read fChannelNum write SetChannelNum;
+    property MemDataArray: TCUDAMemData read fArray write SetArray;
+  end;
+
+  // Root CUDA component: owns a TCUDAContext, is bound to a TGLCUDADevice via
+  // ComputingDevice and gives indexed access to its TCUDAModule children.
+  TGLCUDA = class(TCUDAComponent)
+  private
+    fDevice: TGLCUDADevice;
+    fContext: TCUDAContext;
+    FOnOpenGLInteropInit: TOnOpenGLInteropInit;
+    procedure SetDevice(const Value: TGLCUDADevice);
+    procedure SetOnOpenGLInteropInit(AEvent: TOnOpenGLInteropInit);
+    function GetModule(const i: Integer): TCUDAModule;
+  protected
+    // Drops the device reference when the device component is removed.
+    procedure Notification(AComponent: TComponent;
+      Operation: TOperation); override;
+    // Applies pending device/context changes before returning the context.
+    function GetContext: TCUDAContext; override;
+    function GetIsAllocated: Boolean; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    property Context: TCUDAContext read GetContext;
+    // i counts only the children that are TCUDAModule instances.
+    property Modules[const i: Integer]: TCUDAModule read GetModule;
+  published
+    
+    property ComputingDevice: TGLCUDADevice read fDevice write SetDevice;
+    property OnOpenGLInteropInit: TOnOpenGLInteropInit read FOnOpenGLInteropInit
+      write SetOnOpenGLInteropInit;
+  end;
+
+// Returns the channel type / channel count pair associated with AType.
+function GetChannelTypeAndNum(AType: TCUDAType): TChannelTypeAndNum;
+// Installs the single unit-wide handler called after a CUDA component is
+// renamed; a later call replaces the previous handler.
+procedure RegisterCUDAComponentNameChangeEvent(ANotifyEvent: TNotifyEvent);
+// Clears the handler installed by RegisterCUDAComponentNameChangeEvent.
+procedure DeRegisterCUDAComponentNameChangeEvent;
+
+//-----------------------------------------------------------------
+implementation
+//-----------------------------------------------------------------
+
+
+const
+  // Component addressing mode -> driver API addressing mode.
+  cAddressMode: array [TCuAddresMode] of TCUaddress_mode =
+    (CU_TR_ADDRESS_MODE_WRAP, CU_TR_ADDRESS_MODE_CLAMP,
+    CU_TR_ADDRESS_MODE_MIRROR);
+
+  // Component filter mode -> driver API filter mode.
+  cFilterMode: array [TCuFilterMode] of TCUfilter_mode =
+    (CU_TR_FILTER_MODE_POINT, CU_TR_FILTER_MODE_LINEAR);
+
+const
+  // One (channel type, channel count) entry per TCUDAType value, in the
+  // declaration order of TCUDAType. Entries with ctUndefined have no channel
+  // type assigned in this table.
+  cCUDATypeToTexFormat: array [TCUDAType] of TChannelTypeAndNum =
+    ((F: ctUndefined; C: cnOne), (F: ctInt8; C: cnOne), (F: ctUInt8; C: cnOne),
+    (F: ctInt8; C: cnTwo), (F: ctUInt8; C: cnTwo), (F: ctInt8; C: cnThree),
+    (F: ctUInt8; C: cnThree), (F: ctInt8; C: cnFour), (F: ctUInt8; C: cnFour),
+    (F: ctInt16; C: cnOne), (F: ctUInt16; C: cnOne), (F: ctInt16; C: cnTwo),
+    (F: ctUInt16; C: cnTwo), (F: ctInt16; C: cnThree), (F: ctUInt16;
+    C: cnThree), (F: ctInt16; C: cnFour), (F: ctUInt16; C: cnFour), (F: ctInt32;
+    C: cnOne), (F: ctUInt32; C: cnOne), (F: ctInt32; C: cnTwo), (F: ctUInt32;
+    C: cnTwo), (F: ctInt32; C: cnThree), (F: ctUInt32; C: cnThree), (F: ctInt32;
+    C: cnFour), (F: ctUInt32; C: cnFour), (F: ctUndefined; C: cnOne),
+    (F: ctUndefined; C: cnOne), (F: ctUndefined; C: cnTwo), (F: ctUndefined;
+    C: cnTwo), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnThree),
+    (F: ctUndefined; C: cnFour), (F: ctUndefined; C: cnFour), (F: ctFloat;
+    C: cnOne), (F: ctFloat; C: cnTwo), (F: ctFloat; C: cnThree), (F: ctFloat;
+    C: cnFour), (F: ctUndefined; C: cnOne), (F: ctUndefined; C: cnOne),
+    (F: ctUndefined; C: cnTwo), (F: ctUndefined; C: cnTwo), (F: ctUndefined;
+    C: cnThree), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnFour),
+    (F: ctUndefined; C: cnFour), (F: ctUndefined; C: cnOne), (F: ctUndefined;
+    C: cnTwo), (F: ctUndefined; C: cnThree), (F: ctUndefined; C: cnFour),
+    (F: ctInt8; C: cnOne), (F: ctInt16; C: cnOne), (F: ctInt32; C: cnOne),
+    (F: ctUInt8; C: cnOne), (F: ctUInt16; C: cnOne), (F: ctUInt32; C: cnOne));
+
+  // One integer per TCUDAChannelType value; presumably the size of a single
+  // channel in bytes - TODO confirm against the users of this table.
+  cChannelTypeSize: array [TCUDAChannelType] of Integer =
+      (0, 1, 2, 4, 1, 2, 4, 2, 4, 8);
+
+var
+  // Unit-level counter starting at 1; its users are outside this excerpt.
+  GLVirtualHandleCounter: Cardinal = 1;
+  // Handler invoked by TCUDAComponent.SetName after a rename; set and cleared
+  // through (De)RegisterCUDAComponentNameChangeEvent.
+  vCUDAComponentNameChangeEvent: TNotifyEvent;
+
+// Table lookup: returns the cCUDATypeToTexFormat entry for AType.
+function GetChannelTypeAndNum(AType: TCUDAType): TChannelTypeAndNum;
+begin
+  Result := cCUDATypeToTexFormat[AType];
+end;
+
+// Converts a driver API array format and channel count into the component
+// level description.
+//   Fmt     - driver API array element format.
+//   nCh     - number of channels (1..4).
+//   oFormat - receives the matching channel type, or ctUndefined when Fmt is
+//             not one of the formats handled below.
+//   oNum    - receives the matching channel count; it is left untouched when
+//             nCh is outside 1..4.
+procedure CUDAEnumToChannelDesc(const Fmt: TCUarray_format; const nCh: LongWord;
+  out oFormat: TCUDAChannelType; out oNum: TCUDAChannelNum);
+begin
+  case Fmt of
+    CU_AD_FORMAT_UNSIGNED_INT8:
+      oFormat := ctUInt8;
+    CU_AD_FORMAT_UNSIGNED_INT16:
+      oFormat := ctUInt16;
+    CU_AD_FORMAT_UNSIGNED_INT32:
+      oFormat := ctUInt32;
+    // Signed formats map to the signed channel types (they used to be
+    // reported as their unsigned counterparts).
+    CU_AD_FORMAT_SIGNED_INT8:
+      oFormat := ctInt8;
+    CU_AD_FORMAT_SIGNED_INT16:
+      oFormat := ctInt16;
+    CU_AD_FORMAT_SIGNED_INT32:
+      oFormat := ctInt32;
+    CU_AD_FORMAT_HALF:
+      oFormat := ctHalfFloat;
+    CU_AD_FORMAT_FLOAT:
+      oFormat := ctFloat;
+  else
+    // Never hand back an unassigned out parameter for an unknown format.
+    oFormat := ctUndefined;
+  end;
+  case nCh of
+    1: oNum := cnOne;
+    2: oNum := cnTwo;
+    3: oNum := cnThree;
+    4: oNum := cnFour;
+  end;
+end;
+
+// Stores ANotifyEvent as the unit-wide rename handler, replacing any handler
+// stored before.
+procedure RegisterCUDAComponentNameChangeEvent(ANotifyEvent: TNotifyEvent);
+begin
+  vCUDAComponentNameChangeEvent := ANotifyEvent;
+end;
+
+// Removes the unit-wide rename handler.
+procedure DeRegisterCUDAComponentNameChangeEvent;
+begin
+  vCUDAComponentNameChangeEvent := nil;
+end;
+
+// ------------------
+// ------------------ TGLCUDA ------------------
+// ------------------
+
+// Creates the component with no device, no pending changes and a fresh
+// TCUDAContext instance.
+constructor TGLCUDA.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  FChanges := [];
+  fContext := TCUDAContext.Create;
+  fDevice := nil;
+end;
+
+// Detaches from the device (dropping the free notification) and destroys the
+// owned context before the inherited teardown.
+destructor TGLCUDA.Destroy;
+begin
+  SetDevice(nil);
+  fContext.Destroy;
+  inherited Destroy;
+end;
+
+// Forgets the device when that very component is being removed.
+procedure TGLCUDA.Notification(AComponent: TComponent; Operation: TOperation);
+begin
+  if (AComponent = fDevice) and (Operation = opRemove) then
+    SetDevice(nil);
+  inherited Notification(AComponent, Operation);
+end;
+
+// Switches the computing device.
+// A non-nil Value that is not Suitable is ignored. Free notifications are
+// moved from the old device to the new one; a device change is flagged only
+// when the new device is non-nil.
+procedure TGLCUDA.SetDevice(const Value: TGLCUDADevice);
+begin
+  if Value = fDevice then
+    exit;
+  if (Value <> nil) and not Value.Suitable then
+    exit;
+
+  if fDevice <> nil then
+    fDevice.RemoveFreeNotification(Self);
+  fDevice := Value;
+  if fDevice = nil then
+    exit;
+
+  fDevice.FreeNotification(Self);
+  CuNotifyChange(cuchDevice);
+end;
+
+// Stores the interop handler and flags the context as changed so that
+// GetContext picks the handler up on its next call.
+procedure TGLCUDA.SetOnOpenGLInteropInit(AEvent: TOnOpenGLInteropInit);
+begin
+  FOnOpenGLInteropInit := AEvent;
+  CuNotifyChange(cuchContext);
+end;
+
+// Returns the owned context after applying pending changes:
+//  - a pending device change copies the device (or nil) into the context and
+//    is converted into a pending context change;
+//  - a pending context change is processed only while a device is assigned:
+//    the interop handler is handed over and the context manager is asked to
+//    create the context.
+function TGLCUDA.GetContext: TCUDAContext;
+begin
+  if cuchDevice in FChanges then
+  begin
+    if fDevice <> nil then
+      fContext.Device := fDevice.Device
+    else
+      fContext.Device := nil;
+    FChanges := FChanges - [cuchDevice] + [cuchContext];
+  end;
+
+  if (fDevice <> nil) and (cuchContext in FChanges) then
+  begin
+    // Getting OpenGL context to make interoperability
+    fContext.OnOpenGLInteropInit := FOnOpenGLInteropInit;
+    CUDAContextManager.CreateContext(fContext);
+    FChanges := FChanges - [cuchContext];
+  end;
+
+  Result := fContext;
+end;
+
+// Allocated means the owned context reports itself as valid.
+function TGLCUDA.GetIsAllocated: Boolean;
+begin
+  Result := FContext.IsValid;
+end;
+
+// Returns the i-th child that is a TCUDAModule (0-based, counting modules
+// only), or nil when there is no such module.
+function TGLCUDA.GetModule(const i: Integer): TCUDAModule;
+var
+  j, k: Integer;
+begin
+  Result := nil;
+  // FItems is created lazily by AddItem, so it is still nil for a component
+  // that never had a child; guard before touching it.
+  if not Assigned(FItems) then
+    exit;
+  k := 0;
+  for j := 0 to FItems.Count - 1 do
+  begin
+    if FItems[j] is TCUDAModule then
+    begin
+      if k = i then
+        exit(TCUDAModule(FItems[j]))
+      else
+        Inc(k);
+    end;
+  end;
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAModule ------------------
+// ------------------
+
+// Creates the module with an empty code list (wired to OnChangeCode) and no
+// loaded module handle.
+constructor TCUDAModule.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  FCode := TStringList.Create;
+  TStringList(FCode).OnChange := OnChangeCode;
+  FHandle := nil;
+end;
+
+// Unloads the module, destroys the code list and releases the attached
+// compiler (if any) by clearing its Product.
+destructor TCUDAModule.Destroy;
+begin
+  Unload;
+  FCode.Destroy;
+  if FCompiler <> nil then
+    FCompiler.Product := nil;
+  inherited Destroy;
+end;
+
+// When Source is a TCUDAModule: drops the current handles, copies its code
+// and code type and re-allocates. The inherited Assign is called in every
+// case afterwards.
+procedure TCUDAModule.Assign(Source: TPersistent);
+var
+  Origin: TCUDAModule;
+begin
+  if Source is TCUDAModule then
+  begin
+    Origin := TCUDAModule(Source);
+    DestroyHandles;
+    FCode.Assign(Origin.FCode);
+    FCodeType := Origin.FCodeType;
+    AllocateHandles;
+  end;
+  inherited Assign(Source);
+end;
+
+// Attaches a compiler to this module (or detaches with nil).
+// A compiler that already has a Product is refused, because a compiler may
+// serve only one module. The previously attached compiler is released by
+// clearing its Product; without that it would stay marked as in use and keep
+// pointing at this module's code list after being replaced.
+procedure TCUDAModule.SetCompiler(const Value: TGLCUDACompiler);
+begin
+  if Value <> FCompiler then
+  begin
+    // Compiler must be used by only one module
+    if Assigned(Value) and Assigned(Value.Product) then
+      exit;
+    // Same release step the destructor performs for the attached compiler.
+    if Assigned(FCompiler) then
+      FCompiler.Product := nil;
+    FCompiler := Value;
+    if Assigned(FCompiler) then
+      FCompiler.Product := FCode;
+  end;
+end;
+
+// Returns the context of the master TGLCUDA. Any other master (including
+// none) is an error: it is logged when logging is compiled in and the call
+// aborts.
+function TCUDAModule.GetContext: TCUDAContext;
+begin
+  if FMaster is TGLCUDA then
+  begin
+    Result := TGLCUDA(FMaster).Context;
+    exit;
+  end;
+
+  Result := nil;
+  {$IFDEF USE_LOGGING}
+    LogErrorFmt('Invalid master of module "%s"', [Name]);
+  {$ENDIF}
+  Abort;
+end;
+
+// Allocated means a module handle is currently held.
+function TCUDAModule.GetIsAllocated: Boolean;
+begin
+  Result := FHandle <> nil;
+end;
+
+// After streaming: loads the module from the stored code, then lets every
+// child allocate its handles, walking the children from last to first.
+procedure TCUDAModule.Loaded;
+var
+  Remaining: Integer;
+begin
+  inherited Loaded;
+  LoadFromSource;
+  Remaining := ItemsCount;
+  while Remaining > 0 do
+  begin
+    Dec(Remaining);
+    Items[Remaining].AllocateHandles;
+  end;
+end;
+
+// Reloads the module from the stored code and, at run time with a compiler
+// attached, synchronises the child components with the compiler's module
+// information:
+//  - functions: found by name or created, their handle is probed, and one
+//    TCUDAFuncParam child is created per declared argument;
+//  - textures and constants: found by name or created, then probed;
+//  - every component whose probe fails is destroyed, and finally every child
+//    that is still not allocated is destroyed as well.
+procedure TCUDAModule.AllocateHandles;
+var
+  func: TCUDAFunction;
+  tex: TCUDATexture;
+  cnst: TCUDAConstant;
+  Param: TCUDAFuncParam;
+  i, j: Integer;
+  useless: array of TCUDAComponent;
+  info: TCUDAModuleInfo;
+  bFail: Boolean;
+begin
+  LoadFromSource;
+
+  if Assigned(FCompiler) then
+  begin
+    info := FCompiler.ModuleInfo;
+    info.Owner := Self;
+
+    // Runtime module deployment
+    if not(csDesigning in ComponentState) and Assigned(FCompiler) then
+    begin
+
+      // Redefine function and texture with same names
+      for i := 0 to High(info.func) do
+      begin
+        func := GetKernelFunction(info.func[i].Name);
+        if not Assigned(func) then
+        begin
+          func := TCUDAFunction.Create(Self);
+          func.Master := Self;
+          func.FKernelName := info.func[i].KernelName;
+          func.Name := MakeUniqueName(info.func[i].Name);
+        end
+        else
+          // Existing function: drop its old parameter components, they are
+          // recreated below.
+          func.DeleteItems;
+
+        // Reading Handle may raise; any exception counts as a failed probe.
+        try
+          bFail := func.Handle = nil;
+        except
+          bFail := True;
+        end;
+
+        if bFail then
+          func.Destroy
+        else
+        begin
+          for j := 0 to High(info.func[i].Args) do
+          begin
+            Param := TCUDAFuncParam.Create(func);
+            Param.Master := TCUDAComponent(func);
+            Param.FKernelName := info.func[i].Args[j].Name;
+            Param.Name := func.KernelName + '_' + Param.KernelName;
+            Param.FType := info.func[i].Args[j].DataType;
+            Param.FCustomType := info.func[i].Args[j].CustomType;
+            Param.FRef := info.func[i].Args[j].Ref;
+            // Lock properties
+            Param.AllocateHandles;
+          end;
+        end;
+
+      end;
+
+      for i := 0 to High(info.TexRef) do
+      begin
+        tex := GetKernelTexture(info.TexRef[i].Name);
+        if not Assigned(tex) then
+        begin
+          tex := TCUDATexture.Create(Self);
+          tex.Master := Self;
+          tex.FKernelName := info.TexRef[i].Name;
+          tex.fReadAsInteger :=
+            (info.TexRef[i].ReadMode = cudaReadModeElementType);
+          tex.fFormat := cCUDATypeToTexFormat[info.TexRef[i].DataType].F;
+          tex.fChannelNum := cCUDATypeToTexFormat[info.TexRef[i].DataType].C;
+          tex.Name := MakeUniqueName(tex.FKernelName);
+        end;
+
+        // Same probe as for functions: nil handle or exception means failure.
+        try
+          bFail := tex.Handle = nil;
+        except
+          bFail := True;
+        end;
+
+        if bFail then
+          tex.Destroy;
+      end;
+
+      for i := 0 to High(info.Constant) do
+      begin
+        cnst := GetKernelConstant(info.Constant[i].Name);
+        if not Assigned(cnst) then
+        begin
+          cnst := TCUDAConstant.Create(Self);
+          cnst.Master := Self;
+          cnst.FKernelName := info.Constant[i].Name;
+          cnst.FType := info.Constant[i].DataType;
+          cnst.FCustomType := info.Constant[i].CustomType;
+          cnst.Name := MakeUniqueName(cnst.FKernelName);
+          cnst.IsValueDefined := info.Constant[i].DefValue;
+        end;
+
+        // Probe through the device address; nil or an exception means failure.
+        try
+          bFail := cnst.DeviceAddress = nil;
+        except
+          bFail := True;
+        end;
+
+        if bFail then
+          cnst.Destroy;
+      end;
+
+      // Delete useless components
+      // Collected first and destroyed afterwards, because destroying a child
+      // removes it from the item list that is being iterated.
+      SetLength(useless, ItemsCount);
+      j := 0;
+      for i := 0 to ItemsCount - 1 do
+        if not Items[i].IsAllocated then
+          begin
+            useless[j] := Items[i];
+            Inc(j);
+          end;
+      for i := 0 to j - 1 do
+        useless[i].Destroy;
+    end;
+  end;
+end;
+
+// Asks every child component, in list order, to destroy its handles.
+procedure TCUDAModule.DestroyHandles;
+var
+  Idx: Integer;
+  Child: TCUDAComponent;
+begin
+  for Idx := 0 to ItemsCount - 1 do
+  begin
+    Child := TCUDAComponent(Items[Idx]);
+    Child.DestroyHandles;
+  end;
+end;
+
+// Loads a module from a file.
+// The code type is derived from the lower-cased file extension (ptx, cubin or
+// gpu). Only ptx and cubin files are actually loaded: the current module is
+// unloaded, the file is loaded through the driver, its text is copied into
+// the code list, the compiler is detached and the handles are re-allocated.
+// A driver failure aborts. A missing file or another extension is only
+// reported, and only when logging is compiled in.
+procedure TCUDAModule.LoadFromFile(const AFilename: string);
+var
+  Status: TCUresult;
+  ext: string;
+  AnsiFileName: AnsiString;
+begin
+  if FileExists(AFilename) then
+  begin
+    ext := ExtractFileExt(AFilename);
+    // Strip the leading dot before comparing.
+    System.Delete(ext, 1, 1);
+    ext := AnsiLowerCase(ext);
+    FCodeType := codeUndefined;
+    if ext = 'ptx' then
+      FCodeType := codePtx;
+    if ext = 'cubin' then
+      FCodeType := codeCubin;
+    if ext = 'gpu' then
+      FCodeType := codeGpu;
+
+    if (FCodeType = codePtx) or (FCodeType = codeCubin) then
+    begin
+      Unload;
+      Context.Requires;
+      AnsiFileName := AnsiString(AFilename);
+      Status := cuModuleLoad(FHandle, PAnsiChar(AnsiFileName));
+      Context.Release;
+      if Status <> CUDA_SUCCESS then
+        Abort;
+      FCode.LoadFromFile(AFilename);
+      Compiler := nil;
+      AllocateHandles;
+    end
+    else
+      // Without USE_LOGGING this else branch is an empty statement.
+      {$IFDEF USE_LOGGING}
+        LogErrorFmt('%s.LoadFromFile: file extension must be ptx or cubin', [Self.ClassName]);
+     {$ENDIF}
+  end
+  else
+   // Without USE_LOGGING this else branch is an empty statement.
+   {$IFDEF USE_LOGGING}
+    LogErrorFmt(strFailedOpenFile, [AFilename]);
+   {$ENDIF}
+end;
+
+// Loads the module from the text held in FCode; does nothing when the text
+// is empty. The children's handles are destroyed first. A module that is
+// already loaded is unloaded before the new one is loaded, because the load
+// call overwrites FHandle and the old module would otherwise be leaked.
+// Aborts when the driver reports a load failure.
+procedure TCUDAModule.LoadFromSource;
+var
+  Text: AnsiString;
+begin
+  Text := AnsiString(FCode.Text);
+  if Length(Text) > 0 then
+  begin
+    DestroyHandles;
+
+    Text := Text + #00;
+    Context.Requires;
+    if Assigned(FHandle) then
+    begin
+      // Child components are kept (unlike Unload); only the driver module is
+      // released so that its handle is not lost.
+      FStatus := cuModuleUnload(FHandle);
+      FHandle := nil;
+    end;
+    FStatus := cuModuleLoadData(FHandle, PAnsiChar(Text));
+    Context.Release;
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+  end;
+end;
+
+// Public entry point that simply forwards to AllocateHandles.
+procedure TCUDAModule.LoadAndCompile;
+begin
+  AllocateHandles;
+end;
+
+// Unloads the module if one is loaded: child handles are destroyed, the
+// children themselves are deleted, and the module handle is released through
+// the driver and reset.
+procedure TCUDAModule.Unload;
+begin
+  if FHandle = nil then
+    exit;
+
+  DestroyHandles;
+  DeleteItems;
+  Context.Requires;
+  FStatus := cuModuleUnload(FHandle);
+  Context.Release;
+  FHandle := nil;
+end;
+
+// Change handler of the code list: re-allocates the handles, but only
+// outside of loading and only when the sender is a TGLCUDACompiler.
+procedure TCUDAModule.OnChangeCode(Sender: TObject);
+begin
+  if csLoading in ComponentState then
+    exit;
+  if Sender is TGLCUDACompiler then
+    AllocateHandles;
+end;
+
+// Copies the content of Value into the module's own code list.
+procedure TCUDAModule.SetCode(const Value: TStringList);
+begin
+  FCode.Assign(Value);
+end;
+
+// Returns the first child TCUDAFunction whose KernelName equals AName, or
+// nil when there is none.
+function TCUDAModule.GetKernelFunction(const AName: string): TCUDAFunction;
+var
+  Idx: Integer;
+  Candidate: TComponent;
+begin
+  for Idx := 0 to Self.ItemsCount - 1 do
+  begin
+    Candidate := Items[Idx];
+    if (Candidate is TCUDAFunction) and
+      (TCUDAFunction(Candidate).KernelName = AName) then
+    begin
+      Result := TCUDAFunction(Candidate);
+      exit;
+    end;
+  end;
+  Result := nil;
+end;
+
+// Returns the first child TCUDATexture whose KernelName equals AName, or nil
+// when there is none.
+function TCUDAModule.GetKernelTexture(const AName: string): TCUDATexture;
+var
+  Idx: Integer;
+  Candidate: TComponent;
+begin
+  for Idx := 0 to Self.ItemsCount - 1 do
+  begin
+    Candidate := Items[Idx];
+    if (Candidate is TCUDATexture) and
+      (TCUDATexture(Candidate).KernelName = AName) then
+    begin
+      Result := TCUDATexture(Candidate);
+      exit;
+    end;
+  end;
+  Result := nil;
+end;
+
+
+// Returns the first child TCUDAConstant whose KernelName equals AName, or
+// nil when there is none.
+function TCUDAModule.GetKernelConstant(const AName: string): TCUDAConstant;
+var
+  Idx: Integer;
+  Candidate: TComponent;
+begin
+  for Idx := 0 to Self.ItemsCount - 1 do
+  begin
+    Candidate := Items[Idx];
+    if (Candidate is TCUDAConstant) and
+      (TCUDAConstant(Candidate).KernelName = AName) then
+    begin
+      Result := TCUDAConstant(Candidate);
+      exit;
+    end;
+  end;
+  Result := nil;
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAComponent ------------------
+// ------------------
+
+// Unregisters from the master, frees all children and the child list, then
+// runs the inherited teardown.
+destructor TCUDAComponent.Destroy;
+begin
+  if FMaster <> nil then
+    FMaster.RemoveItem(Self);
+  if FItems <> nil then
+  begin
+    DeleteItems;
+    FItems.Free;
+  end;
+  inherited Destroy;
+end;
+
+// Records AChange in the set of pending changes.
+procedure TCUDAComponent.CuNotifyChange(AChange: TCUDAChange);
+begin
+  FChanges := FChanges + [AChange];
+end;
+
+// Default context lookup: a TGLCUDA answers with its own Context, every
+// other component asks its master (cast to TGLCUDA) for the Context.
+function TCUDAComponent.GetContext: TCUDAContext;
+begin
+  if not(Self is TGLCUDA) then
+    Result := TGLCUDA(FMaster).Context
+  else
+    Result := TGLCUDA(Self).Context;
+end;
+
+// Remembers AStatus in FStatus only when it signals a failure; a success
+// code never overwrites a failure stored earlier.
+procedure TCUDAComponent.CollectStatus(AStatus: TCUresult);
+begin
+  if AStatus = CUDA_SUCCESS then
+    exit;
+  FStatus := AStatus;
+end;
+
+// Reports every child for which IsSubComponent returns False to AProc, in
+// list order. Nothing is reported when there is no child list.
+procedure TCUDAComponent.GetChildren(AProc: TGetChildProc; Root: TComponent);
+var
+  Idx: Integer;
+  Child: TComponent;
+begin
+  if not Assigned(FItems) then
+    exit;
+  for Idx := 0 to FItems.Count - 1 do
+  begin
+    Child := TComponent(FItems.List^[Idx]);
+    if not IsSubComponent(Child) then
+      AProc(Child);
+  end;
+end;
+
+// Streaming hook: the parent component becomes the master, unless this
+// component is a TGLCUDA or the master is already Value.
+procedure TCUDAComponent.SetParentComponent(Value: TComponent);
+begin
+  inherited;
+  if not(Self is TGLCUDA) and (Value <> FMaster) then
+    Master := TCUDAComponent(Value);
+end;
+
+// The master plays the role of the parent component.
+function TCUDAComponent.GetParentComponent: TComponent;
+begin
+  Result := FMaster;
+end;
+
+// True when a master is assigned.
+function TCUDAComponent.HasParent: Boolean;
+begin
+  Result := FMaster <> nil;
+end;
+
+// Moves this component from its current master (if any) to AMaster (if any):
+// it is removed from the old master's items and added to the new master's.
+procedure TCUDAComponent.SetMaster(AMaster: TCUDAComponent);
+begin
+  if FMaster <> nil then
+    FMaster.RemoveItem(Self);
+
+  FMaster := AMaster;
+
+  if FMaster <> nil then
+    FMaster.AddItem(Self);
+end;
+
+// Renames the component and then calls the unit-wide rename handler, if one
+// is registered. Setting the current name again does nothing.
+procedure TCUDAComponent.SetName(const NewName: TComponentName);
+begin
+  if Name = NewName then
+    exit;
+  inherited SetName(NewName);
+  if Assigned(vCUDAComponentNameChangeEvent) then
+    vCUDAComponentNameChangeEvent(Self);
+end;
+
+// Appends AItem to the child list, creating the list on first use.
+procedure TCUDAComponent.AddItem(AItem: TCUDAComponent);
+begin
+  if FItems = nil then
+    FItems := TPersistentObjectList.Create;
+  FItems.Add(AItem);
+end;
+
+// Detaches AItem from this component, provided a child list exists and this
+// component is AItem's master. If this component also owns AItem, ownership
+// is released too. AItem ends up without a master.
+procedure TCUDAComponent.RemoveItem(AItem: TCUDAComponent);
+begin
+  if (FItems = nil) or (AItem.FMaster <> Self) then
+    exit;
+
+  if AItem.Owner = Self then
+    RemoveComponent(AItem);
+  FItems.Remove(AItem);
+  AItem.FMaster := nil;
+end;
+
+// Pops children off the list one at a time and frees each of them, until the
+// list is empty.
+procedure TCUDAComponent.DeleteItems;
+begin
+  if not Assigned(FItems) then
+    exit;
+  while FItems.Count > 0 do
+    TCUDAComponent(FItems.Pop).Free;
+end;
+
+// Returns the child at index i, or nil when there is no child list or i is
+// outside 0..Count-1. Negative indices are rejected here as well instead of
+// being passed on to the list.
+function TCUDAComponent.GetItem(const i: Integer): TCUDAComponent;
+begin
+  if Assigned(FItems) and (i >= 0) and (i < FItems.Count) then
+    Result := TCUDAComponent(FItems[i])
+  else
+    Result := nil;
+end;
+
+// Number of children; zero while the child list has not been created.
+function TCUDAComponent.GetItemsCount: Integer;
+begin
+  Result := 0;
+  if FItems <> nil then
+    Result := FItems.Count;
+end;
+
+// Returns the first direct child whose Name equals name, or nil when no
+// child matches.
+function TCUDAComponent.GetItemByName(const name: string): TCUDAComponent;
+var
+  Idx: Integer;
+  Candidate: TCUDAComponent;
+begin
+  for Idx := 0 to GetItemsCount - 1 do
+  begin
+    Candidate := Items[Idx];
+    if Candidate.Name = name then
+    begin
+      Result := Candidate;
+      exit;
+    end;
+  end;
+  Result := nil;
+end;
+
+// Builds BaseName followed by the smallest number, starting at 1, for which
+// no direct child of this component carries the resulting name.
+function TCUDAComponent.MakeUniqueName(const BaseName: string): string;
+var
+  Suffix: Integer;
+begin
+  Suffix := 1;
+  repeat
+    Result := BaseName + IntToStr(Suffix);
+    Inc(Suffix);
+  until GetItemByName(Result) = nil;
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAFunction ------------------
+// ------------------
+
+// Creates the function component with its two dimension objects (block shape
+// and grid), no kernel handle, AutoSync enabled and no launch in progress.
+constructor TCUDAFunction.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  FBlockShape := TCUDADimensions.Create(Self);
+  FGrid := TCUDADimensions.Create(Self);
+  FHandle := nil;
+  FLaunching := false;
+  FAutoSync := true;
+end;
+
+ 
+// Destroys the two dimension objects, drops the kernel handle and then runs
+// the inherited teardown.
+destructor TCUDAFunction.Destroy;
+begin
+  FBlockShape.Destroy;
+  FGrid.Destroy;
+  DestroyHandles;
+  inherited Destroy;
+end;
+
+// Looks the kernel up in the master module and stores its handle.
+// Any existing handle is dropped first. A master that is not a TCUDAModule is
+// an error (abort). Without a kernel name or without a loaded module nothing
+// more happens. Otherwise the limits of the block shape and the grid are
+// taken from the device, the function is requested from the driver, and a
+// driver failure aborts.
+procedure TCUDAFunction.AllocateHandles;
+var
+  OwnerModule: TCUDAModule;
+  KernelAnsi: AnsiString;
+  NewHandle: PCUfunction;
+begin
+  DestroyHandles;
+
+  if not(FMaster is TCUDAModule) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strModuleAbsent);
+    {$ENDIF}
+    Abort;
+  end;
+
+  OwnerModule := TCUDAModule(FMaster);
+  if (FKernelName = '') or not Assigned(OwnerModule.FHandle) then
+    exit;
+
+  // Clamp the launch dimensions to what the device reports.
+  with OwnerModule.Context.Device do
+  begin
+    FBlockShape.MaxSizeX := MaxThreadsDim.SizeX;
+    FBlockShape.MaxSizeY := MaxThreadsDim.SizeY;
+    FBlockShape.MaxSizeZ := MaxThreadsDim.SizeZ;
+    FGrid.MaxSizeX := MaxGridSize.SizeX;
+    FGrid.MaxSizeY := MaxGridSize.SizeY;
+    FGrid.MaxSizeZ := MaxGridSize.SizeZ;
+  end;
+
+  KernelAnsi := AnsiString(FKernelName);
+  Context.Requires;
+  FStatus := cuModuleGetFunction(NewHandle, OwnerModule.FHandle,
+    PAnsiChar(KernelAnsi));
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  FHandle := NewHandle;
+  inherited;
+end;
+
+// When a kernel handle is held: lets every TCUDAFuncParam child destroy its
+// handles, clears the kernel handle and calls the inherited implementation.
+// Without a handle nothing happens.
+procedure TCUDAFunction.DestroyHandles;
+var
+  Idx: Integer;
+  Child: TComponent;
+begin
+  if FHandle = nil then
+    exit;
+
+  for Idx := 0 to ItemsCount - 1 do
+  begin
+    Child := Items[Idx];
+    if Child is TCUDAFuncParam then
+      TCUDAFuncParam(Child).DestroyHandles;
+  end;
+  FHandle := nil;
+  inherited;
+end;
+
+// Copies AShape into the owned block shape object.
+procedure TCUDAFunction.SetBlockShape(const AShape: TCUDADimensions);
+begin
+  FBlockShape.Assign(AShape);
+end;
+
+// Copies AGrid into the owned grid object.
+procedure TCUDAFunction.SetGrid(const AGrid: TCUDADimensions);
+begin
+  FGrid.Assign(AGrid);
+end;
+
+// While loading, the name is just stored. Otherwise the name is accepted
+// only when no kernel handle is held, and accepting it triggers handle
+// allocation at once.
+procedure TCUDAFunction.SetKernelName(const AName: string);
+var
+  IsLoading: Boolean;
+begin
+  IsLoading := csLoading in ComponentState;
+  if not IsLoading and Assigned(FHandle) then
+    exit;
+
+  FKernelName := AName;
+  if not IsLoading then
+    AllocateHandles;
+end;
+
+// Pushes a 32-bit signed integer argument at the current parameter offset
+// (its bits are passed on unchanged) and advances the offset by 4 bytes.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: Integer);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSeti(FHandle, ParamOffset, PCardinal(@Value)^);
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(Cardinal))
+  else
+    Abort;
+end;
+
+// Pushes a 32-bit unsigned integer argument at the current parameter offset
+// and advances the offset by its size.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: Cardinal);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSeti(FHandle, ParamOffset, Value);
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(Cardinal))
+  else
+    Abort;
+end;
+
+// Pushes a single-precision float argument at the current parameter offset
+// and advances the offset by its size.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: Single);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSetf(FHandle, ParamOffset, Value);
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(Single))
+  else
+    Abort;
+end;
+
+// Pushes a TVector2i argument as a raw block at the current parameter offset
+// and advances the offset by the size of the vector.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector2i);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector2i));
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(TVector2i))
+  else
+    Abort;
+end;
+
+// Pushes a TVector3i argument as a raw block at the current parameter offset
+// and advances the offset by the size of the vector.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector3i);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector3i));
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(TVector3i))
+  else
+    Abort;
+end;
+
+// Pushes a TVector4i argument as a raw block at the current parameter offset
+// and advances the offset by the size of the vector.
+// Aborts when FLaunching is not set or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector4i);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector4i));
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(TVector4i))
+  else
+    Abort;
+end;
+
+// Appends a two-component float vector to the kernel parameter block.
+// Aborts when called outside the parameter-setup phase (FLaunching not set)
+// or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector2f);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  // Copy the raw vector bytes at the current offset; advance only on success.
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector2f));
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(TVector2f))
+  else
+    Abort;
+end;
+
+// Appends a three-component float vector to the kernel parameter block.
+// Aborts when called outside the parameter-setup phase (FLaunching not set)
+// or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector3f);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector3f));
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+  // Advance by exactly the number of bytes written. The previous code
+  // advanced by SizeOf(TVector4f), leaving a 4-byte hole that shifted every
+  // following argument; the TVector3i overload advances by its own size.
+  Inc(ParamOffset, SizeOf(TVector3f));
+end;
+
+// Appends a four-component float vector to the kernel parameter block.
+// Aborts when called outside the parameter-setup phase (FLaunching not set)
+// or when the driver call fails.
+procedure TCUDAFunction.SetParam(Value: TVector4f);
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  // Copy the raw vector bytes at the current offset; advance only on success.
+  FStatus := cuParamSetv(FHandle, ParamOffset, Value, SizeOf(TVector4f));
+  if FStatus = CUDA_SUCCESS then
+    Inc(ParamOffset, SizeOf(TVector4f))
+  else
+    Abort;
+end;
+
+// Appends the raw data pointer of MemData to the kernel parameter block.
+// Aborts when called outside the parameter-setup phase (FLaunching not set)
+// or when the driver call fails.
+procedure TCUDAFunction.SetParam(MemData: TCUDAMemData);
+var
+  DataPtr: Pointer;
+  Misalign: Integer;
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  // A property cannot be passed by reference, so take a local copy first.
+  DataPtr := MemData.RawData;
+  // Pass the pointer at its full width. The previous Cardinal() cast kept
+  // only the low 32 bits, which corrupts the address on 64-bit targets.
+  // The offset is first rounded up to the pointer's alignment; on 32-bit
+  // targets this is a no-op because every overload advances by multiples of 4.
+  Misalign := ParamOffset mod SizeOf(Pointer);
+  if Misalign <> 0 then
+    Inc(ParamOffset, SizeOf(Pointer) - Misalign);
+  FStatus := cuParamSetv(FHandle, ParamOffset, DataPtr, SizeOf(Pointer));
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+  Inc(ParamOffset, SizeOf(Pointer));
+end;
+
+// Makes the texture reference of TexRef available to the kernel.
+// Does not consume space in the parameter block (ParamOffset is untouched).
+// Aborts when called outside the parameter-setup phase or on driver failure.
+procedure TCUDAFunction.SetParam(TexRef: TCUDATexture);
+var
+  TexHandle: PCUtexref;
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  // Reading Handle may allocate the texture reference on demand.
+  TexHandle := TexRef.Handle;
+  FStatus := cuParamSetTexRef(FHandle, CU_PARAM_TR_DEFAULT, TexHandle);
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Appends a raw pointer argument to the kernel parameter block.
+// Aborts when called outside the parameter-setup phase (FLaunching not set)
+// or when the driver call fails.
+procedure TCUDAFunction.SetParam(Ptr: Pointer);
+var
+  Misalign: Integer;
+begin
+  if not FLaunching then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strWrongParamSetup);
+    {$ENDIF}
+    Abort;
+  end;
+  // Pass the pointer at its full width. The previous Cardinal() cast kept
+  // only the low 32 bits, which corrupts the address on 64-bit targets.
+  // The offset is first rounded up to the pointer's alignment; on 32-bit
+  // targets this is a no-op because every overload advances by multiples of 4.
+  Misalign := ParamOffset mod SizeOf(Pointer);
+  if Misalign <> 0 then
+    Inc(ParamOffset, SizeOf(Pointer) - Misalign);
+  FStatus := cuParamSetv(FHandle, ParamOffset, Ptr, SizeOf(Pointer));
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+  Inc(ParamOffset, SizeOf(Pointer));
+end;
+
+// Launches the kernel.
+// Sequence: validate owner and handle, run the OnParameterSetup callback with
+// FLaunching set (the only time SetParam is accepted), commit the parameter
+// block size and block shape, then start the kernel either on the configured
+// grid (Grided = true) or as a single block. Aborts on any failure.
+// A nested call made while parameters are being set up returns immediately.
+procedure TCUDAFunction.Launch(Grided: Boolean = true);
+begin
+  if not(FMaster is TCUDAModule) then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strModuleAbsent);
+    {$ENDIF}
+    Abort;
+  end;
+
+  if FHandle = nil then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogErrorFmt(strFuncNotConnected, [Self.ClassName]);
+    {$ENDIF}
+    Abort;
+  end;
+
+  if FLaunching then
+    exit;
+
+  ParamOffset := 0;
+
+  Context.Requires;
+  FLaunching := true;
+  try
+    try
+      if Assigned(FOnParameterSetup) then
+        FOnParameterSetup(Self);
+    finally
+      // The setup phase is over whether or not the callback succeeded.
+      FLaunching := false;
+    end;
+  except
+    // A failing callback must not leave the context held.
+    Context.Release;
+    raise;
+  end;
+
+  FStatus := cuParamSetSize(FHandle, ParamOffset);
+  CollectStatus(cuFuncSetBlockShape(FHandle, FBlockShape.SizeX,
+    FBlockShape.SizeY, FBlockShape.SizeZ));
+
+  if FStatus = CUDA_SUCCESS then
+  begin
+    // execute the kernel
+    if not Grided then
+      FStatus := cuLaunch(FHandle)
+    else
+      FStatus := cuLaunchGrid(FHandle, FGrid.SizeX, FGrid.SizeY);
+    if FAutoSync then
+      CollectStatus(cuCtxSynchronize);
+  end;
+  Context.Release;
+
+  if FStatus = CUDA_SUCCESS then
+    exit;
+
+  {$IFDEF USE_LOGGING}
+  LogErrorFmt(strLaunchFailed, [Self.Name]);
+  {$ENDIF}
+  Abort;
+end;
+
+// Returns the function handle, allocating it first if none exists yet.
+function TCUDAFunction.GetHandle: PCUfunction;
+begin
+  if not Assigned(FHandle) then
+    AllocateHandles;
+  Result := FHandle;
+end;
+
+// True once a function handle has been obtained.
+function TCUDAFunction.GetIsAllocated: Boolean;
+begin
+  Result := FHandle <> nil;
+end;
+
+// Queries the CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK attribute of the
+// function. Aborts if the driver query fails.
+function TCUDAFunction.GetMaxThreadPerBlock: Integer;
+begin
+  Context.Requires;
+  FStatus := cuFuncGetAttribute(Result,
+    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, Handle);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Queries the CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES attribute of the function.
+// Aborts if the driver query fails.
+function TCUDAFunction.GetSharedMemorySize: Integer;
+begin
+  Context.Requires;
+  FStatus := cuFuncGetAttribute(Result,
+    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, Handle);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Sets the shared memory size of the function. The requested value is
+// clamped to the range 0 .. SharedMemPerBlock of the owning device before
+// it is handed to the driver. Aborts if the driver call fails.
+procedure TCUDAFunction.SetSharedMemorySize(Value: Integer);
+var
+  DeviceLimit: NativeUInt;
+begin
+  Context.Requires;
+  // The device is reached through the module's owning TGLCUDA component.
+  DeviceLimit := TGLCUDA(TCUDAModule(FMaster).FMaster)
+    .fDevice.Device.SharedMemPerBlock;
+  if Value < 0 then
+    Value := 0
+  else if Value > Integer(DeviceLimit) then
+    Value := DeviceLimit;
+  FStatus := cuFuncSetSharedSize(Handle, Value);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Queries the CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES attribute of the function.
+// Aborts if the driver query fails.
+function TCUDAFunction.GetConstMemorySize: Integer;
+begin
+  Context.Requires;
+  FStatus := cuFuncGetAttribute(Result,
+    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, Handle);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Queries the CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES attribute of the function.
+// Aborts if the driver query fails.
+function TCUDAFunction.GetLocalMemorySize: Integer;
+begin
+  Context.Requires;
+  FStatus := cuFuncGetAttribute(Result,
+    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, Handle);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Queries the CU_FUNC_ATTRIBUTE_NUM_REGS attribute of the function.
+// Aborts if the driver query fails.
+function TCUDAFunction.GetNumRegisters: Integer;
+begin
+  Context.Requires;
+  FStatus := cuFuncGetAttribute(Result, CU_FUNC_ATTRIBUTE_NUM_REGS, Handle);
+  Context.Release;
+  if FStatus = CUDA_SUCCESS then
+    exit;
+  Abort;
+end;
+
+// Looks up a child TCUDAFuncParam by its KernelName.
+// Returns the first match in item order, or nil when there is none.
+function TCUDAFunction.GetParameter(const AName: string): TCUDAFuncParam;
+var
+  Idx: Integer;
+  Candidate: TComponent;
+begin
+  Result := nil;
+  for Idx := 0 to Self.ItemsCount - 1 do
+  begin
+    Candidate := Items[Idx];
+    // Children of other classes are skipped.
+    if not (Candidate is TCUDAFuncParam) then
+      Continue;
+    if TCUDAFuncParam(Candidate).KernelName = AName then
+    begin
+      Result := TCUDAFuncParam(Candidate);
+      Break;
+    end;
+  end;
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAMemData ------------------
+// ------------------
+
+// Creates an unallocated buffer description: host memory, 256 elements wide,
+// no height/depth, one signed 8-bit channel. Fields are written directly, so
+// no change notification is raised here.
+constructor TCUDAMemData.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  // Nothing allocated or mapped yet.
+  FHandle := nil;
+  fData := nil;
+  FMapping := False;
+  FOpenGLRefArray := False;
+  // Default layout.
+  FMemoryType := mtHost;
+  fWidth := 256;
+  fHeight := 0;
+  fDepth := 0;
+  fPitch := 0;
+  // Default element format.
+  fChannelsNum := cnOne;
+  fChannelsType := ctInt8;
+end;
+
+// Selects element X of the buffer for host-side access.
+// Only host memory or a currently mapped buffer can be addressed; anything
+// else aborts. The element address is published through
+// SetElementAccessAddress; the function result itself is not filled in here.
+// Aborts when X is negative or the element does not lie fully inside the
+// buffer.
+function TCUDAMemData.Data<EType>(X: Integer): GCUDAHostElementAccess<EType>;
+var
+  ptr: PByte;
+  size: Integer;
+begin
+  if (FMemoryType <> mtHost) and not FMapping then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOnlyHostData);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // While mapped, access goes to the staging copy instead of the raw data.
+  if FMapping then
+    ptr := PByte(FMappedMemory)
+  else
+    ptr := PByte(GetData);
+  size := ElementSize * X;
+  // The whole element must fit: the previous test (size > DataSize) let the
+  // one-past-the-end element and negative indices through.
+  if (X < 0) or (size + ElementSize > DataSize) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOutOfRange);
+    {$ENDIF}
+    Abort;
+  end;
+  Inc(ptr, size);
+  SetElementAccessAddress(ptr, ElementSize);
+end;
+
+// Selects element (X, Y) of the buffer for host-side access, using the
+// linear index X + Width * Y.
+// Only host memory or a currently mapped buffer can be addressed; anything
+// else aborts. The element address is published through
+// SetElementAccessAddress; the function result itself is not filled in here.
+// Aborts when a coordinate is negative or the element does not lie fully
+// inside the buffer.
+function TCUDAMemData.Data<EType>(X, Y: Integer): GCUDAHostElementAccess<EType>;
+var
+  ptr: PByte;
+  size: Integer;
+begin
+  if (FMemoryType <> mtHost) and not FMapping then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOnlyHostData);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // While mapped, access goes to the staging copy instead of the raw data.
+  if FMapping then
+    ptr := PByte(FMappedMemory)
+  else
+    ptr := PByte(GetData);
+  size := ElementSize * (X + fWidth*Y);
+  // The whole element must fit: the previous test (size > DataSize) let the
+  // one-past-the-end element and negative coordinates through.
+  if (X < 0) or (Y < 0) or (size + ElementSize > DataSize) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOutOfRange);
+    {$ENDIF}
+    Abort;
+  end;
+  Inc(ptr, size);
+  SetElementAccessAddress(ptr, ElementSize);
+end;
+
+// Selects element (X, Y, Z) of the buffer for host-side access, using the
+// linear index X + Width * (Y + Z * Height).
+// Only host memory or a currently mapped buffer can be addressed; anything
+// else aborts. The element address is published through
+// SetElementAccessAddress; the function result itself is not filled in here.
+// Aborts when a coordinate is negative or the element does not lie fully
+// inside the buffer.
+function TCUDAMemData.Data<EType>(X, Y, Z: Integer): GCUDAHostElementAccess<EType>;
+var
+  ptr: PByte;
+  size: Integer;
+begin
+  if (FMemoryType <> mtHost) and not FMapping then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOnlyHostData);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // While mapped, access goes to the staging copy instead of the raw data.
+  if FMapping then
+    ptr := PByte(FMappedMemory)
+  else
+    ptr := PByte(GetData);
+  size := ElementSize * (X + fWidth*(Y + Z * fHeight));
+  // The whole element must fit: the previous test (size > DataSize) let the
+  // one-past-the-end element and negative coordinates through.
+  if (X < 0) or (Y < 0) or (Z < 0) or (size + ElementSize > DataSize) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strOutOfRange);
+    {$ENDIF}
+    Abort;
+  end;
+  Inc(ptr, size);
+  SetElementAccessAddress(ptr, ElementSize);
+end;
+
+// Detaches the buffer from any texture bound to it, releases the underlying
+// allocation and then destroys the component.
+destructor TCUDAMemData.Destroy;
+begin
+  if fTexture <> nil then
+    fTexture.MemDataArray := nil;
+  DestroyHandles;
+  inherited Destroy;
+end;
+
+// Records the change on this component and tells a bound texture that its
+// backing array changed.
+procedure TCUDAMemData.CuNotifyChange(AChange: TCUDAChange);
+begin
+  inherited;
+  if fTexture <> nil then
+    fTexture.CuNotifyChange(cuchArray);
+end;
+
+// Changes where the buffer lives. When switching to array memory while the
+// channel type is ctDouble, the channel type is changed to ctFloat.
+procedure TCUDAMemData.SetMemoryType(const AType: TCUDAMemType);
+begin
+  if FMemoryType = AType then
+    exit;
+  FMemoryType := AType;
+  if (fChannelsType = ctDouble) and (AType = mtArray) then
+    SetChannelType(ctFloat);
+  CuNotifyChange(cuchArray);
+end;
+
+// Sets the buffer width in elements (must be positive) and flags a size
+// change when the value actually differs.
+procedure TCUDAMemData.SetWidth(const Value: Integer);
+begin
+  Assert(Value > 0);
+  if Value = fWidth then
+    exit;
+  fWidth := Value;
+  CuNotifyChange(cuchSize);
+end;
+
+// Ends a mapping started by Map. For device and array memory the mapped
+// buffer is copied back to the CUDA storage and then freed; for host memory
+// nothing is copied or freed. Aborts when the buffer is not mapped or when a
+// driver call fails (in which case the mapping state is left unchanged).
+procedure TCUDAMemData.UnMap;
+begin
+  if not FMapping then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogErrorFmt(strFailUnmap, [Name]);
+    {$ENDIF}
+    Abort;
+  end;
+
+  Context.Requires;
+
+  // Step 1: write the mapped contents back.
+  case FMemoryType of
+    mtHost:
+      FStatus := CUDA_SUCCESS;
+    mtDevice:
+      FStatus := cuMemcpyHtoD(GetData, FMappedMemory, DataSize);
+    mtArray:
+      FStatus := cuMemcpyHtoA(GetArrayHandle, 0, FMappedMemory, DataSize);
+  end;
+  // Step 2: release the mapped buffer, but only after a successful copy.
+  if (FMemoryType in [mtDevice, mtArray]) and (FStatus = CUDA_SUCCESS) then
+    FStatus := cuMemFreeHost(FMappedMemory);
+
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  FMappedMemory := nil;
+  FMapping := False;
+end;
+
+// Sets the buffer height in rows (must not be negative) and flags a size
+// change when the value actually differs.
+procedure TCUDAMemData.SetHeight(const Value: Integer);
+begin
+  Assert(Value >= 0);
+  if Value = fHeight then
+    exit;
+  fHeight := Value;
+  CuNotifyChange(cuchSize);
+end;
+
+// Sets the buffer depth in slices (must not be negative) and flags a size
+// change when the value actually differs.
+procedure TCUDAMemData.SetDepth(const Value: Integer);
+begin
+  Assert(Value >= 0);
+  if Value = fDepth then
+    exit;
+  fDepth := Value;
+  CuNotifyChange(cuchSize);
+end;
+
+// Sets the per-channel data type and flags a format change.
+// ctUndefined is rejected by assertion; ctDouble is silently ignored while
+// the buffer is array memory.
+procedure TCUDAMemData.SetChannelType(const Value: TCUDAChannelType);
+begin
+  Assert(Value <> ctUndefined);
+  if ((FMemoryType = mtArray) and (Value = ctDouble))
+    or (Value = fChannelsType) then
+    exit;
+  fChannelsType := Value;
+  CuNotifyChange(cuchFormat);
+end;
+
+// Sets the number of channels per element and flags a format change when the
+// value actually differs.
+procedure TCUDAMemData.SetChannelNum(const Value: TCUDAChannelNum);
+begin
+  if Value = fChannelsNum then
+    exit;
+  fChannelsNum := Value;
+  CuNotifyChange(cuchFormat);
+end;
+
+// Returns the raw data pointer. If none exists and changes are pending, the
+// storage is allocated first.
+function TCUDAMemData.GetData: TCUdeviceptr;
+begin
+  if (fData = nil) and (FChanges <> []) then
+    AllocateHandles;
+  Result := fData;
+end;
+
+// Returns the CUDA array handle. If none exists and changes are pending, the
+// storage is allocated first.
+function TCUDAMemData.GetArrayHandle: PCUarray;
+begin
+  if (FHandle = nil) and (FChanges <> []) then
+    AllocateHandles;
+  Result := FHandle;
+end;
+
+// (Re)allocates the storage described by the current properties.
+// Any previous allocation is released first. Host memory uses
+// cuMemAllocHost; device memory uses a pitched allocation when Height > 1
+// and a linear one otherwise; array memory creates a CUDA array from width,
+// height, channel format and channel count (depth is not used for arrays).
+// Aborts if the driver call fails; clears the pending change set on success.
+procedure TCUDAMemData.AllocateHandles;
+const
+  cArrayFormat: array [ctUInt8 .. ctFloat] of TCUarray_format =
+    (CU_AD_FORMAT_UNSIGNED_INT8, CU_AD_FORMAT_UNSIGNED_INT16,
+    CU_AD_FORMAT_UNSIGNED_INT32, CU_AD_FORMAT_SIGNED_INT8,
+    CU_AD_FORMAT_SIGNED_INT16, CU_AD_FORMAT_SIGNED_INT32, CU_AD_FORMAT_HALF,
+    CU_AD_FORMAT_FLOAT);
+var
+  h, d: Integer;
+  Array2DDesc: TCUDA_ARRAY_DESCRIPTOR;
+  // Array3DDesc: TCUDA_ARRAY3D_DESCRIPTOR;
+  AlignedSize: Integer;
+begin
+  DestroyHandles;
+
+  // Always recompute the element size. DestroyHandles resets FElementSize to
+  // 0, so recomputing only when cuchFormat is pending (as before) could size
+  // a re-allocation with a stale zero when only the size or memory type
+  // changed.
+  FElementSize := cChannelTypeSize[fChannelsType] * (Ord(fChannelsNum) + 1);
+
+  // Unused dimensions (0) count as 1 for the total size.
+  h := Height;
+  if h = 0 then
+    h := 1;
+  d := Depth;
+  if d = 0 then
+    d := 1;
+  FDataSize := Width * h * d * ElementSize;
+
+  FStatus := CUDA_SUCCESS;
+  Context.Requires;
+  case FMemoryType of
+    mtHost:
+      FStatus := cuMemAllocHost(fData, DataSize);
+    mtDevice:
+      begin
+        if fHeight > 1 then
+        begin
+          // Element size hint for the pitched allocation: a power of two
+          // clamped to 4..16 bytes.
+          AlignedSize := RoundUpToPowerOf2(ElementSize);
+          if AlignedSize < 4 then
+            AlignedSize := 4;
+          if AlignedSize > 16 then
+            AlignedSize := 16;
+          FStatus := cuMemAllocPitch(TCUdeviceptr(fData), fPitch,
+            Width * ElementSize, fHeight, AlignedSize);
+        end
+        else
+          FStatus := cuMemAlloc(TCUdeviceptr(fData), DataSize);
+      end;
+    mtArray:
+      begin
+        Array2DDesc.Width := fWidth;
+        Array2DDesc.Height := fHeight;
+        Array2DDesc.Format := cArrayFormat[fChannelsType];
+        Array2DDesc.NumChannels := Ord(fChannelsNum) + 1;
+        FStatus := cuArrayCreate(FHandle, Array2DDesc);
+      end;
+  end;
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  FChanges := [];
+  inherited;
+end;
+
+// Releases the storage owned by this buffer and resets the derived fields
+// (handle, data pointer, pitch, data size, element size).
+// Does nothing when no storage of the current memory type exists.
+procedure TCUDAMemData.DestroyHandles;
+begin
+  // Early out: nothing allocated for the current memory type.
+  case FMemoryType of
+    mtHost, mtDevice:
+      if fData = nil then
+        exit;
+    mtArray:
+      if FHandle = nil then
+        exit;
+  end;
+
+  inherited;
+
+  // When FOpenGLRefArray is set the driver objects are not freed here
+  // (presumably they belong to a mapped OpenGL resource - confirm).
+  if not FOpenGLRefArray then
+  begin
+    Context.Requires;
+    case FMemoryType of
+      mtHost:
+        if Assigned(fData) then
+          cuMemFreeHost(fData);
+
+      mtDevice:
+        if Assigned(fData) then
+          cuMemFree(fData);
+
+      mtArray:
+        if Assigned(FHandle) then
+        begin
+          // Unbind from the texture before the array goes away.
+          if Assigned(fTexture) then
+            fTexture.MemDataArray := nil;
+          cuArrayDestroy(FHandle);
+        end;
+    end;
+    Context.Release;
+  end;
+  // Reset everything that described the released allocation.
+  FHandle := nil;
+  fData := nil;
+  fPitch := 0;
+  FDataSize := 0;
+  FElementSize := 0;
+  FOpenGLRefArray := False;
+end;
+
+// Fills device memory with a repeated value. Value is reinterpreted as a
+// byte, word or dword according to the channel type. Buffers with Height = 0
+// are filled linearly; otherwise a pitched 2D fill is used.
+// Does nothing for host or array memory, and performs no fill for channel
+// types not listed below. Aborts if the driver call fails.
+procedure TCUDAMemData.FillMem(const Value);
+var
+  DevPtr: TCUdeviceptr;
+  RowElems: Integer;
+begin
+  if FMemoryType <> mtDevice then
+    exit;
+
+  DevPtr := GetData;
+  FStatus := CUDA_SUCCESS;
+  Context.Requires;
+  if fHeight <> 0 then
+  begin
+    // 2D memory set: the row length is given in channel-sized units.
+    RowElems := (1 + Ord(fChannelsNum)) * fWidth;
+    case fChannelsType of
+      ctUInt8, ctInt8:
+        FStatus := cuMemsetD2D8(DevPtr, fPitch, Byte(Value), RowElems,
+          fHeight);
+      ctUInt16, ctInt16, ctHalfFloat:
+        FStatus := cuMemsetD2D16(DevPtr, fPitch, Word(Value), RowElems,
+          fHeight);
+      ctUInt32, ctInt32, ctFloat:
+        FStatus := cuMemsetD2D32(DevPtr, fPitch, DWord(Value), RowElems,
+          fHeight);
+    end;
+  end
+  else
+  begin
+    // 1D memory set: the count is the byte size divided by the unit size.
+    case fChannelsType of
+      ctUInt8, ctInt8:
+        FStatus := cuMemsetD8(DevPtr, Byte(Value), DataSize);
+      ctUInt16, ctInt16, ctHalfFloat:
+        FStatus := cuMemsetD16(DevPtr, Word(Value), DataSize div SizeOf(Word));
+      ctUInt32, ctInt32, ctFloat:
+        FStatus := cuMemsetD32(DevPtr, DWord(Value),
+          DataSize div SizeOf(DWord));
+    end;
+  end;
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+end;
+
+// Copies the contents of this buffer into ADstMemData.
+// A nil destination is ignored. Volume (depth) copies are not implemented and
+// are rejected by assertion. When both buffers have Height = 0 a linear copy
+// of the smaller of the two data sizes is made, choosing the driver call by
+// the source/destination memory types; otherwise a cuMemcpy2D is issued over
+// the overlapping width (in bytes) and height. Aborts if a driver call fails.
+procedure TCUDAMemData.CopyTo(const ADstMemData: TCUDAMemData);
+var
+  copyParam2D: TCUDA_MEMCPY2D;
+  // copyParam3D: TCUDA_MEMCPY3D;
+  Size: Integer;
+begin
+  if not Assigned(ADstMemData) then
+    exit;
+
+  Assert((fDepth = 0) and (ADstMemData.Depth = 0),
+    'Volume copying not yet implemented');
+
+  FStatus := CUDA_SUCCESS;
+
+  if (Height = ADstMemData.Height) and (Height = 0) then
+  begin
+    // 1D copying
+    // Never copy more than either side can hold.
+    Size := MinInteger(DataSize, ADstMemData.DataSize);
+    Context.Requires;
+    // Outer case: source memory type; inner case: destination memory type.
+    case MemoryType of
+      mtHost:
+        case ADstMemData.MemoryType of
+          mtHost:
+            Move(RawData^, ADstMemData.RawData^, Size);
+          mtDevice:
+            FStatus := cuMemcpyHtoD(ADstMemData.RawData, RawData, Size);
+          mtArray:
+            FStatus := cuMemcpyHtoA(ADstMemData.ArrayHandle, 0, RawData, Size);
+        end;
+
+      mtDevice:
+        case ADstMemData.MemoryType of
+          mtHost:
+            FStatus := cuMemcpyDtoH(ADstMemData.RawData, RawData, Size);
+          mtDevice:
+            FStatus := cuMemcpyDtoD(ADstMemData.RawData, RawData, Size);
+          mtArray:
+            FStatus := cuMemcpyDtoA(ADstMemData.ArrayHandle, 0, RawData, Size);
+        end;
+
+      mtArray:
+        case ADstMemData.MemoryType of
+          mtHost:
+            FStatus := cuMemcpyAtoH(ADstMemData.RawData, ArrayHandle, 0, Size);
+          mtDevice:
+            FStatus := cuMemcpyAtoD(ADstMemData.RawData, ArrayHandle, 0, Size);
+          mtArray:
+            FStatus := cuMemcpyAtoA(ADstMemData.ArrayHandle, 0,
+              ArrayHandle, 0, Size);
+        end;
+    end;
+    Context.Release;
+  end
+  else
+  begin
+    // 2D copying
+    // Zero the descriptor so all offsets default to 0.
+    FillChar(copyParam2D, SizeOf(copyParam2D), 0);
+    // Setup source copy parameters
+    case MemoryType of
+      mtHost:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_HOST;
+          copyParam2D.srcHost := TCUdeviceptr(RawData);
+        end;
+      mtDevice:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_DEVICE;
+          copyParam2D.srcDevice := TCUdeviceptr(RawData);
+        end;
+      mtArray:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_ARRAY;
+          copyParam2D.srcArray := ArrayHandle;
+        end;
+    end;
+    copyParam2D.srcPitch := fPitch;
+    // Setup destination copy parameters
+    case ADstMemData.FMemoryType of
+      mtHost:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_HOST;
+          copyParam2D.dstHost := TCUdeviceptr(ADstMemData.RawData);
+        end;
+      mtDevice:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_DEVICE;
+          copyParam2D.dstDevice := TCUdeviceptr(ADstMemData.RawData);
+        end;
+      mtArray:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_ARRAY;
+          copyParam2D.dstArray := ADstMemData.ArrayHandle;
+        end;
+    end;
+    copyParam2D.dstPitch := ADstMemData.fPitch;
+
+    // Copy only the region both buffers have in common.
+    copyParam2D.WidthInBytes := Cardinal(MinInteger(ElementSize * Width,
+      ADstMemData.ElementSize * ADstMemData.Width));
+    copyParam2D.Height := MinInteger(fHeight, ADstMemData.Height);
+
+    Context.Requires;
+    FStatus := cuMemcpy2D(@copyParam2D);
+    Context.Release;
+  end;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort
+end;
+
+// Copies a rectangular sub-region of this buffer into ADstMemData.
+//   ASrcXYZ - origin of the region in this buffer (elements)
+//   ADstXYZ - origin of the region in the destination (elements)
+//   ASizes  - extent of the region (elements)
+// The origins are clamped to the last valid index of each dimension and the
+// extents to the smaller of the two buffers' dimensions (upper bounds only).
+// A nil destination is ignored. Volume copies are not implemented and are
+// rejected by assertion. Aborts if the driver call fails.
+procedure TCUDAMemData.SubCopyTo(const ADstMemData: TCUDAMemData;
+  ASrcXYZ, ADstXYZ, ASizes: IntElement.TVector3);
+var
+  copyParam2D: TCUDA_MEMCPY2D;
+  // copyParam3D: TCUDA_MEMCPY3D;
+begin
+  if not Assigned(ADstMemData) then
+    exit;
+
+  // Clamp sizes
+  ASrcXYZ[0] := MinInteger(ASrcXYZ[0], Width - 1);
+  ASrcXYZ[1] := MinInteger(ASrcXYZ[1], MaxInteger(Height - 1, 0));
+  ASrcXYZ[2] := MinInteger(ASrcXYZ[2], MaxInteger(Depth - 1, 0));
+
+  ADstXYZ[0] := MinInteger(ADstXYZ[0], ADstMemData.Width - 1);
+  ADstXYZ[1] := MinInteger(ADstXYZ[1], MaxInteger(ADstMemData.Height - 1, 0));
+  ADstXYZ[2] := MinInteger(ADstXYZ[2], MaxInteger(ADstMemData.Depth - 1, 0));
+
+  ASizes[0] := MinInteger(ASizes[0], Width, ADstMemData.Width);
+  ASizes[1] := MinInteger(ASizes[1], Height, ADstMemData.Height);
+  ASizes[2] := MinInteger(ASizes[2], Depth, ADstMemData.Depth);
+
+  Assert(ASizes[2] = 0, 'Volume copying not yet implemented');
+
+  FStatus := CUDA_SUCCESS;
+
+  if ASizes[2] = 0 then
+  begin
+    // 2D copying
+    FillChar(copyParam2D, SizeOf(copyParam2D), 0);
+    // Setup source copy parameters
+    case MemoryType of
+      mtHost:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_HOST;
+          copyParam2D.srcHost := TCUdeviceptr(RawData);
+        end;
+      mtDevice:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_DEVICE;
+          copyParam2D.srcDevice := TCUdeviceptr(RawData);
+        end;
+      mtArray:
+        begin
+          copyParam2D.srcMemoryType := CU_MEMORYTYPE_ARRAY;
+          copyParam2D.srcArray := ArrayHandle;
+        end;
+    end;
+    // The X origin is expressed in bytes, the Y origin in rows.
+    copyParam2D.srcXInBytes := ASrcXYZ[0] * FElementSize;
+    copyParam2D.srcY := ASrcXYZ[1];
+    copyParam2D.srcPitch := fPitch;
+    // Setup destination copy parameters
+    case ADstMemData.FMemoryType of
+      mtHost:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_HOST;
+          copyParam2D.dstHost := TCUdeviceptr(ADstMemData.RawData);
+        end;
+      mtDevice:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_DEVICE;
+          copyParam2D.dstDevice := TCUdeviceptr(ADstMemData.RawData);
+        end;
+      mtArray:
+        begin
+          copyParam2D.dstMemoryType := CU_MEMORYTYPE_ARRAY;
+          copyParam2D.dstArray := ADstMemData.ArrayHandle;
+        end;
+    end;
+    copyParam2D.dstXInBytes := ADstXYZ[0] * ADstMemData.FElementSize;
+    copyParam2D.dstY := ADstXYZ[1];
+    copyParam2D.dstPitch := ADstMemData.fPitch;
+
+    // Row length uses the smaller element size of the two buffers.
+    copyParam2D.WidthInBytes := Cardinal(MinInteger(ElementSize * ASizes[0],
+      ADstMemData.ElementSize * ASizes[0]));
+    // A height extent of 0 (1D buffers) still copies one row.
+    copyParam2D.Height := MaxInteger(ASizes[1], 1);
+
+    Context.Requires;
+    FStatus := cuMemcpy2D(@copyParam2D);
+    Context.Release;
+  end;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort
+end;
+
+// Copies the contents of this buffer into the pixel data of AGLImage using
+// cuMemcpy2D. A nil image is ignored. Volume copies are not implemented and
+// are rejected by assertion. The copied region is the overlap of both sides
+// (row bytes and row count). Aborts if the driver call fails.
+procedure TCUDAMemData.CopyTo(const AGLImage: TGLBitmap32);
+var
+  copyParam2D: TCUDA_MEMCPY2D;
+  // copyParam3D: TCUDA_MEMCPY3D;
+begin
+  if not Assigned(AGLImage) then
+    exit;
+
+  Assert((fDepth = 0) and (AGLImage.Depth = 0),
+    'Volume copying not yet implemented');
+
+  FillChar(copyParam2D, SizeOf(copyParam2D), 0);
+  // Setup source copy parameters
+  case FMemoryType of
+    mtHost:
+      begin
+        copyParam2D.srcMemoryType := CU_MEMORYTYPE_HOST;
+        copyParam2D.srcHost := TCUdeviceptr(RawData);
+      end;
+    mtDevice:
+      begin
+        copyParam2D.srcMemoryType := CU_MEMORYTYPE_DEVICE;
+        copyParam2D.srcDevice := TCUdeviceptr(RawData);
+      end;
+    mtArray:
+      begin
+        copyParam2D.srcMemoryType := CU_MEMORYTYPE_ARRAY;
+        copyParam2D.srcArray := ArrayHandle;
+      end;
+  end;
+  copyParam2D.srcPitch := fPitch;
+  // Setup destination copy parameters
+  // The image is treated as tightly packed host memory.
+  copyParam2D.dstMemoryType := CU_MEMORYTYPE_HOST;
+  copyParam2D.dstHost := AGLImage.Data;
+  copyParam2D.dstPitch := AGLImage.ElementSize * AGLImage.Width;
+
+  copyParam2D.WidthInBytes :=
+    MinInteger(Cardinal(ElementSize * Width), copyParam2D.dstPitch);
+  copyParam2D.Height := MinInteger(Height, AGLImage.Height);
+
+  Context.Requires;
+  FStatus := cuMemcpy2D(@copyParam2D);
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+end;
+
+// Copies the contents of this buffer into a mapped graphic resource.
+// With an empty aAttr the element array of the resource is the target,
+// otherwise the attribute array named aAttr. Only buffer resources are
+// handled; for any other resource type nothing is copied. Does nothing when
+// the resource has no first handle. The number of bytes copied is the smaller
+// of this buffer's data size and the mapped size. Aborts on driver failure.
+procedure TCUDAMemData.CopyTo(const AGLGraphic: TCUDAGraphicResource;
+  aAttr: string);
+var
+  MappedPtr: TCUdeviceptr;
+  MappedBytes: Integer;
+begin
+  if not Assigned(AGLGraphic.FHandle[0]) then
+    exit;
+
+  Context.Requires;
+  AGLGraphic.MapResources;
+
+  if AGLGraphic.FResourceType <> rtBuffer then
+  begin
+    // TODO: image copying
+    AGLGraphic.UnMapResources;
+    Context.Release;
+    exit;
+  end;
+
+  // Pick the destination range inside the mapped buffer.
+  if aAttr = '' then
+  begin
+    MappedBytes := AGLGraphic.GetElementArrayDataSize;
+    MappedPtr := AGLGraphic.GetElementArrayAddress;
+  end
+  else
+  begin
+    MappedBytes := AGLGraphic.GetAttributeArraySize(aAttr);
+    MappedPtr := AGLGraphic.GetAttributeArrayAddress(aAttr);
+  end;
+
+  FStatus := CUDA_SUCCESS;
+
+  case FMemoryType of
+    mtHost:
+      FStatus := cuMemcpyHtoD(MappedPtr, RawData,
+        MinInteger(DataSize, MappedBytes));
+    mtDevice:
+      FStatus := cuMemcpyDtoD(MappedPtr, RawData,
+        MinInteger(DataSize, MappedBytes));
+    mtArray:
+      FStatus := cuMemcpyAtoD(MappedPtr, ArrayHandle, 0,
+        MinInteger(DataSize, MappedBytes));
+  end;
+
+  AGLGraphic.UnMapResources;
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+end;
+
+// Copies the contents of ASrcMemData into this buffer by delegating to the
+// source's CopyTo. A nil source is ignored, matching how the CopyTo overloads
+// treat a nil destination (previously a nil source was dereferenced).
+procedure TCUDAMemData.CopyFrom(const ASrcMemData: TCUDAMemData);
+begin
+  if not Assigned(ASrcMemData) then
+    exit;
+  ASrcMemData.CopyTo(Self);
+end;
+
+// Copies the pixel data of AGLImage into this buffer using cuMemcpy2D.
+// A nil image is ignored. Volume copies are not implemented and are rejected
+// by assertion. The copied region is the overlap of both sides (row bytes and
+// row count). Aborts if the driver call fails.
+procedure TCUDAMemData.CopyFrom(const AGLImage: TGLBitmap32);
+var
+  copyParam2D: TCUDA_MEMCPY2D;
+  // copyParam3D: TCUDA_MEMCPY3D;
+begin
+  if not Assigned(AGLImage) then
+    exit;
+
+  Assert((fDepth = 0) and (AGLImage.Depth = 0),
+    'Volume copying not yet implemented');
+
+  FillChar(copyParam2D, SizeOf(copyParam2D), 0);
+  // Setup destination copy parameters
+  case FMemoryType of
+    mtHost:
+      begin
+        copyParam2D.dstMemoryType := CU_MEMORYTYPE_HOST;
+        copyParam2D.dstHost := TCUdeviceptr(RawData);
+      end;
+    mtDevice:
+      begin
+        copyParam2D.dstMemoryType := CU_MEMORYTYPE_DEVICE;
+        copyParam2D.dstDevice := TCUdeviceptr(RawData);
+      end;
+    mtArray:
+      begin
+        copyParam2D.dstMemoryType := CU_MEMORYTYPE_ARRAY;
+        copyParam2D.dstArray := ArrayHandle;
+      end;
+  end;
+  copyParam2D.dstPitch := fPitch;
+  // Setup source copy parameters
+  // The image is treated as tightly packed host memory.
+  copyParam2D.srcMemoryType := CU_MEMORYTYPE_HOST;
+  copyParam2D.srcHost := AGLImage.Data;
+  copyParam2D.srcPitch := AGLImage.ElementSize * AGLImage.Width;
+
+  copyParam2D.WidthInBytes := MinInteger(
+    Cardinal(ElementSize * fWidth), copyParam2D.srcPitch);
+  copyParam2D.Height := MinInteger(fHeight, AGLImage.Height);
+
+  Context.Requires;
+  FStatus := cuMemcpy2D(@copyParam2D);
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+end;
+
+// Copies data from a mapped graphic resource into this buffer.
+// With an empty aAttr the element array of the resource is the source,
+// otherwise the attribute array named aAttr. Only buffer resources are
+// handled; for any other resource type nothing is copied. Does nothing when
+// the resource has no first handle. Volume buffers are rejected by assertion.
+// The number of bytes copied is the smaller of this buffer's data size and
+// the mapped size. Aborts on driver failure.
+procedure TCUDAMemData.CopyFrom(const AGLGraphic: TCUDAGraphicResource;
+  aAttr: string);
+var
+  MappedPtr: TCUdeviceptr;
+  MappedBytes: Integer;
+begin
+  if not Assigned(AGLGraphic.FHandle[0]) then
+    exit;
+
+  Assert(fDepth = 0, 'Volume copying not yet implemented');
+
+  Context.Requires;
+  AGLGraphic.MapResources;
+
+  if AGLGraphic.fResourceType <> rtBuffer then
+  begin
+    // TODO: image copying
+    AGLGraphic.UnMapResources;
+    Context.Release;
+    exit;
+  end;
+
+  // Pick the source range inside the mapped buffer.
+  if aAttr = '' then
+  begin
+    MappedBytes := AGLGraphic.GetElementArrayDataSize;
+    MappedPtr := AGLGraphic.GetElementArrayAddress;
+  end
+  else
+  begin
+    MappedBytes := AGLGraphic.GetAttributeArraySize(aAttr);
+    MappedPtr := AGLGraphic.GetAttributeArrayAddress(aAttr);
+  end;
+
+  FStatus := CUDA_SUCCESS;
+
+  case FMemoryType of
+    mtHost:
+      FStatus := cuMemcpyDtoH(RawData, MappedPtr,
+        Cardinal(MinInteger(DataSize, MappedBytes)));
+    mtDevice:
+      FStatus := cuMemcpyDtoD(RawData, MappedPtr,
+        Cardinal(MinInteger(DataSize, MappedBytes)));
+    mtArray:
+      FStatus := cuMemcpyDtoA(ArrayHandle, 0, MappedPtr,
+        Cardinal(MinInteger(DataSize, MappedBytes)));
+  end;
+  AGLGraphic.UnMapResources;
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+end;
+
+// True when storage exists for the current memory type: a data pointer for
+// host/device memory, an array handle for array memory.
+function TCUDAMemData.GetIsAllocated: Boolean;
+begin
+  if FMemoryType in [mtHost, mtDevice] then
+    Result := Assigned(FData)
+  else if FMemoryType = mtArray then
+    Result := Assigned(FHandle)
+  else
+    Result := False;
+end;
+
+// Makes the buffer contents accessible through FMappedMemory until UnMap.
+// Host memory: the pointer is obtained with cuMemHostGetDevicePointer.
+// Device/array memory: a host staging buffer is allocated with the requested
+// flags and filled with a copy of the current contents.
+// Aborts when the buffer is already mapped or when a driver call fails.
+procedure TCUDAMemData.Map(const AFlags: TCUDAMemMapFlags);
+var
+  LFlag: Cardinal;
+begin
+  if FMapping then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogErrorFmt(strFailMap, [Name]);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // Translate the mapping flags into cuMemHostAlloc flags.
+  LFlag := 0;
+  if mmfPortable in AFlags then
+    LFlag := LFlag or CU_MEMHOSTALLOC_PORTABLE;
+  if mmfFastWrite in AFlags then
+    LFlag := LFlag or CU_MEMHOSTALLOC_WRITECOMBINED;
+
+  Context.Requires;
+  // Called for its side effect: allocates the storage if that is pending.
+  GetData;
+
+  case FMemoryType of
+    mtHost:
+      begin
+        FStatus := cuMemHostGetDevicePointer(
+          FMappedMemory, GetData, 0);
+      end;
+    mtDevice:
+      begin
+        FStatus := cuMemHostAlloc(
+          FMappedMemory, DataSize, LFlag);
+        if FStatus = CUDA_SUCCESS then
+        begin
+          FStatus := cuMemcpyDtoH(
+            FMappedMemory, GetData, DataSize);
+          // Do not leak the staging buffer when the copy fails; FStatus
+          // keeps the copy error so the caller still sees the failure.
+          if FStatus <> CUDA_SUCCESS then
+          begin
+            cuMemFreeHost(FMappedMemory);
+            FMappedMemory := nil;
+          end;
+        end;
+      end;
+    mtArray:
+      begin
+        FStatus := cuMemHostAlloc(
+          FMappedMemory, DataSize, LFlag);
+        if FStatus = CUDA_SUCCESS then
+        begin
+          FStatus := cuMemcpyAtoH(
+            FMappedMemory, GetArrayHandle, 0, DataSize);
+          // Same clean-up as for device memory.
+          if FStatus <> CUDA_SUCCESS then
+          begin
+            cuMemFreeHost(FMappedMemory);
+            FMappedMemory := nil;
+          end;
+        end;
+      end;
+  end;
+
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  FMapping := True;
+end;
+
+// ------------------
+// ------------------ TCUDATexture ------------------
+// ------------------
+
+// Creates a texture reference with no handle and no bound array, and applies
+// the defaults: clamp addressing on all three axes, normalized coordinates,
+// no integer read-back, point filtering, undefined format, one channel.
+constructor TCUDATexture.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  FHandle := nil;
+  fArray := nil;
+  // The unprefixed names below are not the f-prefixed fields used elsewhere
+  // in this class; presumably they are properties routed through the
+  // Set... methods - confirm against the class declaration.
+  AddressModeS := amClamp;
+  AddressModeT := amClamp;
+  AddressModeR := amClamp;
+  NormalizedCoord := true;
+  ReadAsInteger := false;
+  FilterMode := fmPoint;
+  // Written directly to the fields, bypassing SetFormat/SetChannelNum.
+  fFormat := ctUndefined;
+  fChannelNum := cnOne;
+end;
+
+
+// Clears the back-reference held by the bound array, drops the handle and
+// then destroys the component.
+destructor TCUDATexture.Destroy;
+begin
+  if fArray <> nil then
+    fArray.fTexture := nil;
+  DestroyHandles;
+  inherited Destroy;
+end;
+
+// Returns the texture reference handle, (re)running AllocateHandles first
+// when no handle exists or when changes are pending.
+function TCUDATexture.GetHandle: PCUtexref;
+begin
+  if (FHandle = nil) or (FChanges <> []) then
+    AllocateHandles;
+  Result := FHandle;
+end;
+
+// True once a texture reference handle has been obtained.
+function TCUDATexture.GetIsAllocated: Boolean;
+begin
+  Result := FHandle <> nil;
+end;
+
+// Obtains the texture reference named FKernelName from the owning module and
+// applies every pending change (bound array, address modes, flags, filter
+// mode) to it. Aborts when the owner is not a TCUDAModule or when a driver
+// call fails; returns without doing anything when no kernel name is set.
+// Clears the pending change set on success.
+procedure TCUDATexture.AllocateHandles;
+var
+  pTex: PCUtexref;
+  LName: AnsiString;
+  LModule: TCUDAModule;
+  LFlag: Cardinal;
+  LFormat: TCUarray_format;
+  LChanels: Integer;
+begin
+  if not(FMaster is TCUDAModule) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strModuleAbsent);
+    {$ENDIF}
+    Abort;
+  end;
+
+  if Length(FKernelName) = 0 then
+    exit;
+
+  LModule := TCUDAModule(FMaster);
+
+  // The driver expects an ANSI name.
+  LName := AnsiString(FKernelName);
+  Context.Requires;
+  FStatus := cuModuleGetTexRef(pTex, LModule.FHandle, PAnsiChar(LName));
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+  FHandle := pTex;
+
+  Context.Requires;
+  // Apply changes
+  if (cuchArray in FChanges) and Assigned(fArray) then
+  begin
+    CollectStatus(cuTexRefSetArray(FHandle, fArray.ArrayHandle,
+      CU_TRSA_OVERRIDE_FORMAT));
+    fArray.fTexture := Self;
+    // Update format
+    if cuTexRefGetFormat(LFormat, LChanels, FHandle) = CUDA_SUCCESS then
+      CUDAEnumToChannelDesc(LFormat, LChanels, fFormat, fChannelNum);
+  end;
+
+  if cuchAddresMode in FChanges then
+  begin
+    // Dimensions 0, 1, 2 receive the S, T and R address modes respectively.
+    CollectStatus(cuTexRefSetAddressMode(FHandle, 0,
+      cAddressMode[fAddressModeS]));
+    CollectStatus(cuTexRefSetAddressMode(FHandle, 1,
+      cAddressMode[fAddressModeT]));
+    CollectStatus(cuTexRefSetAddressMode(FHandle, 2,
+      cAddressMode[fAddressModeR]));
+  end;
+
+  if cuchFlag in FChanges then
+  begin
+    LFlag := 0;
+    if fNormalizedCoord then
+      LFlag := LFlag or CU_TRSF_NORMALIZED_COORDINATES;
+    if fReadAsInteger then
+      LFlag := LFlag or CU_TRSF_READ_AS_INTEGER;
+    CollectStatus(cuTexRefSetFlags(FHandle, LFlag));
+  end;
+
+  if cuchFilterMode in FChanges then
+    CollectStatus(cuTexRefSetFilterMode(FHandle, cFilterMode[fFilterMode]));
+
+  Context.Release;
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  FChanges := [];
+  inherited;
+end;
+
+// Forgets the texture reference handle and runs the inherited clean-up.
+// Does nothing when no handle is held.
+procedure TCUDATexture.DestroyHandles;
+begin
+  if not Assigned(FHandle) then
+    exit;
+  FHandle := nil;
+  inherited;
+end;
+
+// Sets the name under which the texture is looked up in the module.
+// While the component is loading the name is only stored. Otherwise it is
+// accepted only when no handle exists yet, and the handle is allocated
+// immediately; once a handle exists the call is ignored.
+procedure TCUDATexture.SetKernelName(const AName: string);
+var
+  IsLoading: Boolean;
+begin
+  IsLoading := csLoading in ComponentState;
+  if (not IsLoading) and Assigned(FHandle) then
+    exit;
+  FKernelName := AName;
+  if not IsLoading then
+    AllocateHandles;
+end;
+
+// SetAddressModeS
+//
+
+// Sets the S-axis address mode and flags an address-mode change when the
+// value actually differs.
+procedure TCUDATexture.SetAddressModeS(const AMode: TCuAddresMode);
+begin
+  if AMode = fAddressModeS then
+    exit;
+  fAddressModeS := AMode;
+  CuNotifyChange(cuchAddresMode);
+end;
+
+// Sets the T-axis address mode and flags an address-mode change when the
+// value actually differs.
+procedure TCUDATexture.SetAddressModeT(const AMode: TCuAddresMode);
+begin
+  if AMode = fAddressModeT then
+    exit;
+  fAddressModeT := AMode;
+  CuNotifyChange(cuchAddresMode);
+end;
+
+// Sets the R-axis address mode and flags an address-mode change when the
+// value actually differs.
+procedure TCUDATexture.SetAddressModeR(const AMode: TCuAddresMode);
+begin
+  if AMode = fAddressModeR then
+    exit;
+  fAddressModeR := AMode;
+  CuNotifyChange(cuchAddresMode);
+end;
+
+// Turns normalized texture coordinates on or off and flags a flag change
+// when the value actually differs.
+procedure TCUDATexture.SetNormalizedCoord(const flag: Boolean);
+begin
+  if flag = fNormalizedCoord then
+    exit;
+  fNormalizedCoord := flag;
+  CuNotifyChange(cuchFlag);
+end;
+
+// Turns integer read-back on or off and flags a flag change when the value
+// actually differs.
+procedure TCUDATexture.SetReadAsInteger(const flag: Boolean);
+begin
+  if flag = fReadAsInteger then
+    exit;
+  fReadAsInteger := flag;
+  CuNotifyChange(cuchFlag);
+end;
+
+// Sets the texture filter mode and flags a filter-mode change when the value
+// actually differs.
+procedure TCUDATexture.SetFilterMode(const mode: TCuFilterMode);
+begin
+  if mode = fFilterMode then
+    exit;
+  fFilterMode := mode;
+  CuNotifyChange(cuchFilterMode);
+end;
+
+// Sets the channel type of the texture.
+// During streaming the value is stored silently; at any other time it is
+// accepted only while no handle exists and a format change is notified.
+procedure TCUDATexture.SetFormat(AValue: TCUDAChannelType);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  fFormat := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchFormat);
+end;
+
+// Binds a memory-data object to this texture (or unbinds with nil).
+// The previous array is detached first. Only values whose MemoryType is
+// mtArray are accepted; they donate their channel type/count and are taken
+// away from any texture they were previously attached to. Passing nil
+// resets the format to ctUndefined / cnOne.
+procedure TCUDATexture.SetArray(Value: TCUDAMemData);
+begin
+  if Value = fArray then
+    exit;
+
+  // Break the back-reference held by the array we are letting go of.
+  if Assigned(fArray) then
+    fArray.fTexture := nil;
+
+  if not Assigned(Value) then
+  begin
+    fFormat := ctUndefined;
+    fChannelNum := cnOne;
+  end
+  else if Value.MemoryType = mtArray then
+  begin
+    fFormat := Value.fChannelsType;
+    fChannelNum := Value.fChannelsNum;
+    // Steal the array from whichever texture currently owns it.
+    if Assigned(Value.fTexture) then
+      Value.fTexture.MemDataArray := nil;
+    Value.fTexture := Self;
+  end
+  else
+    // NOTE(review): a rejected (non-array) value unbinds the texture but
+    // leaves fFormat/fChannelNum as they were - confirm this is intended.
+    Value := nil;
+
+  fArray := Value;
+  CuNotifyChange(cuchArray);
+end;
+
+// Sets the number of channels of the texture.
+// During streaming the value is stored silently; at any other time it is
+// accepted only while no handle exists and a format change is notified.
+procedure TCUDATexture.SetChannelNum(AValue: TCUDAChannelNum);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  fChannelNum := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchFormat);
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAGraphicResource ------------------
+// ------------------
+
+// Changes the mapping mode and marks the mapping state dirty.
+procedure TCUDAGraphicResource.SetMapping(const Value: TCUDAMapping);
+begin
+  if Value = fMapping then
+    exit;
+  fMapping := Value;
+  CuNotifyChange(cuchMapping);
+end;
+
+// Returns True as soon as any slot of FHandle (scanned from index 0) holds
+// an assigned handle, False when all of them are empty.
+function TCUDAGraphicResource.GetIsAllocated: Boolean;
+var
+  Slot: Integer;
+begin
+  Result := False;
+  Slot := 0;
+  while (not Result) and (Slot <= High(FHandle)) do
+  begin
+    Result := Assigned(FHandle[Slot]);
+    Inc(Slot);
+  end;
+end;
+
+// Virtual-handle allocation callback: hands out the current value of the
+// unit-wide counter and advances the counter by one.
+procedure TCUDAGraphicResource.OnGLHandleAllocate(Sender: TGLVirtualHandle;
+  var Handle: Cardinal);
+begin
+  Handle := GLVirtualHandleCounter;
+  GLVirtualHandleCounter := GLVirtualHandleCounter + 1;
+end;
+
+// Virtual-handle destruction callback: releases this resource's handles.
+// Sender and Handle are not used.
+procedure TCUDAGraphicResource.OnGLHandleDestroy(Sender: TGLVirtualHandle;
+  var Handle: Cardinal);
+begin
+  DestroyHandles;
+end;
+
+// Wraps an existing CUDA array handle in a TCUDAMemData object.
+//   AArray       - receives the wrapper; created (owned by Owner) when nil
+//   AHandle      - CUDA array whose descriptor is queried
+//   ForGLTexture - stored in the wrapper's FOpenGLRefArray
+//   Volume       - True to query the 3D descriptor, False for the 2D one
+// Aborts (silent exception) when the descriptor query fails.
+procedure TCUDAGraphicResource.SetArray(var AArray: TCUDAMemData;
+  AHandle: PCUarray; ForGLTexture, Volume: Boolean);
+var
+  Desc2D: TCUDA_ARRAY_DESCRIPTOR;
+  Desc3D: TCUDA_ARRAY3D_DESCRIPTOR;
+begin
+  // The descriptor query is bracketed by Context.Requires/Release.
+  Context.Requires;
+  // Get array descriptor
+  if Volume then
+    FStatus := cuArray3DGetDescriptor(Desc3D, AHandle)
+  else
+    FStatus := cuArrayGetDescriptor(Desc2D, AHandle);
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  // Set array parameters
+  if not Assigned(AArray) then
+    AArray := TCUDAMemData.Create(Owner);
+
+  // NOTE(review): inside this "with", FHandle, DestroyHandles and the
+  // f*/F* fields are presumably resolved against AArray, not Self.
+  with AArray do
+  begin
+    // Re-target the wrapper only when it points at a different array.
+    if FHandle <> AHandle then
+    begin
+      DestroyHandles;
+      FHandle := AHandle;
+    end;
+    FOpenGLRefArray := ForGLTexture;
+    FMemoryType := mtArray;
+    FPitch := 0;
+    if Volume then
+    begin
+      fWidth := Desc3D.Width;
+      fHeight := Desc3D.Height;
+      fDepth := Desc3D.Depth;
+      CUDAEnumToChannelDesc(Desc3D.Format, Desc3D.NumChannels, fChannelsType,
+        fChannelsNum);
+    end
+    else
+    begin
+      fWidth := Desc2D.Width;
+      fHeight := Desc2D.Height;
+      fDepth := 0;
+      CUDAEnumToChannelDesc(Desc2D.Format, Desc2D.NumChannels, fChannelsType,
+        fChannelsNum);
+    end;
+    // Bytes per element = size of one channel * channel count; the "+ 1"
+    // presumably compensates for a zero-based channel-count enum.
+    FElementSize := cChannelTypeSize[fChannelsType] * (Ord(fChannelsNum) + 1);
+  end;
+end;
+
+ 
+// ------------------
+// ------------------ TCUDAUniform ------------------
+// ------------------
+
+// Creates a uniform with no handle, zero size, custom type and the
+// "defined" flag cleared.
+constructor TCUDAUniform.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  FDefined := false;
+  FType := TCUDAType.CustomType;
+  FSize := 0;
+  FHandle := nil;
+end;
+
+// Releases the uniform's handles before the inherited destructor runs.
+destructor TCUDAUniform.Destroy;
+begin
+  DestroyHandles;
+  inherited;
+end;
+
+// A uniform counts as allocated while it holds a non-nil handle.
+function TCUDAUniform.GetIsAllocated: Boolean;
+begin
+  Result := FHandle <> nil;
+end;
+
+// Sets the custom type name. Stored silently while streaming; otherwise
+// accepted only while no handle exists, followed by a size-change notice.
+procedure TCUDAUniform.SetCustomType(const AValue: string);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  FCustomType := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchSize);
+end;
+
+// Updates the "defined" flag; ignored once a handle has been obtained.
+procedure TCUDAUniform.SetDefined(AValue: Boolean);
+begin
+  if Assigned(FHandle) then
+    exit;
+  FDefined := AValue;
+end;
+
+// Sets the kernel-side name. Stored silently while streaming; otherwise
+// accepted only while no handle exists, followed by a size-change notice.
+procedure TCUDAUniform.SetKernelName(const AName: string);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  FKernelName := AName;
+  if not Streaming then
+    CuNotifyChange(cuchSize);
+end;
+
+// Sets the size value. Stored silently while streaming; otherwise accepted
+// only while no handle exists, followed by a size-change notice.
+procedure TCUDAUniform.SetSize(const AValue: Cardinal);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  FSize := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchSize);
+end;
+
+// Sets the data type. Stored silently while streaming; otherwise accepted
+// only while no handle exists, followed by a size-change notice.
+procedure TCUDAUniform.SetType(AValue: TCUDAType);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  FType := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchSize);
+end;
+
+// Sets the FRef flag. Stored silently while streaming; otherwise accepted
+// only while no handle exists, followed by a size-change notice.
+procedure TCUDAUniform.SetRef(AValue: Boolean);
+var
+  Streaming: Boolean;
+begin
+  Streaming := csLoading in ComponentState;
+  if (not Streaming) and Assigned(FHandle) then
+    exit;
+  FRef := AValue;
+  if not Streaming then
+    CuNotifyChange(cuchSize);
+end;
+ 
+
+// ------------------
+// ------------------ TCUDAConstant ------------------
+// ------------------
+
+// Looks up the module-level global named FKernelName in the master module
+// and stores its device address in FHandle and its size in FSize.
+// Aborts when the master is not a TCUDAModule or when the lookup fails;
+// returns without doing anything when no kernel name has been set.
+procedure TCUDAConstant.AllocateHandles;
+var
+  LName: AnsiString;
+  LModule: TCUDAModule;
+begin
+  if not(FMaster is TCUDAModule) then
+  begin
+    {$IFDEF USE_LOGGING}
+      LogError(strModuleAbsent);
+    {$ENDIF}
+    Abort;
+  end;
+
+  if Length(FKernelName) = 0 then
+    exit;
+
+  LModule := TCUDAModule(FMaster);
+
+  // The driver call takes a PAnsiChar, hence the AnsiString copy.
+  LName := AnsiString(FKernelName);
+  // Forget any previously resolved address before resolving again.
+  DestroyHandles;
+
+  Context.Requires;
+  FStatus := cuModuleGetGlobal(FHandle, FSize, LModule.FHandle,
+    PAnsiChar(LName));
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  // Everything is up to date now; clear the pending-change set.
+  FChanges := [];
+  inherited;
+end;
+
+// Forgets the resolved device address and runs the ancestor's handle
+// destruction. Does nothing when no address is held.
+procedure TCUDAConstant.DestroyHandles;
+begin
+  if not Assigned(FHandle) then
+    exit;
+  FHandle := nil;
+  inherited;
+end;
+
+// Returns the device address of the constant, (re)resolving it first when
+// there are pending changes or no address has been obtained yet.
+function TCUDAConstant.GetDeviceAddress: TCUdeviceptr;
+var
+  NeedsResolve: Boolean;
+begin
+  NeedsResolve := (FChanges <> []) or (FHandle = nil);
+  if NeedsResolve then
+    AllocateHandles;
+  Result := FHandle;
+end;
+
+ 
+
+// ------------------
+// ------------------ TCUDAFuncParam ------------------
+// ------------------
+
+// Borrows the handle of the owning TCUDAFunction. The ancestor's allocation
+// step runs only when that function actually has a handle.
+procedure TCUDAFuncParam.AllocateHandles;
+begin
+  // "is" yields False for nil, so this also covers an unassigned Master.
+  if not (Master is TCUDAFunction) then
+    exit;
+  FHandle := TCUDAFunction(Master).FHandle;
+  if FHandle <> nil then
+    inherited;
+end;
+
+// Creates a function parameter with no handle and FRef cleared.
+constructor TCUDAFuncParam.Create(AOwner: TComponent);
+begin
+  inherited;
+  FHandle := nil;
+  FRef := false;
+end;
+
+// Drops the borrowed function handle and runs the ancestor's handle
+// destruction. Does nothing when no handle is held.
+procedure TCUDAFuncParam.DestroyHandles;
+begin
+  if not Assigned(FHandle) then
+    exit;
+  FHandle := nil;
+  inherited;
+end;
+
+ 
+// ------------------------------------------------------------------
+initialization
+// ------------------------------------------------------------------
+
+  // Register the component classes of this unit with the streaming system
+  // so they can be instantiated by class name.
+  RegisterClasses([TGLCUDA, TGLCUDACompiler, TCUDAModule, TCUDAFunction,
+    TCUDATexture, TCUDAMemData, TCUDAConstant, TCUDAFuncParam]);
+
+end.

+ 445 - 451
Source/GLS.CUDACompiler.pas → Source/GPU.CUDACompiler.pas

@@ -1,451 +1,445 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDACompiler;
-
-(*
-  Component allows to compile the CUDA-source (*.cu) file.
-  in design- and runtime.
-  To work requires the presence of CUDA Toolkit 3.X and MS Visual Studio C++.
-*)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  Winapi.Windows,
-  Winapi.ShellAPI,
-  Winapi.TlHelp32,
-  System.UITypes,
-  System.SysUtils,
-  System.Classes,
-  Vcl.Forms,
-  VCL.Dialogs,
-  GLS.Strings,
-
-  GLS.CUDAParser,
-
-  GLS.ApplicationFileIO
-  {$IFDEF USE_LOGGING},GLSLog;{$ELSE};{$ENDIF}
-
-
-type
-  TGLSCUDACompilerOutput = (codeUndefined, codePtx, codeCubin, codeGpu);
-
-  (*
-    compute_10 Basic features
-    compute_11 + atomic memory operations on global memory
-    compute_12 + atomic memory operations on shared memory
-               + vote instructions
-    compute_13 + double precision floating point support
-    Compute_20 + FERMI support
-  *)
-  TGLSCUDAVirtArch = (compute_10, compute_11, compute_12, compute_13, compute_20);
-
-  (*
-    sm_10 ISA_1 Basic features
-    sm_11 + atomic memory operations on global memory
-    sm_12 + atomic memory operations on shared memory
-          + vote instructions
-    sm_13 + double precision floating point support
-    sm_20 + FERMI support.
-    sm_21 + Unknown
-  *)
-  TGLSCUDARealArch = (sm_10, sm_11, sm_12, sm_13, sm_20, sm_21);
-  TGLSCUDARealArchs = set of TGLSCUDARealArch;
-
-  TGLSCUDACompiler = class(TComponent)
-  private
-    FNVCCPath: string;
-    FCppCompilerPath: string;
-    FProduct: TStringList;
-    FProjectModule: string;
-    FSourceCodeFile: string;
-    FConsoleContent: string;
-    FOutputCodeType: TGLSCUDACompilerOutput;
-    FVirtualArch: TGLSCUDAVirtArch;
-    FRealArch: TGLSCUDARealArchs;
-    FMaxRegisterCount: Integer;
-    FModuleInfo: TCUDAModuleInfo;
-    procedure SetMaxRegisterCount(Value: Integer);
-    procedure SetOutputCodeType(const Value: TGLSCUDACompilerOutput);
-    function StoreProjectModule: Boolean;
-    procedure SetRealArch(AValue: TGLSCUDARealArchs);
-    procedure SetNVCCPath(const AValue: string);
-    procedure SetCppCompilerPath(const AValue: string);
-  protected
-    procedure Loaded; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure Assign(Source: TPersistent); override;
-    procedure SetSourceCodeFile(const AFileName: string);
-    function Compile: Boolean;
-    {  Product of compilation. }
-    property Product: TStringList read FProduct write FProduct;
-    property ModuleInfo: TCUDAModuleInfo read FModuleInfo;
-    property ConsoleContent: string read FConsoleContent;
-  published
-    {  NVidia CUDA Compiler. }
-    property NVCCPath: string read FNVCCPath write SetNVCCPath;
-    {  Microsoft Visual Studio Compiler.
-      Pascal compiler is still not done. }
-    property CppCompilerPath: string read FCppCompilerPath
-      write SetCppCompilerPath;
-    {  Full file name of source code file. }
-    property SourceCodeFile: string read FSourceCodeFile;
-    {  Disign-time only property.
-      Make choose of one of the Project module as CUDA kernel source }
-    property ProjectModule: string read FProjectModule write FProjectModule
-      stored StoreProjectModule;
-    {  Output code type for module kernel
-      - Ptx - Parallel Thread Execution
-      - Cubin - CUDA Binary }
-    property OutputCodeType: TGLSCUDACompilerOutput read FOutputCodeType
-      write setOutputCodeType default codePtx;
-    {  In the CUDA naming scheme,
-        GPUs are named sm_xy,
-        where x denotes the GPU generation number,
-        and y the version in that generation. }
-    property RealArchitecture: TGLSCUDARealArchs read FRealArch
-      write SetRealArch default [sm_13];
-    {  Virtual architecture. }
-    property VirtualArchitecture: TGLSCUDAVirtArch read FVirtualArch
-      write FVirtualArch default compute_13;
-    {  Maximum registers that kernel can use. }
-    property MaxRegisterCount: Integer read FMaxRegisterCount
-      write SetMaxRegisterCount default 32;
-  end;
-
-  TFindCuFileFunc = function(var AModuleName: string): Boolean;
-
-var
-  vFindCuFileFunc: TFindCuFileFunc;
-
-//------------------------------------------------------------------
-implementation
-//------------------------------------------------------------------
-
-
-// ------------------
-// ------------------ TGLSCUDACompiler ------------------
-// ------------------
-
-constructor TGLSCUDACompiler.Create(AOwner: TComponent);
-var
-  path: string;
-begin
-  inherited Create(AOwner);
-  FOutputCodeType := codePtx;
-  FVirtualArch := compute_13;
-  FRealArch := [sm_13];
-  FMaxRegisterCount := 32;
-  FNVCCPath := '';
-  path := GetEnvironmentVariable('CUDA_BIN_PATH');
-  if Length(path) > 0 then
-  begin
-    path := IncludeTrailingPathDelimiter(path);
-    if FileExists(path + 'nvcc.exe') then
-      FNVCCPath := path;
-  end;
-  path := 'C:\Program Files\Microsoft Visual Studio 2015\VC\bin\';
-  if FileExists(path + 'cl.exe') then
-    FCppCompilerPath := path
-  else
-        FCppCompilerPath := '';
-  FProjectModule := 'none';
-  FModuleInfo := TCUDAModuleInfo.Create;
-end;
-
-destructor TGLSCUDACompiler.Destroy;
-begin
-  FModuleInfo.Destroy;
-  inherited;
-end;
-
-procedure TGLSCUDACompiler.Loaded;
-var
-  LStr: string;
-begin
-  inherited;
-  if (FProjectModule <> 'none') and Assigned(vFindCuFileFunc) then
-  begin
-    LStr := FProjectModule;
-    if vFindCuFileFunc(LStr) then
-      FSourceCodeFile := LStr
-    else
-      FSourceCodeFile := '';
-  end;
-end;
-
-procedure TGLSCUDACompiler.Assign(Source: TPersistent);
-var
-  compiler: TGLSCUDACompiler;
-begin
-  if Source is TGLSCUDACompiler then
-  begin
-    compiler := TGLSCUDACompiler(Source);
-    FSourceCodeFile := compiler.FSourceCodeFile;
-    FOutputCodeType := compiler.FOutputCodeType;
-    FVirtualArch := compiler.FVirtualArch;
-  end;
-  inherited Assign(Source);
-end;
-
-function TGLSCUDACompiler.Compile: Boolean;
-const
-  ReadBufferSize = 1048576; // 1 MB Buffer
-  cSM: array[TGLSCUDARealArch] of string =
-    ('sm_10', 'sm_11', 'sm_12', 'sm_13', 'sm_20', 'sm_21');
-var
-  tepmPath, tempFile, tempFileExt: string;
-  commands, nvcc, pathfile, msg: string;
-  rArch: TGLSCUDARealArch;
-  CodeSource: TStringList;
-
-  Security: TSecurityAttributes;
-  ReadPipe, WritePipe: THandle;
-  start: TStartUpInfo;
-  ProcessInfo: TProcessInformation;
-  Buffer: PAnsiChar;
-  TotalBytesRead, BytesRead: DWORD;
-  Apprunning, n, BytesLeftThisMessage, TotalBytesAvail: Integer;
-begin
-  if not FileExists(FSourceCodeFile) then
-  begin
-    if csDesigning in ComponentState then
-      MessageDlg(strSourceFileNotFound, TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
-    else
-     {$IFDEF USE_LOGGING}
-       LogError(strSourceFileNotFound);
-     {$ENDIF}
-      exit(false);
-  end;
-  CodeSource := TStringList.Create;
-  CodeSource.LoadFromFile(FSourceCodeFile);
-  Result := false;
-  FConsoleContent := '';
-
-  if FileExists(FNVCCPath + 'nvcc.exe') and
-    FileExists(FCppCompilerPath + 'cl.exe') and Assigned(FProduct) then
-  begin
-    tepmPath := GetEnvironmentVariable('TEMP');
-    tepmPath := IncludeTrailingPathDelimiter(tepmPath);
-    tempFile := tepmPath + 'temp';
-    CodeSource.SaveToFile(tempFile + '.cu');
-    commands := '"' + tempFile + '.cu" ';
-
-    commands := commands + '-arch ';
-    case FVirtualArch of
-      compute_10:
-        commands := commands + 'compute_10 ';
-      compute_11:
-        commands := commands + 'compute_11 ';
-      compute_12:
-        commands := commands + 'compute_12 ';
-      compute_13:
-        commands := commands + 'compute_13 ';
-      compute_20:
-        commands := commands + 'compute_20 ';
-    end;
-
-    commands := commands + '-code ';
-    for rArch in FRealArch do
-      commands := commands + cSM[rArch] + ', ';
-    commands[Length(commands)-1] := ' ';
-
-    commands := commands + '-ccbin ';
-    pathfile := Copy(FCppCompilerPath, 1, Length(FCppCompilerPath) - 1);
-    commands := commands + '"' + pathfile + '" ';
-    commands := commands + '-Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MT " ';
-    commands := commands + '-maxrregcount=' + IntToStr(FMaxRegisterCount) + ' ';
-    commands := commands + '-m32 ';
-    case FOutputCodeType of
-      codePtx:
-        begin
-          commands := commands + '--ptx ';
-          tempFileExt := 'ptx';
-        end;
-      codeCubin:
-        begin
-          commands := commands + '--cubin ';
-          tempFileExt := 'cubin';
-        end;
-      codeGpu:
-        begin
-          commands := commands + '--gpu ';
-          tempFileExt := 'gpu';
-        end;
-    end;
-    commands := commands + '-o "' + tempFile + '.' + tempFileExt + '" ';
-    commands := commands + #00;
-    nvcc := FNVCCPath + 'nvcc.exe ';
-
-    with Security do
-    begin
-      nlength := SizeOf(TSecurityAttributes);
-      binherithandle := true;
-      lpsecuritydescriptor := nil;
-    end;
-
-    if CreatePipe(ReadPipe, WritePipe, @Security, 0) then
-    begin
-      // Redirect In- and Output through STARTUPINFO structure
-
-      Buffer := AllocMem(ReadBufferSize + 1);
-      FillChar(start, SizeOf(start), #0);
-      start.cb := SizeOf(start);
-      start.hStdOutput := WritePipe;
-      start.hStdInput := ReadPipe;
-      start.hStdError := WritePipe;
-      start.dwFlags := STARTF_USESTDHANDLES + STARTF_USESHOWWINDOW;
-      start.wShowWindow := SW_HIDE;
-
-      // Creates a Console Child Process with redirected input and output
-      if CreateProcess(nil, PChar(nvcc+commands), @Security, @Security, true,
-        CREATE_NO_WINDOW or NORMAL_PRIORITY_CLASS, nil, nil, start,
-        ProcessInfo) then
-      begin
-        n := 0;
-        TotalBytesRead := 0;
-        repeat
-          // Increase counter to prevent an endless loop if the process is dead
-          Inc(n, 1);
-
-          // wait for end of child process
-          Apprunning := WaitForSingleObject(ProcessInfo.hProcess, 100);
-          Application.ProcessMessages;
-
-          // it is important to read from time to time the output information
-          // so that the pipe is not blocked by an overflow. New information
-          // can be written from the console app to the pipe only if there is
-          // enough buffer space.
-
-          if not PeekNamedPipe(ReadPipe, @Buffer[TotalBytesRead],
-            ReadBufferSize, @BytesRead, @TotalBytesAvail,
-            @BytesLeftThisMessage) then
-            break
-          else if BytesRead > 0 then
-            ReadFile(ReadPipe, Buffer[TotalBytesRead], BytesRead,
-              BytesRead, nil);
-          TotalBytesRead := TotalBytesRead + BytesRead;
-        until (Apprunning <> WAIT_TIMEOUT) or (n > 150);
-
-        Buffer[TotalBytesRead] := #00;
-        OemToCharA(Buffer, Buffer);
-      end
-      else
-      begin
-        if csDesigning in ComponentState then
-          MessageDlg(strFailRunNVCC, TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
-        else
-         {$IFDEF USE_LOGGING}
-            LogError(strFailRunNVCC);
-         {$ENDIF}
-      end;
-
-      pathfile := tempFile + '.' + tempFileExt;
-      if FileExists(pathfile) then
-      begin
-        FProduct.LoadFromFile(pathfile);
-        FModuleInfo.ParseModule(CodeSource, FProduct);
-
-        if csDesigning in ComponentState then
-          FProduct.OnChange(Self);
-        DeleteFile(pathfile);
-        Result := true;
-        FConsoleContent := string(StrPas(Buffer));
-        msg := Format(strSuccessCompilation, [FConsoleContent]);
-        if csDesigning in ComponentState then
-          MessageDlg(msg, TMsgDlgType.mtInformation, [TMsgDlgBtn.mbOK], 0)
-        else
-         {$IFDEF USE_LOGGING}
-           LogInfo(msg);
-         {$ENDIF}
-      end
-      else
-      begin
-        msg := Format(strFailCompilation, [StrPas(Buffer)]);
-        if csDesigning in ComponentState then
-          MessageDlg(msg, TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
-        else
-          {$IFDEF USE_LOGGING}
-            LogError(msg);
-          {$ENDIF}
-      end;
-      FreeMem(Buffer);
-      CloseHandle(ProcessInfo.hProcess);
-      CloseHandle(ProcessInfo.hThread);
-      CloseHandle(ReadPipe);
-      CloseHandle(WritePipe);
-    end
-    else
-    begin
-      if csDesigning in ComponentState then
-        MessageDlg(strFailCreatePipe, TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
-      else
-       {$IFDEF USE_LOGGING}
-        GLSLogger.LogError(strFailCreatePipe);
-       {$ENDIF}
-    end;
-
-    pathfile := tempFile + '.cu';
-    DeleteFile(pathfile);
-  end;
-  CodeSource.Free;
-end;
-
-procedure TGLSCUDACompiler.SetCppCompilerPath(const AValue: string);
-begin
-  if FileExists(AValue + 'cl.exe') then
-    FCppCompilerPath := AValue;
-end;
-
-procedure TGLSCUDACompiler.setMaxRegisterCount(Value: Integer);
-begin
-  if Value <> FMaxRegisterCount then
-  begin
-    Value := 4 * (Value div 4);
-    if Value < 4 then
-      Value := 4;
-    if Value > 128 then
-      Value := 128;
-    FMaxRegisterCount := Value;
-  end;
-end;
-
-procedure TGLSCUDACompiler.SetNVCCPath(const AValue: string);
-begin
-  if FileExists(AValue + 'nvcc.exe') then
-    FNVCCPath := AValue;
-end;
-
-procedure TGLSCUDACompiler.setOutputCodeType(const Value
-  : TGLSCUDACompilerOutput);
-begin
-  if Value = codeUndefined then
-    exit;
-  FOutputCodeType := Value;
-end;
-
-procedure TGLSCUDACompiler.SetRealArch(AValue: TGLSCUDARealArchs);
-begin
-  if AValue = [] then
-    AValue := [sm_10];
-  FRealArch := AValue;
-end;
-
-procedure TGLSCUDACompiler.SetSourceCodeFile(const AFileName: string);
-begin
-  if FileStreamExists(AFileName) then
-    FSourceCodeFile := AFileName;
-end;
-
-function TGLSCUDACompiler.StoreProjectModule: Boolean;
-begin
-  Result := FProjectModule <> 'none';
-end;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDACompiler;
+
+(*
+  Component allows to compile the CUDA-source (*.cu) file.
+  in design- and runtime.
+  To work requires the presence of CUDA Toolkit 3.X and MS Visual Studio C++.
+*)
+
+interface
+
+uses
+  Winapi.Windows,
+  Winapi.ShellAPI,
+  Winapi.TlHelp32,
+  System.UITypes,
+  System.SysUtils,
+  System.Classes,
+  Vcl.Forms,
+  VCL.Dialogs,
+
+  GPU.CUDAParser,
+  GLS.ApplicationFileIO;
+
+type
+  // Kind of code nvcc is asked to emit; codeUndefined is rejected by the
+  // OutputCodeType setter.
+  TGLSCUDACompilerOutput = (codeUndefined, codePtx, codeCubin, codeGpu);
+
+  (*
+    compute_10 Basic features
+    compute_11 + atomic memory operations on global memory
+    compute_12 + atomic memory operations on shared memory
+               + vote instructions
+    compute_13 + double precision floating point support
+    Compute_20 + FERMI support
+  *)
+  TGLSCUDAVirtArch = (compute_10, compute_11, compute_12, compute_13, compute_20);
+
+  (*
+    sm_10 ISA_1 Basic features
+    sm_11 + atomic memory operations on global memory
+    sm_12 + atomic memory operations on shared memory
+          + vote instructions
+    sm_13 + double precision floating point support
+    sm_20 + FERMI support.
+    sm_21 + Unknown
+  *)
+  TGLSCUDARealArch = (sm_10, sm_11, sm_12, sm_13, sm_20, sm_21);
+  // Set of real architectures passed to nvcc's -code option.
+  TGLSCUDARealArchs = set of TGLSCUDARealArch;
+
+  // Component that drives nvcc (with the MS Visual C++ compiler as host
+  // compiler) to turn a CUDA source file into PTX/cubin/gpu code.
+  TGLCUDACompiler = class(TComponent)
+  private
+    FNVCCPath: string;
+    FCppCompilerPath: string;
+    FProduct: TStringList;
+    FProjectModule: string;
+    FSourceCodeFile: string;
+    FConsoleContent: string;
+    FOutputCodeType: TGLSCUDACompilerOutput;
+    FVirtualArch: TGLSCUDAVirtArch;
+    FRealArch: TGLSCUDARealArchs;
+    FMaxRegisterCount: Integer;
+    FModuleInfo: TCUDAModuleInfo;
+    procedure SetMaxRegisterCount(Value: Integer);
+    procedure SetOutputCodeType(const Value: TGLSCUDACompilerOutput);
+    function StoreProjectModule: Boolean;
+    procedure SetRealArch(AValue: TGLSCUDARealArchs);
+    procedure SetNVCCPath(const AValue: string);
+    procedure SetCppCompilerPath(const AValue: string);
+  protected
+    procedure Loaded; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure Assign(Source: TPersistent); override;
+    // Accepts the file name only if FileStreamExists reports it present.
+    procedure SetSourceCodeFile(const AFileName: string);
+    // Runs nvcc on SourceCodeFile; True when an output file was produced.
+    function Compile: Boolean;
+    // Receives the compilation product. The list is supplied by the caller;
+    // this component neither creates nor frees it.
+    property Product: TStringList read FProduct write FProduct;
+    // Module description parsed from the source and the product.
+    property ModuleInfo: TCUDAModuleInfo read FModuleInfo;
+    // Console output captured from the last successful compilation.
+    property ConsoleContent: string read FConsoleContent;
+  published
+    // Directory containing nvcc.exe (NVidia CUDA Compiler), with trailing
+    // path delimiter.
+    property NVCCPath: string read FNVCCPath write SetNVCCPath;
+    (*  Directory containing cl.exe (Microsoft Visual Studio compiler),
+      with trailing path delimiter.
+      Pascal compiler is still not done. *)
+    property CppCompilerPath: string read FCppCompilerPath
+      write SetCppCompilerPath;
+    // Full file name of the source code file (read-only when published).
+    property SourceCodeFile: string read FSourceCodeFile;
+    (*  Design-time only property.
+      Selects one of the project modules as the CUDA kernel source. *)
+    property ProjectModule: string read FProjectModule write FProjectModule
+      stored StoreProjectModule;
+    (*  Output code type for module kernel
+      - Ptx - Parallel Thread Execution
+      - Cubin - CUDA Binary *)
+    property OutputCodeType: TGLSCUDACompilerOutput read FOutputCodeType
+      write setOutputCodeType default codePtx;
+    (*  In the CUDA naming scheme,
+        GPUs are named sm_xy,
+        where x denotes the GPU generation number,
+        and y the version in that generation. *)
+    property RealArchitecture: TGLSCUDARealArchs read FRealArch
+      write SetRealArch default [sm_13];
+    // Virtual architecture passed to nvcc's -arch option.
+    property VirtualArchitecture: TGLSCUDAVirtArch read FVirtualArch
+      write FVirtualArch default compute_13;
+    // Maximum number of registers a kernel may use; the setter rounds down
+    // to a multiple of 4 and clamps to 4..128.
+    property MaxRegisterCount: Integer read FMaxRegisterCount
+      write SetMaxRegisterCount default 32;
+  end;
+
+  // Callback that resolves a project module name to a source file name;
+  // it rewrites AModuleName in place and returns True on success.
+  TFindCuFileFunc = function(var AModuleName: string): Boolean;
+
+var
+  // Optional resolver consulted by TGLCUDACompiler.Loaded; may be nil.
+  vFindCuFileFunc: TFindCuFileFunc;
+
+//------------------------------------------------------------------
+implementation
+//------------------------------------------------------------------
+
+
+// ------------------
+// ------------------ TGLCUDACompiler ------------------
+// ------------------
+
+// Sets the published defaults and probes for the tool chain:
+// NVCCPath comes from the CUDA_BIN_PATH environment variable when nvcc.exe
+// is found there, CppCompilerPath from the default Visual Studio 2015
+// location when cl.exe is found there; otherwise both stay empty.
+constructor TGLCUDACompiler.Create(AOwner: TComponent);
+const
+  cDefaultVCBin = 'C:\Program Files\Microsoft Visual Studio 2015\VC\bin\';
+var
+  cudaBin: string;
+begin
+  inherited Create(AOwner);
+  FOutputCodeType := codePtx;
+  FVirtualArch := compute_13;
+  FRealArch := [sm_13];
+  FMaxRegisterCount := 32;
+  FProjectModule := 'none';
+  FModuleInfo := TCUDAModuleInfo.Create;
+
+  FNVCCPath := '';
+  cudaBin := GetEnvironmentVariable('CUDA_BIN_PATH');
+  if cudaBin <> '' then
+  begin
+    cudaBin := IncludeTrailingPathDelimiter(cudaBin);
+    if FileExists(cudaBin + 'nvcc.exe') then
+      FNVCCPath := cudaBin;
+  end;
+
+  if FileExists(cDefaultVCBin + 'cl.exe') then
+    FCppCompilerPath := cDefaultVCBin
+  else
+    FCppCompilerPath := '';
+end;
+
+// Releases the module-info object owned by the compiler.
+destructor TGLCUDACompiler.Destroy;
+begin
+  // Free (not Destroy): the destructor also runs for a partially
+  // constructed instance, where FModuleInfo may still be nil.
+  FModuleInfo.Free;
+  inherited;
+end;
+
+// After streaming: when a project module is selected and a resolver
+// callback is installed, translate the module name into the source file
+// name (or clear the source file name when resolution fails).
+procedure TGLCUDACompiler.Loaded;
+var
+  ResolvedName: string;
+begin
+  inherited;
+  if FProjectModule = 'none' then
+    exit;
+  if not Assigned(vFindCuFileFunc) then
+    exit;
+
+  ResolvedName := FProjectModule;
+  if not vFindCuFileFunc(ResolvedName) then
+    ResolvedName := '';
+  FSourceCodeFile := ResolvedName;
+end;
+
+// Copies the source file name, output code type and virtual architecture
+// from another TGLCUDACompiler. Any other source is passed to the inherited
+// implementation, which raises EConvertError for unsupported types.
+procedure TGLCUDACompiler.Assign(Source: TPersistent);
+var
+  compiler: TGLCUDACompiler;
+begin
+  if Source is TGLCUDACompiler then
+  begin
+    compiler := TGLCUDACompiler(Source);
+    FSourceCodeFile := compiler.FSourceCodeFile;
+    FOutputCodeType := compiler.FOutputCodeType;
+    FVirtualArch := compiler.FVirtualArch;
+  end
+  else
+    // Only delegate when we could not handle Source ourselves: calling the
+    // inherited TPersistent.Assign after a successful copy ends in
+    // AssignError and raises.
+    inherited Assign(Source);
+end;
+
+// Compiles SourceCodeFile with nvcc.
+// The source is copied to "%TEMP%\temp.cu", nvcc is started with its
+// console output redirected into a pipe, and - when nvcc produced the
+// output file - the result is loaded into Product, parsed into ModuleInfo
+// and the captured console text is stored in ConsoleContent.
+// Returns True only when the output file was produced. Returns False
+// without compiling when the source file is missing, when nvcc.exe or
+// cl.exe cannot be found at the configured paths, or when Product is nil.
+// At design time problems are reported with message dialogs; at run time
+// they are logged when USE_LOGGING is defined.
+function TGLCUDACompiler.Compile: Boolean;
+const
+  ReadBufferSize = 1048576; // 1 MB Buffer
+  cSM: array[TGLSCUDARealArch] of string =
+    ('sm_10', 'sm_11', 'sm_12', 'sm_13', 'sm_20', 'sm_21');
+var
+  tepmPath, tempFile, tempFileExt: string;
+  commands, nvcc, pathfile, msg: string;
+  rArch: TGLSCUDARealArch;
+  CodeSource: TStringList;
+
+  Security: TSecurityAttributes;
+  ReadPipe, WritePipe: THandle;
+  start: TStartUpInfo;
+  ProcessInfo: TProcessInformation;
+  ProcessStarted: Boolean;
+  Buffer: PAnsiChar;
+  TotalBytesRead, BytesRead: DWORD;
+  Apprunning, n, BytesLeftThisMessage, TotalBytesAvail: Integer;
+begin
+  Result := false;
+  if not FileExists(FSourceCodeFile) then
+  begin
+    if csDesigning in ComponentState then
+      MessageDlg('Source File Not Found', TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
+    else
+    begin
+      {$IFDEF USE_LOGGING}
+      LogError('Source File Not Found');
+      {$ENDIF}
+    end;
+    // Leave in every configuration; previously the exit was the "else"
+    // branch when USE_LOGGING was undefined, so design mode fell through.
+    exit(false);
+  end;
+
+  CodeSource := TStringList.Create;
+  try
+    CodeSource.LoadFromFile(FSourceCodeFile);
+    FConsoleContent := '';
+
+    if FileExists(FNVCCPath + 'nvcc.exe') and
+      FileExists(FCppCompilerPath + 'cl.exe') and Assigned(FProduct) then
+    begin
+      tepmPath := GetEnvironmentVariable('TEMP');
+      tepmPath := IncludeTrailingPathDelimiter(tepmPath);
+      tempFile := tepmPath + 'temp';
+      CodeSource.SaveToFile(tempFile + '.cu');
+      try
+        commands := '"' + tempFile + '.cu" ';
+
+        commands := commands + '-arch ';
+        case FVirtualArch of
+          compute_10:
+            commands := commands + 'compute_10 ';
+          compute_11:
+            commands := commands + 'compute_11 ';
+          compute_12:
+            commands := commands + 'compute_12 ';
+          compute_13:
+            commands := commands + 'compute_13 ';
+          compute_20:
+            commands := commands + 'compute_20 ';
+        end;
+
+        // NOTE(review): the list is emitted as "sm_10, sm_13  " (comma
+        // followed by a space); verify nvcc accepts this form when more
+        // than one real architecture is selected.
+        commands := commands + '-code ';
+        for rArch in FRealArch do
+          commands := commands + cSM[rArch] + ', ';
+        commands[Length(commands)-1] := ' ';
+
+        commands := commands + '-ccbin ';
+        // Strip the trailing path delimiter before quoting the directory.
+        pathfile := Copy(FCppCompilerPath, 1, Length(FCppCompilerPath) - 1);
+        commands := commands + '"' + pathfile + '" ';
+        commands := commands + '-Xcompiler "/EHsc /W3 /nologo /O2 /Zi /MT " ';
+        commands := commands + '-maxrregcount=' + IntToStr(FMaxRegisterCount) + ' ';
+        commands := commands + '-m32 ';
+        case FOutputCodeType of
+          codePtx:
+            begin
+              commands := commands + '--ptx ';
+              tempFileExt := 'ptx';
+            end;
+          codeCubin:
+            begin
+              commands := commands + '--cubin ';
+              tempFileExt := 'cubin';
+            end;
+          codeGpu:
+            begin
+              commands := commands + '--gpu ';
+              tempFileExt := 'gpu';
+            end;
+        end;
+        commands := commands + '-o "' + tempFile + '.' + tempFileExt + '" ';
+        commands := commands + #00;
+        // Quote the executable: with lpApplicationName = nil an unquoted
+        // path containing spaces is parsed ambiguously by CreateProcess.
+        nvcc := '"' + FNVCCPath + 'nvcc.exe" ';
+
+        with Security do
+        begin
+          nlength := SizeOf(TSecurityAttributes);
+          binherithandle := true;
+          lpsecuritydescriptor := nil;
+        end;
+
+        if CreatePipe(ReadPipe, WritePipe, @Security, 0) then
+        begin
+          // Redirect In- and Output through STARTUPINFO structure
+
+          // AllocMem zero-fills, so Buffer is a valid empty string even
+          // when nothing is ever read into it.
+          Buffer := AllocMem(ReadBufferSize + 1);
+          ProcessStarted := false;
+          try
+            FillChar(start, SizeOf(start), #0);
+            start.cb := SizeOf(start);
+            start.hStdOutput := WritePipe;
+            start.hStdInput := ReadPipe;
+            start.hStdError := WritePipe;
+            start.dwFlags := STARTF_USESTDHANDLES + STARTF_USESHOWWINDOW;
+            start.wShowWindow := SW_HIDE;
+
+            // Creates a Console Child Process with redirected input and output
+            ProcessStarted := CreateProcess(nil, PChar(nvcc+commands),
+              @Security, @Security, true,
+              CREATE_NO_WINDOW or NORMAL_PRIORITY_CLASS, nil, nil, start,
+              ProcessInfo);
+            if ProcessStarted then
+            begin
+              n := 0;
+              TotalBytesRead := 0;
+              repeat
+                // Increase counter to prevent an endless loop if the process is dead
+                Inc(n, 1);
+
+                // wait for end of child process
+                Apprunning := WaitForSingleObject(ProcessInfo.hProcess, 100);
+                Application.ProcessMessages;
+
+                // it is important to read from time to time the output information
+                // so that the pipe is not blocked by an overflow. New information
+                // can be written from the console app to the pipe only if there is
+                // enough buffer space.
+
+                // Offer only the space that is still free, so that data
+                // appended at TotalBytesRead cannot run past the buffer.
+                if not PeekNamedPipe(ReadPipe, @Buffer[TotalBytesRead],
+                  ReadBufferSize - TotalBytesRead, @BytesRead, @TotalBytesAvail,
+                  @BytesLeftThisMessage) then
+                  break
+                else if BytesRead > 0 then
+                  ReadFile(ReadPipe, Buffer[TotalBytesRead], BytesRead,
+                    BytesRead, nil);
+                TotalBytesRead := TotalBytesRead + BytesRead;
+              until (Apprunning <> WAIT_TIMEOUT) or (n > 150);
+
+              Buffer[TotalBytesRead] := #00;
+              OemToCharA(Buffer, Buffer);
+            end
+            else
+            begin
+              if csDesigning in ComponentState then
+                MessageDlg('Fail Run NVCC', TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
+              else
+              begin
+                {$IFDEF USE_LOGGING}
+                LogError('Fail Run NVCC');
+                {$ENDIF}
+              end;
+            end;
+
+            pathfile := tempFile + '.' + tempFileExt;
+            if FileExists(pathfile) then
+            begin
+              FProduct.LoadFromFile(pathfile);
+              FModuleInfo.ParseModule(CodeSource, FProduct);
+
+              // The product list may have no change handler attached.
+              if (csDesigning in ComponentState) and
+                Assigned(FProduct.OnChange) then
+                FProduct.OnChange(Self);
+              DeleteFile(pathfile);
+              Result := true;
+              FConsoleContent := string(StrPas(Buffer));
+              // The format string needs a placeholder, otherwise the
+              // console output passed as argument is silently dropped.
+              msg := Format('Success Compilation: %s', [FConsoleContent]);
+              if csDesigning in ComponentState then
+                MessageDlg(msg, TMsgDlgType.mtInformation, [TMsgDlgBtn.mbOK], 0)
+              else
+              begin
+                {$IFDEF USE_LOGGING}
+                LogInfo(msg);
+                {$ENDIF}
+              end;
+            end
+            else
+            begin
+              msg := Format('Fail Compilation: %s', [StrPas(Buffer)]);
+              if csDesigning in ComponentState then
+                MessageDlg(msg, TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
+              else
+              begin
+                {$IFDEF USE_LOGGING}
+                LogError(msg);
+                {$ENDIF}
+              end;
+            end;
+          finally
+            FreeMem(Buffer);
+            // ProcessInfo is only valid when CreateProcess succeeded.
+            if ProcessStarted then
+            begin
+              CloseHandle(ProcessInfo.hProcess);
+              CloseHandle(ProcessInfo.hThread);
+            end;
+            CloseHandle(ReadPipe);
+            CloseHandle(WritePipe);
+          end;
+        end
+        else
+        begin
+          if csDesigning in ComponentState then
+            MessageDlg('Fail Create Pipe', TMsgDlgType.mtError, [TMsgDlgBtn.mbOK], 0)
+          else
+          begin
+            {$IFDEF USE_LOGGING}
+            LogError('Fail Create Pipe');
+            {$ENDIF}
+          end;
+        end;
+      finally
+        // Remove the temporary copy of the source even when an exception
+        // escapes from the compilation step.
+        pathfile := tempFile + '.cu';
+        DeleteFile(pathfile);
+      end;
+    end;
+  finally
+    CodeSource.Free;
+  end;
+end;
+
+// Accepts the directory only when cl.exe exists directly under it
+// (AValue is expected to end with a path delimiter).
+procedure TGLCUDACompiler.SetCppCompilerPath(const AValue: string);
+begin
+  if not FileExists(AValue + 'cl.exe') then
+    exit;
+  FCppCompilerPath := AValue;
+end;
+
+// Stores the register limit rounded toward zero to a multiple of 4 and
+// clamped to the range 4..128. A value equal to the current one is ignored.
+procedure TGLCUDACompiler.setMaxRegisterCount(Value: Integer);
+var
+  Rounded: Integer;
+begin
+  if Value = FMaxRegisterCount then
+    exit;
+  Rounded := (Value div 4) * 4;
+  if Rounded < 4 then
+    Rounded := 4
+  else if Rounded > 128 then
+    Rounded := 128;
+  FMaxRegisterCount := Rounded;
+end;
+
+// Accepts the directory only when nvcc.exe exists directly under it
+// (AValue is expected to end with a path delimiter).
+procedure TGLCUDACompiler.SetNVCCPath(const AValue: string);
+begin
+  if not FileExists(AValue + 'nvcc.exe') then
+    exit;
+  FNVCCPath := AValue;
+end;
+
+// Changes the output code type; codeUndefined is silently rejected.
+procedure TGLCUDACompiler.setOutputCodeType(const Value
+  : TGLSCUDACompilerOutput);
+begin
+  if Value <> codeUndefined then
+    FOutputCodeType := Value;
+end;
+
+// Stores the set of real architectures; an empty set falls back to [sm_10].
+procedure TGLCUDACompiler.SetRealArch(AValue: TGLSCUDARealArchs);
+begin
+  if AValue <> [] then
+    FRealArch := AValue
+  else
+    FRealArch := [sm_10];
+end;
+
+// Remembers the source file name when FileStreamExists reports it present;
+// otherwise the current name is kept.
+procedure TGLCUDACompiler.SetSourceCodeFile(const AFileName: string);
+begin
+  if not FileStreamExists(AFileName) then
+    exit;
+  FSourceCodeFile := AFileName;
+end;
+
+// Storage specifier for ProjectModule: the placeholder value 'none' is not
+// written to the form file.
+function TGLCUDACompiler.StoreProjectModule: Boolean;
+begin
+  Result := not (FProjectModule = 'none');
+end;
+
+end.

+ 865 - 868
Source/GLS.CUDAContext.pas → Source/GPU.CUDAContext.pas

@@ -1,868 +1,865 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAContext;
-
-(* CUDA context *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  System.Classes,
-  System.SysUtils,
-  GLS.Strings,
-  GLS.BaseClasses,
-  GLS.Context,
-  GLS.Generics,
-
-  GLS.Logger,
-  GLS.CUDARunTime,
-  GLS.CUDAApi;
-
-type
-
-  TCUDADimensions = class(TGLUpdateAbleObject)
-  private
-    FXYZ: TDim3;
-    FMaxXYZ: TDim3;
-    FReadOnly: Boolean;
-    function GetDimComponent(index: Integer): Integer;
-    procedure SetDimComponent(index: Integer; Value: Integer);
-    function GetMaxDimComponent(index: Integer): Integer;
-    procedure SetMaxDimComponent(index: Integer; Value: Integer);
-  public
-    constructor Create(AOwner: TPersistent); override;
-    procedure Assign(Source: TPersistent); override;
-    property MaxSizeX: Integer index 0 read GetMaxDimComponent
-      write SetMaxDimComponent;
-    property MaxSizeY: Integer index 1 read GetMaxDimComponent
-      write SetMaxDimComponent;
-    property MaxSizeZ: Integer index 2 read GetMaxDimComponent
-      write SetMaxDimComponent;
-    property ReadOnlyValue: Boolean read FReadOnly write FReadOnly;
-  published
-    { Published Properties }
-    property SizeX: Integer index 0 read GetDimComponent write SetDimComponent
-      default 1;
-    property SizeY: Integer index 1 read GetDimComponent write SetDimComponent
-      default 1;
-    property SizeZ: Integer index 2 read GetDimComponent write SetDimComponent
-      default 1;
-  end;
-
-  TCUDAContext = class;
-  TOnOpenGLInteropInit = procedure(out Context: TGLContext) of object;
-
-  TCUDADevice = class(TPersistent)
-  private
-    fID: Integer;
-    fHandle: TCUdevice;
-    fGFlops: Integer;
-    fDeviceProperties: TCudaDeviceProp;
-    FSuitable: Boolean;
-    FUsed: Boolean;
-    fMaxThreadsDim: TCUDADimensions;
-    fMaxGridSize: TCUDADimensions;
-  protected
-    function GetName: string;
-  public
-    constructor Create; reintroduce;
-    destructor Destroy; override;
-    procedure Assign(Source: TPersistent); override;
-    {  Returns in bytes the total amount of memory
-      available on the device dev in bytes. }
-    function TotalMemory: Cardinal;
-  published
-    property Name: string read GetName;
-    property TotalGlobalMem: NativeUInt read fDeviceProperties.TotalGlobalMem;
-    property SharedMemPerBlock: NativeUInt read fDeviceProperties.SharedMemPerBlock;
-    property RegsPerBlock: Integer read fDeviceProperties.RegsPerBlock;
-    property WarpSize: Integer read fDeviceProperties.WarpSize;
-    property MemPitch: NativeUInt read fDeviceProperties.MemPitch;
-    property MaxThreadsPerBlock: Integer
-      read fDeviceProperties.MaxThreadsPerBlock;
-    property MaxThreadsDim: TCUDADimensions read fMaxThreadsDim;
-    property MaxGridSize: TCUDADimensions read fMaxGridSize;
-    property ClockRate: Integer read fDeviceProperties.ClockRate;
-    property TotalConstMem: NativeUInt read fDeviceProperties.TotalConstMem;
-    property Major: Integer read fDeviceProperties.Major;
-    property Minor: Integer read fDeviceProperties.Minor;
-    property TextureAlignment: NativeUInt read fDeviceProperties.TextureAlignment;
-    property DeviceOverlap: Integer read fDeviceProperties.DeviceOverlap;
-    property MultiProcessorCount: Integer
-      read fDeviceProperties.MultiProcessorCount;
-  end;
-
-  TGLSCUDADevice = class(TComponent)
-  private
-    FSelectDeviceName: string;
-    function GetDevice: TCUDADevice;
-    procedure SetDevice(AValue: TCUDADevice);
-    procedure SetDeviceName(const AName: string);
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    function Suitable: Boolean;
-  published
-    property SelectDevice: string read FSelectDeviceName write SetDeviceName;
-    property Device: TCUDADevice read GetDevice write SetDevice;
-  end;
-
-  TCUDAHandlesMaster = class(TComponent)
-  protected
-    function GetContext: TCUDAContext; virtual; abstract;
-    procedure AllocateHandles; virtual;
-    procedure DestroyHandles; virtual;
-  end;
-
-  TCUDAHandleList = GThreadList<TCUDAHandlesMaster>;
-
-  TCUDAContext = class(TObject)
-  private
-    fHandle: PCUcontext;
-    FDevice: TCUDADevice;
-    FOnOpenGLInteropInit: TOnOpenGLInteropInit;
-    FHandleList: TCUDAHandleList;
-    procedure SetDevice(ADevice: TCUDADevice);
-  public
-    constructor Create;
-    destructor Destroy; override;
-    {  Destroy all handles based of this context. }
-    procedure DestroyAllHandles;
-    {  Pushes context onto CPU thread’s stack of current contexts. }
-    procedure Requires;
-    {  Pops context from current CPU thread. }
-    procedure Release;
-    function IsValid: Boolean; inline;
-    property Device: TCUDADevice read FDevice write SetDevice;
-    property OnOpenGLInteropInit: TOnOpenGLInteropInit read FOnOpenGLInteropInit
-      write FOnOpenGLInteropInit;
-  end;
-
-  TCUDADeviceList = GList<TCUDADevice>;
-  TCUDAContextList = GList<TCUDAContext>;
-
-  {  Static class of CUDA contexts manager. }
-   CUDAContextManager = class
-  private
-    class var fDeviceList: TCUDADeviceList;
-    class var fContextList: TCUDAContextList;
-    class var FContextStacks: array of TCUDAContextList;
-  protected
-    class function GetDevice(i: Integer): TCUDADevice;
-    class function GetNextUnusedDevice: TCUDADevice;
-    class procedure RegisterContext(aContext: TCUDAContext);
-    class procedure UnRegisterContext(aContext: TCUDAContext);
-    class function GetThreadStack: TCUDAContextList;
-    class function GetContext(i: Integer): TCUDAContext;
-  public
-    //  Management
-    class procedure Init;
-    class procedure Done;
-    class procedure CreateContext(aContext: TCUDAContext);
-    class procedure DestroyContext(aContext: TCUDAContext);
-    class procedure CreateContextOf(ADevice: TCUDADevice);
-    class procedure DestroyContextOf(ADevice: TCUDADevice);
-    class procedure PushContext(aContext: TCUDAContext);
-    class function PopContext: TCUDAContext;
-    //  Fill unused device list to show its in property.
-    class procedure FillUnusedDeviceList(var AList: TStringList);
-    //  Return device by name.
-    class function GetDeviceByName(const AName: string): TCUDADevice;
-    //  Returns the number of CUDA compatiable devices.
-    class function DeviceCount: Integer;
-    //  Access to devices list.
-    property Devices[i: Integer]: TCUDADevice read GetDevice;
-    //  Returns a device that has a maximum Giga flops.
-    class function GetMaxGflopsDevice: TCUDADevice;
-    //  Returns the number of TCUDAcontext object.
-    class function ContextCount: Integer;
-    //  Return CUDA context of current thread.
-    class function GetCurrentThreadContext: TCUDAContext;
-    {  Access to contexts list. }
-    property Contexts[i: Integer]: TCUDAContext read GetContext;
-  end;
-
-//---------------------------------------------------------------------
-implementation
-//---------------------------------------------------------------------
-
-threadvar
-  vStackIndex: Cardinal;
-
-// ------------------
-// ------------------ TCUDADimensions ------------------
-// ------------------
-
-constructor TCUDADimensions.Create(AOwner: TPersistent);
-const
-  cXYZone: TDim3 = (1, 1, 1);
-  cXYZmax: TDim3 = (MaxInt, MaxInt, MaxInt);
-begin
-  inherited Create(AOwner);
-  FReadOnly := False;
-  FXYZ := cXYZone;
-  FMaxXYZ := cXYZmax;
-end;
-
-procedure TCUDADimensions.Assign(Source: TPersistent);
-begin
-  if Source is TCUDADimensions then
-  begin
-    FMaxXYZ[0] := TCUDADimensions(Source).FMaxXYZ[0];
-    FMaxXYZ[1] := TCUDADimensions(Source).FMaxXYZ[1];
-    FMaxXYZ[2] := TCUDADimensions(Source).FMaxXYZ[2];
-    FXYZ[0] := TCUDADimensions(Source).FXYZ[0];
-    FXYZ[1] := TCUDADimensions(Source).FXYZ[1];
-    FXYZ[2] := TCUDADimensions(Source).FXYZ[2];
-    NotifyChange(Self);
-  end;
-  inherited Assign(Source);
-end;
-
-function TCUDADimensions.GetDimComponent(index: Integer): Integer;
-begin
-  Result := FXYZ[index];
-end;
-
-procedure TCUDADimensions.SetDimComponent(index: Integer; Value: Integer);
-var
-  v: LongWord;
-begin
-  if not FReadOnly then
-  begin
-    if Value < 1 then
-      v := 1
-    else
-      v := LongWord(Value);
-    if v > FMaxXYZ[index] then
-      v := FMaxXYZ[index];
-    FXYZ[index] := v;
-    NotifyChange(Self);
-  end;
-end;
-
-function TCUDADimensions.GetMaxDimComponent(index: Integer): Integer;
-begin
-  Result := FMaxXYZ[index];
-end;
-
-procedure TCUDADimensions.SetMaxDimComponent(index: Integer; Value: Integer);
-begin
-  if not FReadOnly then
-  begin
-    if Value > 0 then
-    begin
-      FMaxXYZ[index] := LongWord(Value);
-      if FXYZ[index] > FMaxXYZ[index] then
-        FXYZ[index] := FMaxXYZ[index];
-      NotifyChange(Self);
-    end;
-  end;
-end;
-
-// ------------------
-// ------------------ TCUDADevice ------------------
-// ------------------
-
-// Creates the descriptor of the next CUDA device. The device index is the
-// current length of CUDAContextManager.fDeviceList, so instances have to be
-// created in enumeration order and added to that list right away (as
-// CUDAContextManager.Init does). When CUDA is initialized and cuDeviceGet
-// succeeds, the device properties are read through the driver API;
-// otherwise FSuitable stays False and the properties keep their zero values.
-constructor TCUDADevice.Create;
-begin
-  // Both dimension holders are filled below by writing their fields
-  // directly; they are read-only for everybody else.
-  fMaxThreadsDim := TCUDADimensions.Create(Self);
-  fMaxThreadsDim.ReadOnlyValue := True;
-  fMaxGridSize := TCUDADimensions.Create(Self);
-  fMaxGridSize.ReadOnlyValue := True;
-
-  if IsCUDAInitialized then
-  begin
-    fID := CUDAContextManager.fDeviceList.Count;
-    FUsed := False;
-
-    FSuitable := cuDeviceGet(fHandle, fID) = CUDA_SUCCESS;
-    if FSuitable then
-    begin
-      cuDeviceGetName(@fDeviceProperties.name[0], SizeOf(fDeviceProperties.name), fHandle);
-      cuDeviceTotalMem(@fDeviceProperties.TotalGlobalMem, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.SharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.RegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MemPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.ClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.TotalConstMem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, fHandle);
-      cuDeviceComputeCapability(fDeviceProperties.Major, fDeviceProperties.Minor, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.TextureAlignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, fHandle);
-      cuDeviceGetAttribute(@fDeviceProperties.DeviceOverlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, fHandle);
-      // The multiprocessor count needs its own field: it feeds fGFlops just
-      // below and must not overwrite the overlap flag read on the line above.
-      cuDeviceGetAttribute(@fDeviceProperties.MultiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, fHandle);
-      // Relative performance figure compared by GetMaxGflopsDevice.
-      fGFlops := fDeviceProperties.MultiProcessorCount *
-        fDeviceProperties.ClockRate;
-      fMaxThreadsDim.FXYZ[0] := fDeviceProperties.MaxThreadsDim[0];
-      fMaxThreadsDim.FXYZ[1] := fDeviceProperties.MaxThreadsDim[1];
-      fMaxThreadsDim.FXYZ[2] := fDeviceProperties.MaxThreadsDim[2];
-      fMaxGridSize.FXYZ[0] := fDeviceProperties.MaxGridSize[0];
-      fMaxGridSize.FXYZ[1] := fDeviceProperties.MaxGridSize[1];
-      fMaxGridSize.FXYZ[2] := fDeviceProperties.MaxGridSize[2];
-    end;
-  end;
-end;
-
-destructor TCUDADevice.Destroy;
-begin
-  fMaxThreadsDim.Destroy;
-  fMaxGridSize.Destroy;
-  inherited;
-end;
-
-// Copies the identity, the cached properties and both dimension limits from
-// another TCUDADevice. FUsed is not copied. Any other source type is handed
-// to the inherited Assign.
-procedure TCUDADevice.Assign(Source: TPersistent);
-var
-  dev: TCUDADevice;
-begin
-  if Source is TCUDADevice then
-  begin
-    dev := TCUDADevice(Source);
-    fID := dev.fID;
-    fHandle := dev.fHandle;
-    fGFlops := dev.fGFlops;
-    fDeviceProperties := dev.fDeviceProperties;
-    FSuitable := dev.FSuitable;
-    fMaxThreadsDim.Assign(dev.fMaxThreadsDim);
-    fMaxGridSize.Assign(dev.fMaxGridSize);
-  end
-  else
-    // The ancestor is TPersistent, whose Assign ends in AssignTo and raises
-    // EConvertError; it is therefore reserved for sources that cannot be
-    // copied here and must not run after a successful copy.
-    inherited Assign(Source);
-end;
-
-function TCUDADevice.GetName: string;
-begin
-  Result := Format('%s (%d)', [string(fDeviceProperties.name), fID + 1]);
-end;
-
-function TCUDADevice.TotalMemory: Cardinal;
-begin
-  cuDeviceTotalMem(@fDeviceProperties.TotalGlobalMem, fHandle);
-  Result := fDeviceProperties.TotalGlobalMem;
-end;
-
-// ------------------
-// ------------------ TGLSCUDADevice ------------------
-// ------------------
-
-constructor TGLSCUDADevice.Create(AOwner: TComponent);
-var
-  LDevice: TCUDADevice;
-begin
-  inherited Create(AOwner);
-  LDevice := CUDAContextManager.GetNextUnusedDevice;
-  if Assigned(LDevice) and LDevice.FSuitable then
-  begin
-    FSelectDeviceName := LDevice.name;
-    LDevice.FUsed := True;
-  end
-  else
-  begin
-    FSelectDeviceName := '';
-  end;
-end;
-
-destructor TGLSCUDADevice.Destroy;
-var
-  Device: TCUDADevice;
-begin
-  inherited;
-  Device := CUDAContextManager.GetDeviceByName(FSelectDeviceName);
-  if Assigned(Device) then
-    Device.FUsed := False;
-end;
-
-function TGLSCUDADevice.GetDevice: TCUDADevice;
-begin
-  Result := CUDAContextManager.GetDeviceByName(FSelectDeviceName);
-end;
-
-function TGLSCUDADevice.Suitable: Boolean;
-var
-  LDevice: TCUDADevice;
-begin
-  LDevice := GetDevice;
-  Result := Assigned(LDevice);
-  if Result then
-    Result := LDevice.FSuitable;
-end;
-
-procedure TGLSCUDADevice.SetDevice(AValue: TCUDADevice);
-begin
-end;
-
-procedure TGLSCUDADevice.SetDeviceName(const AName: string);
-begin
-  if FSelectDeviceName <> AName then
-  begin
-    CUDAContextManager.DestroyContextOf(Self.Device);
-    FSelectDeviceName := AName;
-    CUDAContextManager.CreateContextOf(Self.Device);
-  end;
-end;
-
-// ------------------
-// ------------------ TCUDAContextManager ------------------
-// ------------------
-
-class procedure CUDAContextManager.Init;
-var
-  dCount: Integer;
-  status: TCUresult;
-  i: Integer;
-begin
-  if InitCUDA and not Assigned(fDeviceList) then
-  begin
-    fDeviceList := TCUDADeviceList.Create;
-    fContextList := TCUDAContextList.Create;
-    dCount := 0;
-    status := cuInit(0);
-    if status = CUDA_SUCCESS then
-      cuDeviceGetCount(dCount);
-
-    // Fill devices list
-    for i := 0 to dCount - 1 do
-      fDeviceList.Add(TCUDADevice.Create);
-  end;
-end;
-
-// Tears the manager down: frees the device objects, unwinds and destroys
-// the context stacks, frees both lists and finally calls CloseCUDA.
-class procedure CUDAContextManager.Done;
-var
-  I, J: Integer;
-begin
-  if Assigned(fDeviceList) then
-    for I := 0 to fDeviceList.Count - 1 do
-      fDeviceList[I].Free;
-
-  for I := 0 to High(FContextStacks) do
-  begin
-    if FContextStacks[I].Count > 0 then
-    begin
-      // Entries left on a stack mean a Requires call was never matched by a
-      // Release; report it and release the leftovers from the top down.
-      // The loop has to count down: "Count - 1 to 0" is an empty range for
-      // every stack holding more than one entry.
-      // NOTE(review): Release pops from the calling thread's stack, which is
-      // not necessarily stack I - confirm this is the intended unwinding.
-      GLSLogger.LogError(strUnbalansedUsage);
-      for J := FContextStacks[I].Count - 1 downto 0 do
-        FContextStacks[I][J].Release;
-    end;
-    FContextStacks[I].Destroy;
-  end;
-
-  fDeviceList.Free;
-  fContextList.Free;
-  CloseCUDA;
-end;
-
-class procedure CUDAContextManager.RegisterContext(aContext: TCUDAContext);
-begin
-  if fContextList.IndexOf(aContext) >= 0 then
-  begin
-    GLSLogger.LogError(strInvalidContextReg);
-    Abort;
-  end
-  else
-    fContextList.Add(aContext);
-end;
-
-class procedure CUDAContextManager.UnRegisterContext(aContext: TCUDAContext);
-begin
-  if fContextList.IndexOf(aContext) < 0 then
-  begin
-    GLSLogger.LogError(strInvalidContextReg);
-    Abort;
-  end
-  else
-  begin
-    fContextList.Remove(aContext);
-  end;
-end;
-
-class function CUDAContextManager.ContextCount: Integer;
-begin
-  Result := fContextList.Count;
-end;
-
-class function CUDAContextManager.DeviceCount: Integer;
-begin
-  Result := fDeviceList.Count;
-end;
-
-class function CUDAContextManager.GetDevice(i: Integer): TCUDADevice;
-begin
-  Result := nil;
-  if i < fDeviceList.Count then
-    Result := fDeviceList[i];
-end;
-
-class function CUDAContextManager.GetContext(i: Integer): TCUDAContext;
-begin
-  Result := nil;
-  if i < fContextList.Count then
-    Result := fContextList[i];
-end;
-
-class procedure CUDAContextManager.FillUnusedDeviceList(var AList: TStringList);
-var
-  i: Integer;
-begin
-  if not Assigned(AList) then
-    AList := TStringList.Create
-  else
-    AList.Clear;
-  for i := 0 to fDeviceList.Count - 1 do
-    if not fDeviceList[i].FUsed then
-      AList.Add(fDeviceList[i].name);
-end;
-
-class function CUDAContextManager.GetDeviceByName(const AName: string)
-  : TCUDADevice;
-var
-  i: Integer;
-  Device: TCUDADevice;
-begin
-  Result := nil;
-  if Length(AName) = 0 then
-    exit;
-
-  for i := 0 to fDeviceList.Count - 1 do
-  begin
-    Device := fDeviceList[i];
-    if Device.name = AName then
-    begin
-      Result := Device;
-      exit;
-    end;
-  end;
-end;
-
-class function CUDAContextManager.GetMaxGflopsDevice: TCUDADevice;
-var
-  max_gflops: Integer;
-  i: Integer;
-  Device: TCUDADevice;
-begin
-  Device := nil;
-  max_gflops := 0;
-  for i := 0 to fDeviceList.Count - 1 do
-  begin
-    if max_gflops < fDeviceList.Items[i].fGFlops then
-    begin
-      Device := fDeviceList.Items[i];
-      max_gflops := Device.fGFlops;
-    end;
-  end;
-  Result := Device;
-end;
-
-class function CUDAContextManager.GetNextUnusedDevice: TCUDADevice;
-var
-  i: Integer;
-  Device: TCUDADevice;
-begin
-  Result := nil;
-  for i := 0 to fDeviceList.Count - 1 do
-  begin
-    Device := fDeviceList[i];
-    if not Device.FUsed then
-    begin
-      Result := Device;
-      exit;
-    end;
-  end;
-end;
-
-class procedure CUDAContextManager.CreateContext(aContext: TCUDAContext);
-var
-  status: TCUresult;
-  cuOldContext, cuContext: PCUcontext;
-  LGLContext: TGLContext;
-  LStack: TCUDAContextList;
-begin
-  if not Assigned(aContext.FDevice)
-    or not aContext.FDevice.FSuitable then
-  begin
-    GLSLogger.LogError(strNoDeviceToCreate);
-    Abort;
-  end;
-
-  if GetThreadStack.Count > 0 then
-  begin
-    if cuCtxPopCurrent(cuOldContext) <> CUDA_SUCCESS then
-    begin
-      GLSLogger.LogError(strThreadBusy);
-      Abort;
-    end;
-  end
-  else
-    cuOldContext := nil;
-
-  if aContext.IsValid then
-    DestroyContext(aContext);
-
-  RegisterContext(aContext);
-
-  status := CUDA_SUCCESS;
-  if Assigned(aContext.FOnOpenGLInteropInit) then
-  begin
-    aContext.FOnOpenGLInteropInit(LGLContext);
-    if Assigned(LGLContext) and LGLContext.IsValid then
-    begin
-      LGLContext.Activate;
-      cuContext := nil;
-      status := cuGLCtxCreate(cuContext, 0, aContext.FDevice.fHandle);
-      LGLContext.Deactivate;
-    end
-    else
-    begin
-      GLSLogger.LogError(strInvalidGLContext);
-      UnRegisterContext(aContext);
-      Abort;
-    end;
-  end
-  else
-  begin
-    status := cuCtxCreate(cuContext, 0, aContext.FDevice.fHandle);
-  end;
-
-  if (status <> CUDA_SUCCESS) then
-  begin
-    GLSLogger.LogError(cudaGetLastErrorString);
-    UnRegisterContext(aContext);
-    cuCtxDetach(cuContext);
-    Abort;
-  end;
-
-  aContext.fHandle := cuContext;
-
-  // Make context be floating to use it in different thread
-  if cuCtxPopCurrent(cuContext) <> CUDA_SUCCESS then
-  begin
-    LStack := GetThreadStack;
-    LStack.Insert(LStack.Count - 1, aContext);
-    GLSLogger.LogWarning(strMakeFloatingFail);
-  end;
-
-  if Assigned(cuOldContext) then
-    cuCtxPushCurrent(cuOldContext);
-end;
-
-// (Re)creates the CUDA context of every registered TCUDAContext that is
-// bound to ADevice. Does nothing when ADevice is nil or not suitable.
-class procedure CUDAContextManager.CreateContextOf(ADevice: TCUDADevice);
-var
-  i: Integer;
-begin
-  if Assigned(ADevice) and ADevice.FSuitable then
-  begin
-    // Count itself is not a valid index, so the walk starts at Count - 1.
-    // It runs downwards because CreateContext removes a valid context from
-    // fContextList and appends it again; that only moves entries at or above
-    // the current index, so every original entry is visited exactly once.
-    for i := fContextList.Count - 1 downto 0 do
-      if fContextList[i].FDevice = ADevice then
-        CreateContext(fContextList[i]);
-  end;
-end;
-
-class procedure CUDAContextManager.DestroyContext(aContext: TCUDAContext);
-begin
-  if aContext.IsValid then
-  begin
-    aContext.DestroyAllHandles;
-    cuCtxDestroy(aContext.fHandle);
-    aContext.fHandle := nil;
-    CUDAContextManager.UnRegisterContext(aContext);
-  end;
-end;
-
-// Destroys the CUDA context of every registered TCUDAContext that is bound
-// to ADevice. Does nothing when ADevice is nil or not suitable.
-class procedure CUDAContextManager.DestroyContextOf(ADevice: TCUDADevice);
-var
-  i: Integer;
-begin
-  if Assigned(ADevice) and ADevice.FSuitable then
-  begin
-    // DestroyContext unregisters the context, i.e. removes it from
-    // fContextList. Walking downwards keeps the remaining indices valid; a
-    // forward walk would skip the entry after each removal and finally index
-    // past the shortened list.
-    for i := fContextList.Count - 1 downto 0 do
-      if fContextList[i].FDevice = ADevice then
-        DestroyContext(fContextList[i]);
-  end;
-end;
-
-class function CUDAContextManager.GetThreadStack: TCUDAContextList;
-begin
-  if vStackIndex = 0 then
-  begin
-    SetLength(FContextStacks, Length(FContextStacks)+1);
-    FContextStacks[High(FContextStacks)] := TCUDAContextList.Create;
-    vStackIndex := High(FContextStacks)+1;
-  end;
-  Result := FContextStacks[vStackIndex-1];
-end;
-
-class function CUDAContextManager.GetCurrentThreadContext: TCUDAContext;
-begin
-  if GetThreadStack.Count > 0 then
-    Result := GetThreadStack.Last
-  else
-    Result := nil;
-end;
-
-class procedure CUDAContextManager.PushContext(aContext: TCUDAContext);
-var
-  LContext: TCUDAContext;
-  cuContext: PCUcontext;
-begin
-  LContext := GetCurrentThreadContext;
-  if LContext <> aContext then
-  begin
-    // Pop current
-    if Assigned(LContext) then
-      if cuCtxPopCurrent(cuContext) = CUDA_SUCCESS then
-      begin
-        if LContext.fHandle <> cuContext then
-        begin
-          GLSLogger.LogError(strUnbalansedUsage);
-          Abort;
-        end;
-      end
-      else
-        Abort;
-    // Push required
-    if cuCtxPushCurrent(aContext.fHandle) <> CUDA_SUCCESS then
-      Abort;
-  end;
-  GetThreadStack.Add(aContext);
-end;
-
-class function CUDAContextManager.PopContext: TCUDAContext;
-var
-  C: Integer;
-  LContext: TCUDAContext;
-  cuContext: PCUcontext;
-begin
-  C := GetThreadStack.Count;
-  if C = 0 then
-  begin
-    GLSLogger.LogError(strUnbalansedUsage);
-    Abort;
-  end;
-
-  Result := GetThreadStack.Last;
-  GetThreadStack.Delete(C - 1);
-
-  LContext := GetCurrentThreadContext;
-  if Result <> LContext then
-  begin
-    if cuCtxPopCurrent(cuContext) = CUDA_SUCCESS then
-    begin
-      if Result.fHandle <> cuContext then
-      begin
-        GLSLogger.LogError(strUnbalansedUsage);
-        Abort;
-      end;
-    end
-    else
-      Abort;
-
-    if Assigned(LContext)
-      and (cuCtxPushCurrent(LContext.fHandle) <> CUDA_SUCCESS) then
-        Abort;
-  end;
-end;
-
-// ------------------
-// ------------------ TCUDAHandlesMaster ------------------
-// ------------------
-
-procedure TCUDAHandlesMaster.AllocateHandles;
-var
-  LList: TCUDAHandleList.TLockableList;
-begin
-  LList := GetContext.FHandleList.LockList;
-  if LList.IndexOf(Self) < 0 then
-    LList.Add(Self);
-  GetContext.FHandleList.UnlockList;
-end;
-
-procedure TCUDAHandlesMaster.DestroyHandles;
-begin
-  GetContext.FHandleList.Remove(Self);
-end;
-
-// ------------------
-// ------------------ TCUDAContext ------------------
-// ------------------
-
-constructor TCUDAContext.Create;
-begin
-  inherited Create;
-  fHandle := nil;
-  FDevice := nil;
-  FHandleList := TCUDAHandleList.Create;
-end;
-
-// Destroys the handles created in this context, the CUDA context itself and
-// the handle list.
-destructor TCUDAContext.Destroy;
-begin
-  // DestroyAllHandles starts with Requires, which logs an error and calls
-  // Abort when the context has no CUDA handle. Skip it for a context that was
-  // never created (or is already destroyed) so that the destructor does not
-  // raise and FHandleList is still released.
-  if IsValid then
-    DestroyAllHandles;
-  // No-op for an invalid context; otherwise destroys and unregisters it.
-  CUDAContextManager.DestroyContext(Self);
-  FHandleList.Destroy;
-  inherited;
-end;
-
-procedure TCUDAContext.SetDevice(ADevice: TCUDADevice);
-begin
-  if FDevice <> ADevice then
-  begin
-    CUDAContextManager.DestroyContext(Self);
-    FDevice := ADevice;
-  end;
-end;
-
-procedure TCUDAContext.Requires;
-begin
-  if not IsValid then
-  begin
-    GLSLogger.LogError(strContextNotInit);
-    Abort;
-  end;
-  CUDAContextManager.PushContext(Self);
-end;
-
-procedure TCUDAContext.Release;
-begin
-  CUDAContextManager.PopContext;
-end;
-
-procedure TCUDAContext.DestroyAllHandles;
-var
-  i: Integer;
-  LList: TCUDAHandleList.TLockableList;
-begin
-  Requires;
-  LList := FHandleList.LockList;
-  try
-    for i := LList.Count - 1 downto 0 do
-      LList[i].DestroyHandles;
-  finally
-    FHandleList.Clear;
-    FHandleList.UnlockList;
-    Release;
-  end;
-end;
-
-function TCUDAContext.IsValid: Boolean;
-begin
-  Result := Assigned(fHandle);
-end;
-
-// ------------------------------------------------------------------
-initialization
-// ------------------------------------------------------------------
-
-  RegisterClasses([TGLSCUDADevice]);
-  CUDAContextManager.Init;
-
-finalization
-
-  CUDAContextManager.Done;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAContext;
+
+(* CUDA context *)
+
+interface
+
+uses
+  System.Classes,
+  System.SysUtils,
+  GLS.Strings,
+  GLS.BaseClasses,
+  GLS.Context,
+  GLS.Generics,
+
+  Import.CUDAApi,
+  Import.CUDARunTime;
+
+type
+
+  // Three-component (X, Y, Z) size with an upper limit per component.
+  // The property setters clamp a size to 1..limit and ignore every write
+  // while ReadOnlyValue is True.
+  TCUDADimensions = class(TGLUpdateAbleObject)
+  private
+    // Current sizes; index 0, 1, 2 = X, Y, Z.
+    FXYZ: TDim3;
+    // Upper limits for the sizes, same indexing.
+    FMaxXYZ: TDim3;
+    // When True, SetDimComponent and SetMaxDimComponent do nothing.
+    FReadOnly: Boolean;
+    function GetDimComponent(index: Integer): Integer;
+    procedure SetDimComponent(index: Integer; Value: Integer);
+    function GetMaxDimComponent(index: Integer): Integer;
+    procedure SetMaxDimComponent(index: Integer; Value: Integer);
+  public
+    // Starts writable with all sizes at 1 and all limits at MaxInt.
+    constructor Create(AOwner: TPersistent); override;
+    // Copies limits and sizes from another TCUDADimensions.
+    procedure Assign(Source: TPersistent); override;
+    // Upper limits; only positive values are accepted, and a size above the
+    // new limit is lowered to it.
+    property MaxSizeX: Integer index 0 read GetMaxDimComponent
+      write SetMaxDimComponent;
+    property MaxSizeY: Integer index 1 read GetMaxDimComponent
+      write SetMaxDimComponent;
+    property MaxSizeZ: Integer index 2 read GetMaxDimComponent
+      write SetMaxDimComponent;
+    // Blocks the size and limit setters while True.
+    property ReadOnlyValue: Boolean read FReadOnly write FReadOnly;
+  published
+    { Published properties: the current sizes, clamped to 1..MaxSize. }
+    property SizeX: Integer index 0 read GetDimComponent write SetDimComponent
+      default 1;
+    property SizeY: Integer index 1 read GetDimComponent write SetDimComponent
+      default 1;
+    property SizeZ: Integer index 2 read GetDimComponent write SetDimComponent
+      default 1;
+  end;
+
+  TCUDAContext = class;
+  TOnOpenGLInteropInit = procedure(out Context: TGLContext) of object;
+
+  // Description of one CUDA device. Most published properties read straight
+  // from the cached fDeviceProperties record and have no setter.
+  TCUDADevice = class(TPersistent)
+  private
+    // Device index; taken from the length of the manager's device list when
+    // the object is created.
+    fID: Integer;
+    // Driver handle obtained with cuDeviceGet for fID.
+    fHandle: TCUdevice;
+    // NOTE(review): presumably the performance figure compared by
+    // GetMaxGflopsDevice - its computation is not visible in this chunk.
+    fGFlops: Integer;
+    // Cached device properties backing the published properties below.
+    fDeviceProperties: TCudaDeviceProp;
+    // True when cuDeviceGet succeeded for this device.
+    FSuitable: Boolean;
+    // Initialised to False by the constructor.
+    FUsed: Boolean;
+    // Both holders are created with ReadOnlyValue = True.
+    fMaxThreadsDim: TCUDADimensions;
+    fMaxGridSize: TCUDADimensions;
+  protected
+    function GetName: string;
+  public
+    constructor Create; reintroduce;
+    destructor Destroy; override;
+    procedure Assign(Source: TPersistent); override;
+    {  Returns the total amount of memory available on the device, in bytes.
+      Note that the result type is Cardinal. }
+    function TotalMemory: Cardinal;
+  published
+    property Name: string read GetName;
+    property TotalGlobalMem: NativeUInt read fDeviceProperties.TotalGlobalMem;
+    property SharedMemPerBlock: NativeUInt read fDeviceProperties.SharedMemPerBlock;
+    property RegsPerBlock: Integer read fDeviceProperties.RegsPerBlock;
+    property WarpSize: Integer read fDeviceProperties.WarpSize;
+    property MemPitch: NativeUInt read fDeviceProperties.MemPitch;
+    property MaxThreadsPerBlock: Integer
+      read fDeviceProperties.MaxThreadsPerBlock;
+    property MaxThreadsDim: TCUDADimensions read fMaxThreadsDim;
+    property MaxGridSize: TCUDADimensions read fMaxGridSize;
+    property ClockRate: Integer read fDeviceProperties.ClockRate;
+    property TotalConstMem: NativeUInt read fDeviceProperties.TotalConstMem;
+    // Compute capability, major and minor part.
+    property Major: Integer read fDeviceProperties.Major;
+    property Minor: Integer read fDeviceProperties.Minor;
+    property TextureAlignment: NativeUInt read fDeviceProperties.TextureAlignment;
+    property DeviceOverlap: Integer read fDeviceProperties.DeviceOverlap;
+    property MultiProcessorCount: Integer
+      read fDeviceProperties.MultiProcessorCount;
+  end;
+
+  // Component that refers to a CUDA device by name (SelectDevice) and exposes
+  // the matching TCUDADevice through the Device property.
+  TGLCUDADevice = class(TComponent)
+  private
+    // Name of the selected device; backs the SelectDevice property.
+    FSelectDeviceName: string;
+    function GetDevice: TCUDADevice;
+    procedure SetDevice(AValue: TCUDADevice);
+    procedure SetDeviceName(const AName: string);
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    // NOTE(review): presumably True when a device is selected and usable -
+    // the implementation is not visible in this chunk.
+    function Suitable: Boolean;
+  published
+    property SelectDevice: string read FSelectDeviceName write SetDeviceName;
+    property Device: TCUDADevice read GetDevice write SetDevice;
+  end;
+
+  // Base class for components whose handles belong to a TCUDAContext.
+  // Descendants must implement GetContext to name that context.
+  TCUDAHandlesMaster = class(TComponent)
+  protected
+    // Returns the context this component belongs to (abstract).
+    function GetContext: TCUDAContext; virtual; abstract;
+    // Overridable hooks for creating and releasing the handles.
+    procedure AllocateHandles; virtual;
+    procedure DestroyHandles; virtual;
+  end;
+
+  TCUDAHandleList = GThreadList<TCUDAHandlesMaster>;
+
+  // Wrapper around one CUDA driver context bound to a TCUDADevice.
+  TCUDAContext = class(TObject)
+  private
+    // Driver context handle.
+    fHandle: PCUcontext;
+    // Device the context is bound to; backs the Device property.
+    FDevice: TCUDADevice;
+    // Optional callback that supplies a TGLContext through its out parameter.
+    FOnOpenGLInteropInit: TOnOpenGLInteropInit;
+    // Thread-safe list of TCUDAHandlesMaster components.
+    FHandleList: TCUDAHandleList;
+    procedure SetDevice(ADevice: TCUDADevice);
+  public
+    constructor Create;
+    destructor Destroy; override;
+    {  Destroys all handles based on this context. }
+    procedure DestroyAllHandles;
+    {  Pushes the context onto the CPU thread's stack of current contexts. }
+    procedure Requires;
+    {  Pops the context from the current CPU thread. }
+    procedure Release;
+    function IsValid: Boolean; inline;
+    property Device: TCUDADevice read FDevice write SetDevice;
+    property OnOpenGLInteropInit: TOnOpenGLInteropInit read FOnOpenGLInteropInit
+      write FOnOpenGLInteropInit;
+  end;
+
+  TCUDADeviceList = GList<TCUDADevice>;
+  TCUDAContextList = GList<TCUDAContext>;
+
+  {  Static manager of CUDA devices and contexts: all state is kept in
+     class variables and all operations are class methods. }
+   CUDAContextManager = class
+  private
+    // All detected devices.
+    class var fDeviceList: TCUDADeviceList;
+    // All registered contexts.
+    class var fContextList: TCUDAContextList;
+    // NOTE(review): presumably one context stack per thread, selected via the
+    // vStackIndex threadvar - confirm against GetThreadStack.
+    class var FContextStacks: array of TCUDAContextList;
+  protected
+    class function GetDevice(i: Integer): TCUDADevice;
+    class function GetNextUnusedDevice: TCUDADevice;
+    class procedure RegisterContext(aContext: TCUDAContext);
+    class procedure UnRegisterContext(aContext: TCUDAContext);
+    class function GetThreadStack: TCUDAContextList;
+    class function GetContext(i: Integer): TCUDAContext;
+  public
+    //  Management
+    class procedure Init;
+    class procedure Done;
+    class procedure CreateContext(aContext: TCUDAContext);
+    class procedure DestroyContext(aContext: TCUDAContext);
+    class procedure CreateContextOf(ADevice: TCUDADevice);
+    class procedure DestroyContextOf(ADevice: TCUDADevice);
+    class procedure PushContext(aContext: TCUDAContext);
+    class function PopContext: TCUDAContext;
+    //  Fills AList with the unused devices, e.g. to offer them in a
+    //  property editor.
+    class procedure FillUnusedDeviceList(var AList: TStringList);
+    //  Returns a device by name.
+    class function GetDeviceByName(const AName: string): TCUDADevice;
+    //  Returns the number of CUDA-compatible devices.
+    class function DeviceCount: Integer;
+    //  Access to the device list.
+    property Devices[i: Integer]: TCUDADevice read GetDevice;
+    //  Returns the device with the highest GFLOPS figure.
+    class function GetMaxGflopsDevice: TCUDADevice;
+    //  Returns the number of TCUDAContext objects.
+    class function ContextCount: Integer;
+    //  Returns the CUDA context of the current thread.
+    class function GetCurrentThreadContext: TCUDAContext;
+    {  Access to the context list. }
+    property Contexts[i: Integer]: TCUDAContext read GetContext;
+  end;
+
+//---------------------------------------------------------------------
+implementation
+//---------------------------------------------------------------------
+
+threadvar
+  vStackIndex: Cardinal;
+
+// ------------------
+// ------------------ TCUDADimensions ------------------
+// ------------------
+
+constructor TCUDADimensions.Create(AOwner: TPersistent);
+var
+  Axis: Integer;
+begin
+  inherited Create(AOwner);
+  // A new instance is writable, has size 1 on every axis and the widest
+  // possible limit (MaxInt) on every axis.
+  FReadOnly := False;
+  for Axis := 0 to 2 do
+  begin
+    FXYZ[Axis] := 1;
+    FMaxXYZ[Axis] := MaxInt;
+  end;
+end;
+
+// Copies the current dimensions and their upper bounds from another
+// TCUDADimensions and notifies listeners of the change.
+// Any other source type is forwarded to the inherited implementation.
+procedure TCUDADimensions.Assign(Source: TPersistent);
+var
+  LSource: TCUDADimensions;
+begin
+  if Source is TCUDADimensions then
+  begin
+    LSource := TCUDADimensions(Source);
+    FMaxXYZ[0] := LSource.FMaxXYZ[0];
+    FMaxXYZ[1] := LSource.FMaxXYZ[1];
+    FMaxXYZ[2] := LSource.FMaxXYZ[2];
+    FXYZ[0] := LSource.FXYZ[0];
+    FXYZ[1] := LSource.FXYZ[1];
+    FXYZ[2] := LSource.FXYZ[2];
+    NotifyChange(Self);
+  end
+  else
+    // Only delegate when the source was not handled here: the base
+    // TPersistent.Assign falls through to AssignTo, which raises
+    // EConvertError for types it does not know, so calling it after a
+    // successful copy would turn every assignment into an exception.
+    inherited Assign(Source);
+end;
+
+// Returns the current value of the dimension component selected by index
+// (index is used directly as the subscript into FXYZ, without range checking here).
+function TCUDADimensions.GetDimComponent(index: Integer): Integer;
+begin
+  Result := FXYZ[index];
+end;
+
+// Stores a new value for one dimension component, clamped to the range
+// [1 .. FMaxXYZ[index]], and notifies listeners. Does nothing when the
+// object is read-only.
+procedure TCUDADimensions.SetDimComponent(index: Integer; Value: Integer);
+var
+  clamped: LongWord;
+begin
+  if FReadOnly then
+    Exit;
+
+  // Lower bound first: anything below 1 becomes 1.
+  clamped := 1;
+  if Value >= 1 then
+    clamped := LongWord(Value);
+
+  // Then the per-component upper bound.
+  if clamped > FMaxXYZ[index] then
+    clamped := FMaxXYZ[index];
+
+  FXYZ[index] := clamped;
+  NotifyChange(Self);
+end;
+
+// Returns the upper bound of the dimension component selected by index
+// (index is used directly as the subscript into FMaxXYZ).
+function TCUDADimensions.GetMaxDimComponent(index: Integer): Integer;
+begin
+  Result := FMaxXYZ[index];
+end;
+
+// Changes the upper bound of one dimension component. Non-positive values
+// and read-only objects are ignored. If the current value exceeds the new
+// bound it is lowered to the bound; listeners are then notified.
+procedure TCUDADimensions.SetMaxDimComponent(index: Integer; Value: Integer);
+begin
+  if FReadOnly or (Value <= 0) then
+    Exit;
+
+  FMaxXYZ[index] := LongWord(Value);
+  if FXYZ[index] > FMaxXYZ[index] then
+    FXYZ[index] := FMaxXYZ[index];
+  NotifyChange(Self);
+end;
+
+// ------------------
+// ------------------ TCUDADevice ------------------
+// ------------------
+
+// Creates the descriptor of the next CUDA device.
+// The device ordinal is taken from the current length of the manager's
+// device list, so instances are expected to be created in order and added
+// to that list right away (as CUDAContextManager.Init does).
+// When the CUDA library is initialised and cuDeviceGet succeeds, the device
+// properties are queried and cached, and FSuitable is set to True.
+constructor TCUDADevice.Create;
+begin
+  // The limit objects are exposed read-only; their values are written
+  // directly through the private fields below.
+  fMaxThreadsDim := TCUDADimensions.Create(Self);
+  fMaxThreadsDim.ReadOnlyValue := True;
+  fMaxGridSize := TCUDADimensions.Create(Self);
+  fMaxGridSize.ReadOnlyValue := True;
+
+  if IsCUDAInitialized then
+  begin
+    fID := CUDAContextManager.fDeviceList.Count;
+    FUsed := False;
+
+    FSuitable := cuDeviceGet(fHandle, fID) = CUDA_SUCCESS;
+    if FSuitable then
+    begin
+      cuDeviceGetName(@fDeviceProperties.name[0], SizeOf(fDeviceProperties.name), fHandle);
+      cuDeviceTotalMem(@fDeviceProperties.TotalGlobalMem, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.SharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.RegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MemPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.MaxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.ClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.TotalConstMem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, fHandle);
+      cuDeviceComputeCapability(fDeviceProperties.Major, fDeviceProperties.Minor, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.TextureAlignment, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, fHandle);
+      cuDeviceGetAttribute(@fDeviceProperties.DeviceOverlap, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, fHandle);
+      // The multiprocessor count must land in its own field: writing it to
+      // DeviceOverlap clobbered the overlap flag and left MultiProcessorCount
+      // unset, which made the fGFlops estimate below meaningless.
+      cuDeviceGetAttribute(@fDeviceProperties.MultiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, fHandle);
+      // Relative performance score used by GetMaxGflopsDevice.
+      fGFlops := fDeviceProperties.MultiProcessorCount *
+        fDeviceProperties.ClockRate;
+      fMaxThreadsDim.FXYZ[0] := fDeviceProperties.MaxThreadsDim[0];
+      fMaxThreadsDim.FXYZ[1] := fDeviceProperties.MaxThreadsDim[1];
+      fMaxThreadsDim.FXYZ[2] := fDeviceProperties.MaxThreadsDim[2];
+      fMaxGridSize.FXYZ[0] := fDeviceProperties.MaxGridSize[0];
+      fMaxGridSize.FXYZ[1] := fDeviceProperties.MaxGridSize[1];
+      fMaxGridSize.FXYZ[2] := fDeviceProperties.MaxGridSize[2];
+    end;
+  end;
+end;
+
+// Releases the two dimension-limit helper objects owned by the device.
+destructor TCUDADevice.Destroy;
+begin
+  // Free is nil-safe. The destructor also runs for a partially constructed
+  // instance (when the constructor raised), where these fields may still
+  // be nil and a direct Destroy call would fault.
+  fMaxThreadsDim.Free;
+  fMaxGridSize.Free;
+  inherited;
+end;
+
+// Copies the identity, handle, cached properties and limits of another
+// TCUDADevice. The FUsed flag is intentionally left untouched.
+// Any other source type is forwarded to the inherited implementation.
+procedure TCUDADevice.Assign(Source: TPersistent);
+var
+  dev: TCUDADevice;
+begin
+  if Source is TCUDADevice then
+  begin
+    dev := TCUDADevice(Source);
+    fID := dev.fID;
+    fHandle := dev.fHandle;
+    fGFlops := dev.fGFlops;
+    fDeviceProperties := dev.fDeviceProperties;
+    FSuitable := dev.FSuitable;
+    fMaxThreadsDim.Assign(dev.fMaxThreadsDim);
+    fMaxGridSize.Assign(dev.fMaxGridSize);
+  end
+  else
+    // Delegate only unhandled sources: TPersistent.Assign ends in
+    // AssignTo, which raises EConvertError, so calling it after a
+    // successful copy would make every assignment fail.
+    inherited Assign(Source);
+end;
+
+// Builds the display name of the device: the name stored in the cached
+// device properties followed by the 1-based device number in parentheses,
+// e.g. "<name> (1)" for fID = 0.
+function TCUDADevice.GetName: string;
+begin
+  Result := Format('%s (%d)', [string(fDeviceProperties.name), fID + 1]);
+end;
+
+// Re-queries the total device memory through cuDeviceTotalMem, refreshes the
+// cached TotalGlobalMem property and returns it.
+// NOTE(review): the result type is Cardinal; if TotalGlobalMem is wider than
+// 32 bits the returned value is truncated for devices with 4 GiB or more
+// - confirm against the field's declaration.
+function TCUDADevice.TotalMemory: Cardinal;
+begin
+  cuDeviceTotalMem(@fDeviceProperties.TotalGlobalMem, fHandle);
+  Result := fDeviceProperties.TotalGlobalMem;
+end;
+
+// ------------------
+// ------------------ TGLCUDADevice ------------------
+// ------------------
+
+// Creates the component and binds it to the first device the context
+// manager reports as unused. If that device exists and is suitable it is
+// marked as used and its name is remembered; otherwise the component starts
+// with an empty device name.
+constructor TGLCUDADevice.Create(AOwner: TComponent);
+var
+  candidate: TCUDADevice;
+begin
+  inherited Create(AOwner);
+  FSelectDeviceName := '';
+  candidate := CUDAContextManager.GetNextUnusedDevice;
+  if Assigned(candidate) and candidate.FSuitable then
+  begin
+    FSelectDeviceName := candidate.name;
+    candidate.FUsed := True;
+  end;
+end;
+
+// Destroys the component and gives its device back to the pool: after the
+// inherited destructor has run, the device named FSelectDeviceName is looked
+// up in the context manager and, if found, marked as unused again.
+// NOTE(review): the lookup happens after `inherited`; it relies on the
+// instance fields still being readable at that point and on the manager's
+// device list still existing - confirm the unit finalization order.
+destructor TGLCUDADevice.Destroy;
+var
+  Device: TCUDADevice;
+begin
+  inherited;
+  Device := CUDAContextManager.GetDeviceByName(FSelectDeviceName);
+  if Assigned(Device) then
+    Device.FUsed := False;
+end;
+
+// Resolves the selected device name to the manager's TCUDADevice instance.
+// Returns nil when the name is empty or no device has that name.
+function TGLCUDADevice.GetDevice: TCUDADevice;
+begin
+  Result := CUDAContextManager.GetDeviceByName(FSelectDeviceName);
+end;
+
+// True when the selected device can be resolved and that device is flagged
+// as suitable.
+function TGLCUDADevice.Suitable: Boolean;
+var
+  resolved: TCUDADevice;
+begin
+  resolved := GetDevice;
+  if resolved = nil then
+    Result := False
+  else
+    Result := resolved.FSuitable;
+end;
+
+// Setter with an empty body: assigning a device object has no effect.
+// NOTE(review): presumably present only so the property can be declared
+// writable (e.g. for streaming or the object inspector); the device is
+// actually selected through SetDeviceName - confirm against the declaration.
+procedure TGLCUDADevice.SetDevice(AValue: TCUDADevice);
+begin
+end;
+
+// Switches the component to another device by name. The contexts of the
+// previously selected device are destroyed first, then the name is updated
+// and contexts are created for the newly selected device. A no-op when the
+// name does not change.
+procedure TGLCUDADevice.SetDeviceName(const AName: string);
+begin
+  if FSelectDeviceName = AName then
+    Exit;
+
+  // Self.Device resolves through FSelectDeviceName, so it refers to the old
+  // device before the assignment and to the new one after it.
+  CUDAContextManager.DestroyContextOf(Self.Device);
+  FSelectDeviceName := AName;
+  CUDAContextManager.CreateContextOf(Self.Device);
+end;
+
+// ------------------
+// ------------------ TCUDAContextManager ------------------
+// ------------------
+
+// One-time set-up of the manager: loads the CUDA library, creates the device
+// and context lists, initialises the driver and creates one TCUDADevice per
+// reported device. Does nothing if the library cannot be loaded or the
+// device list already exists.
+class procedure CUDAContextManager.Init;
+var
+  deviceTotal: Integer;
+  idx: Integer;
+begin
+  if (not InitCUDA) or Assigned(fDeviceList) then
+    Exit;
+
+  fDeviceList := TCUDADeviceList.Create;
+  fContextList := TCUDAContextList.Create;
+
+  // The count stays 0 when the driver fails to initialise.
+  deviceTotal := 0;
+  if cuInit(0) = CUDA_SUCCESS then
+    cuDeviceGetCount(deviceTotal);
+
+  // Fill devices list; each device derives its ordinal from the list length.
+  for idx := 0 to deviceTotal - 1 do
+    fDeviceList.Add(TCUDADevice.Create);
+end;
+
+// Tears the manager down: frees all device objects, releases any contexts
+// still left on the per-thread stacks, destroys the stacks and both lists,
+// and finally unloads the CUDA library.
+class procedure CUDAContextManager.Done;
+var
+  I, J: Integer;
+begin
+  if Assigned(fDeviceList) then
+    for I := 0 to fDeviceList.Count - 1 do
+      fDeviceList[I].Free;
+
+  for I := 0 to High(FContextStacks) do
+  begin
+    if FContextStacks[I].Count > 0 then
+    begin
+      // Unbalanced usage: contexts were pushed but never popped.
+      // Walk from the top of the stack down. The loop must count downwards:
+      // a "to 0" loop starting at Count - 1 runs only when Count = 1.
+      // NOTE(review): Release pops from the calling thread's stack, so
+      // entries belonging to other threads' stacks may not be released
+      // correctly from here - confirm intended behaviour.
+      for J := FContextStacks[I].Count - 1 downto 0 do
+        FContextStacks[I][J].Release;
+    end;
+    FContextStacks[I].Destroy;
+  end;
+
+  fDeviceList.Free;
+  fContextList.Free;
+  CloseCUDA;
+end;
+
+// Adds a context to the manager's context list. Registering a context that
+// is already in the list is treated as an error and calls Abort.
+class procedure CUDAContextManager.RegisterContext(aContext: TCUDAContext);
+var
+  alreadyRegistered: Boolean;
+begin
+  alreadyRegistered := fContextList.IndexOf(aContext) >= 0;
+  if alreadyRegistered then
+    Abort; // invalid context registration; Abort raises and does not return
+  fContextList.Add(aContext);
+end;
+
+// Removes a context from the manager's context list. Unregistering a context
+// that is not in the list is treated as an error and calls Abort.
+class procedure CUDAContextManager.UnRegisterContext(aContext: TCUDAContext);
+var
+  isRegistered: Boolean;
+begin
+  isRegistered := fContextList.IndexOf(aContext) >= 0;
+  if not isRegistered then
+    Abort; // invalid context registration; Abort raises and does not return
+  fContextList.Remove(aContext);
+end;
+
+// Returns the number of contexts currently registered with the manager.
+// Requires that Init has created the context list.
+class function CUDAContextManager.ContextCount: Integer;
+begin
+  Result := fContextList.Count;
+end;
+
+// Returns the number of device objects held in the manager's device list.
+// Requires that Init has created the device list.
+class function CUDAContextManager.DeviceCount: Integer;
+begin
+  Result := fDeviceList.Count;
+end;
+
+// Returns the device at position i of the device list, or nil when i is
+// outside the list (negative, or greater than or equal to the count).
+class function CUDAContextManager.GetDevice(i: Integer): TCUDADevice;
+begin
+  Result := nil;
+  // Guard both ends so an invalid index yields nil instead of an
+  // out-of-range list access.
+  if (i >= 0) and (i < fDeviceList.Count) then
+    Result := fDeviceList[i];
+end;
+
+// Returns the context at position i of the context list, or nil when i is
+// outside the list (negative, or greater than or equal to the count).
+class function CUDAContextManager.GetContext(i: Integer): TCUDAContext;
+begin
+  Result := nil;
+  // Guard both ends so an invalid index yields nil instead of an
+  // out-of-range list access.
+  if (i >= 0) and (i < fContextList.Count) then
+    Result := fContextList[i];
+end;
+
+// Fills AList with the display names of all devices that are not marked as
+// used. AList is created when nil is passed and cleared otherwise, so the
+// caller ends up owning a list that holds only the current result.
+class procedure CUDAContextManager.FillUnusedDeviceList(var AList: TStringList);
+var
+  idx: Integer;
+  candidate: TCUDADevice;
+begin
+  if Assigned(AList) then
+    AList.Clear
+  else
+    AList := TStringList.Create;
+
+  for idx := 0 to fDeviceList.Count - 1 do
+  begin
+    candidate := fDeviceList[idx];
+    if candidate.FUsed then
+      Continue;
+    AList.Add(candidate.name);
+  end;
+end;
+
+// Looks a device up by its display name (as produced by TCUDADevice.name).
+// Returns the first match in list order, or nil for an empty name or when
+// no device matches.
+class function CUDAContextManager.GetDeviceByName(const AName: string)
+  : TCUDADevice;
+var
+  idx: Integer;
+  candidate: TCUDADevice;
+begin
+  Result := nil;
+  if AName = '' then
+    Exit;
+
+  idx := 0;
+  while (Result = nil) and (idx < fDeviceList.Count) do
+  begin
+    candidate := fDeviceList[idx];
+    if candidate.name = AName then
+      Result := candidate;
+    Inc(idx);
+  end;
+end;
+
+// Returns the device with the highest fGFlops score. Ties keep the earlier
+// device; nil is returned when the list is empty or no device has a score
+// above zero.
+class function CUDAContextManager.GetMaxGflopsDevice: TCUDADevice;
+var
+  bestScore: Integer;
+  idx: Integer;
+  candidate: TCUDADevice;
+begin
+  Result := nil;
+  bestScore := 0;
+  for idx := 0 to fDeviceList.Count - 1 do
+  begin
+    candidate := fDeviceList.Items[idx];
+    if candidate.fGFlops > bestScore then
+    begin
+      bestScore := candidate.fGFlops;
+      Result := candidate;
+    end;
+  end;
+end;
+
+// Returns the first device in list order that is not marked as used, or nil
+// when every device is in use (or the list is empty).
+class function CUDAContextManager.GetNextUnusedDevice: TCUDADevice;
+var
+  idx: Integer;
+begin
+  Result := nil;
+  idx := 0;
+  while (Result = nil) and (idx < fDeviceList.Count) do
+  begin
+    if not fDeviceList[idx].FUsed then
+      Result := fDeviceList[idx];
+    Inc(idx);
+  end;
+end;
+
+// Creates the driver-level CUDA context for aContext on its assigned device
+// and registers it with the manager.
+//
+// Steps:
+//   1. Abort unless aContext has a suitable device.
+//   2. If the calling thread currently has a context on its stack, pop it
+//      from the driver so the new context can be created; it is pushed back
+//      at the end (and also before aborting on failure).
+//   3. Destroy a previous driver context of aContext, if any.
+//   4. Create the context - with OpenGL interop when the
+//      FOnOpenGLInteropInit handler supplies a valid GL context, plainly
+//      otherwise.
+//   5. Pop the new context so it is "floating" and usable from any thread.
+//
+// Every failure path calls Abort (raises EAbort).
+class procedure CUDAContextManager.CreateContext(aContext: TCUDAContext);
+var
+  status: TCUresult;
+  cuOldContext, cuContext: PCUcontext;
+  LGLContext: TGLContext;
+  LStack: TCUDAContextList;
+begin
+  if not Assigned(aContext.FDevice)
+    or not aContext.FDevice.FSuitable then
+  begin
+    // No Device To Create
+    Abort;
+  end;
+
+  // Give every handle a defined value up front: the failure paths below
+  // inspect them, and the creation calls may leave them untouched on error.
+  cuOldContext := nil;
+  cuContext := nil;
+  LGLContext := nil;
+
+  if GetThreadStack.Count > 0 then
+  begin
+    if cuCtxPopCurrent(cuOldContext) <> CUDA_SUCCESS then
+    begin
+      // Thread Busy
+      Abort;
+    end;
+  end;
+
+  if aContext.IsValid then
+    DestroyContext(aContext);
+
+  RegisterContext(aContext);
+
+  status := CUDA_SUCCESS;
+  if Assigned(aContext.FOnOpenGLInteropInit) then
+  begin
+    aContext.FOnOpenGLInteropInit(LGLContext);
+    if Assigned(LGLContext) and LGLContext.IsValid then
+    begin
+      // The GL context has to be current while the interop context is made.
+      LGLContext.Activate;
+      status := cuGLCtxCreate(cuContext, 0, aContext.FDevice.fHandle);
+      LGLContext.Deactivate;
+    end
+    else
+    begin
+      // Invalid GL Context
+      UnRegisterContext(aContext);
+      // Put the thread's previous context back before bailing out.
+      if Assigned(cuOldContext) then
+        cuCtxPushCurrent(cuOldContext);
+      Abort;
+    end;
+  end
+  else
+  begin
+    status := cuCtxCreate(cuContext, 0, aContext.FDevice.fHandle);
+  end;
+
+  if (status <> CUDA_SUCCESS) then
+  begin
+    cudaGetLastErrorString;
+    UnRegisterContext(aContext);
+    // Only detach a handle the driver actually produced.
+    if Assigned(cuContext) then
+      cuCtxDetach(cuContext);
+    // Put the thread's previous context back before bailing out.
+    if Assigned(cuOldContext) then
+      cuCtxPushCurrent(cuOldContext);
+    Abort;
+  end;
+
+  aContext.fHandle := cuContext;
+
+  // Make context be floating to use it in different thread
+  if cuCtxPopCurrent(cuContext) <> CUDA_SUCCESS then
+  begin
+    // Make Floating Failed: the new context stays current on this thread,
+    // so record it on the manager-side stack. It sits below the entry that
+    // is re-pushed underneath; with an empty stack it is simply appended
+    // (Insert at index -1 would be out of range).
+    LStack := GetThreadStack;
+    if LStack.Count > 0 then
+      LStack.Insert(LStack.Count - 1, aContext)
+    else
+      LStack.Add(aContext);
+  end;
+
+  if Assigned(cuOldContext) then
+    cuCtxPushCurrent(cuOldContext);
+end;
+
+// (Re)creates the driver context of every registered context that is bound
+// to ADevice. Does nothing for a nil or unsuitable device.
+class procedure CUDAContextManager.CreateContextOf(ADevice: TCUDADevice);
+var
+  i: Integer;
+  LContext: TCUDAContext;
+begin
+  if Assigned(ADevice) and ADevice.FSuitable then
+  begin
+    // Valid indices end at Count - 1; the previous upper bound of Count read
+    // one element past the end. Iterating downwards also keeps the walk
+    // stable: CreateContext unregisters a valid context and registers it
+    // again, which moves it to the end of the list - behind the indices
+    // still to be visited.
+    for i := fContextList.Count - 1 downto 0 do
+    begin
+      LContext := fContextList[i];
+      if LContext.FDevice = ADevice then
+        CreateContext(LContext);
+    end;
+  end;
+end;
+
+// Destroys the driver context behind aContext: all handles allocated in the
+// context are destroyed first, then the driver context itself, after which
+// the handle is cleared and the context is unregistered. A context without
+// a driver handle is left untouched.
+class procedure CUDAContextManager.DestroyContext(aContext: TCUDAContext);
+begin
+  if not aContext.IsValid then
+    Exit;
+
+  aContext.DestroyAllHandles;
+  cuCtxDestroy(aContext.fHandle);
+  aContext.fHandle := nil;
+  CUDAContextManager.UnRegisterContext(aContext);
+end;
+
+// Destroys the driver context of every registered context that is bound to
+// ADevice. Does nothing for a nil or unsuitable device.
+class procedure CUDAContextManager.DestroyContextOf(ADevice: TCUDADevice);
+var
+  i: Integer;
+  LContext: TCUDAContext;
+begin
+  if Assigned(ADevice) and ADevice.FSuitable then
+  begin
+    // DestroyContext removes the context from fContextList. Walking the
+    // list backwards keeps the remaining indices valid; a forward walk
+    // would skip the element after each removal and run past the shrunken
+    // end of the list.
+    for i := fContextList.Count - 1 downto 0 do
+    begin
+      LContext := fContextList[i];
+      if LContext.FDevice = ADevice then
+        DestroyContext(LContext);
+    end;
+  end;
+end;
+
+// Returns the context stack of the calling thread, creating it on first use.
+// vStackIndex is a threadvar that stores the thread's position in
+// FContextStacks plus one, so the zero-initialised value 0 means "no stack
+// allocated yet".
+// NOTE(review): FContextStacks is grown here without any visible
+// synchronisation - confirm that first use from several threads is
+// serialised by the callers.
+class function CUDAContextManager.GetThreadStack: TCUDAContextList;
+begin
+  if vStackIndex = 0 then
+  begin
+    SetLength(FContextStacks, Length(FContextStacks)+1);
+    FContextStacks[High(FContextStacks)] := TCUDAContextList.Create;
+    vStackIndex := High(FContextStacks)+1;
+  end;
+  Result := FContextStacks[vStackIndex-1];
+end;
+
+// Returns the context on top of the calling thread's stack, or nil when the
+// stack is empty.
+class function CUDAContextManager.GetCurrentThreadContext: TCUDAContext;
+var
+  threadStack: TCUDAContextList;
+begin
+  Result := nil;
+  threadStack := GetThreadStack;
+  if threadStack.Count > 0 then
+    Result := threadStack.Last;
+end;
+
+// Makes aContext the current CUDA context of the calling thread and records
+// it on the thread's manager-side stack.
+// When a different context is on top of the stack, that context is first
+// popped from the driver and checked against the expected handle; then
+// aContext is pushed to the driver. When aContext is already on top, no
+// driver call is made and only the stack entry is added, so nested
+// Requires/Release pairs on the same context are cheap.
+// Any driver failure or handle mismatch calls Abort (raises EAbort).
+class procedure CUDAContextManager.PushContext(aContext: TCUDAContext);
+var
+  LContext: TCUDAContext;
+  cuContext: PCUcontext;
+begin
+  LContext := GetCurrentThreadContext;
+  if LContext <> aContext then
+  begin
+    // Pop current
+    if Assigned(LContext) then
+      if cuCtxPopCurrent(cuContext) = CUDA_SUCCESS then
+      begin
+        if LContext.fHandle <> cuContext then
+        begin
+          // Unbalanced usage: the driver's current context is not the one
+          // recorded on top of the manager-side stack.
+          Abort;
+        end;
+      end
+      else
+        Abort;
+    // Push required
+    if cuCtxPushCurrent(aContext.fHandle) <> CUDA_SUCCESS then
+      Abort;
+  end;
+  GetThreadStack.Add(aContext);
+end;
+
+// Removes the top entry from the calling thread's context stack and returns
+// it. If the entry that is now on top is a different context (or the stack
+// became empty), the removed context is popped from the driver, verified
+// against its handle, and the new top context - if any - is pushed to the
+// driver again.
+// Calls Abort (raises EAbort) when the stack is empty, on a driver failure,
+// or when the driver's current context does not match the removed entry.
+// Note that the stack entry has already been removed when those later
+// checks abort.
+class function CUDAContextManager.PopContext: TCUDAContext;
+var
+  C: Integer;
+  LContext: TCUDAContext;
+  cuContext: PCUcontext;
+begin
+  C := GetThreadStack.Count;
+  if C = 0 then
+  begin
+    // Unbalanced usage: pop without a matching push.
+    Abort;
+  end;
+
+  Result := GetThreadStack.Last;
+  GetThreadStack.Delete(C - 1);
+
+  LContext := GetCurrentThreadContext;
+  if Result <> LContext then
+  begin
+    if cuCtxPopCurrent(cuContext) = CUDA_SUCCESS then
+    begin
+      if Result.fHandle <> cuContext then
+      begin
+        // Unbalanced usage: driver and manager-side stack disagree.
+        Abort;
+      end;
+    end
+    else
+      Abort;
+
+    if Assigned(LContext)
+      and (cuCtxPushCurrent(LContext.fHandle) <> CUDA_SUCCESS) then
+        Abort;
+  end;
+end;
+
+// ------------------
+// ------------------ TCUDAHandlesMaster ------------------
+// ------------------
+
+// Registers this object in the handle list of its context so that the
+// context can destroy the object's handles later. Registering twice is
+// harmless: the object is added only when it is not yet in the list.
+procedure TCUDAHandlesMaster.AllocateHandles;
+var
+  LContext: TCUDAContext;
+  LList: TCUDAHandleList.TLockableList;
+begin
+  // Resolve the context once so lock and unlock target the same list.
+  LContext := GetContext;
+  LList := LContext.FHandleList.LockList;
+  try
+    if LList.IndexOf(Self) < 0 then
+      LList.Add(Self);
+  finally
+    // Always release the lock, even if the list operations raise.
+    LContext.FHandleList.UnlockList;
+  end;
+end;
+
+// Removes this object from the handle list of its context. This base
+// implementation only updates the list; it releases no handle itself.
+procedure TCUDAHandlesMaster.DestroyHandles;
+begin
+  GetContext.FHandleList.Remove(Self);
+end;
+
+// ------------------
+// ------------------ TCUDAContext ------------------
+// ------------------
+
+// Creates an empty context object: no driver handle, no device assigned,
+// and a fresh (empty) handle list. The driver context itself is created
+// later through CUDAContextManager.CreateContext.
+constructor TCUDAContext.Create;
+begin
+  inherited Create;
+  fHandle := nil;
+  FDevice := nil;
+  FHandleList := TCUDAHandleList.Create;
+end;
+
+// Destroys the driver context (including all handles allocated in it) and
+// frees the handle list.
+destructor TCUDAContext.Destroy;
+begin
+  // DestroyContext already calls DestroyAllHandles, and only for a valid
+  // context. Calling DestroyAllHandles unconditionally here would go through
+  // Requires, which Aborts for a context whose driver handle was never
+  // created - raising inside the destructor and leaking the handle list.
+  CUDAContextManager.DestroyContext(Self);
+  // Free is nil-safe, which matters when the constructor failed early.
+  FHandleList.Free;
+  inherited;
+end;
+
+// Binds the context to another device. The existing driver context (if any)
+// is destroyed before the new device is stored; assigning the same device
+// again changes nothing.
+procedure TCUDAContext.SetDevice(ADevice: TCUDADevice);
+begin
+  if FDevice = ADevice then
+    Exit;
+
+  CUDAContextManager.DestroyContext(Self);
+  FDevice := ADevice;
+end;
+
+// Makes this context current on the calling thread by pushing it through
+// the context manager. Calls Abort when the driver context has not been
+// created. Every call must be paired with a call to Release.
+procedure TCUDAContext.Requires;
+begin
+  if not IsValid then
+    Abort; // context not initialized; Abort raises and does not return
+  CUDAContextManager.PushContext(Self);
+end;
+
+// Counterpart of Requires: pops the top entry of the calling thread's
+// context stack through the context manager. The popped context returned by
+// PopContext is discarded and is not checked against Self here.
+procedure TCUDAContext.Release;
+begin
+  CUDAContextManager.PopContext;
+end;
+
+// Asks every object registered in this context's handle list to destroy its
+// handles, then empties the list.
+// The context is made current for the duration (Requires/Release), so the
+// call Aborts when the driver context has not been created. The list is
+// walked from the end because DestroyHandles removes the entry it is called
+// on. The clean-up in the finally section runs even when a DestroyHandles
+// call raises.
+procedure TCUDAContext.DestroyAllHandles;
+var
+  i: Integer;
+  LList: TCUDAHandleList.TLockableList;
+begin
+  Requires;
+  LList := FHandleList.LockList;
+  try
+    for i := LList.Count - 1 downto 0 do
+      LList[i].DestroyHandles;
+  finally
+    FHandleList.Clear;
+    FHandleList.UnlockList;
+    Release;
+  end;
+end;
+
+// True once the driver context has been created, i.e. the stored driver
+// handle is not nil.
+function TCUDAContext.IsValid: Boolean;
+begin
+  Result := fHandle <> nil;
+end;
+
+// ------------------------------------------------------------------
+initialization
+// ------------------------------------------------------------------
+
+  // Make TGLCUDADevice known to the streaming system and build the device
+  // list once when the unit is loaded.
+  RegisterClasses([TGLCUDADevice]);
+  CUDAContextManager.Init;
+
+finalization
+
+  // Free devices, context stacks and lists, and unload the CUDA library.
+  CUDAContextManager.Done;
+
+end.

+ 145 - 142
Source/GLS.CUDADataAccess.pas → Source/GPU.CUDADataAccess.pas

@@ -1,142 +1,145 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDADataAccess;
-
-(* CUDA data access implementation *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  System.SysUtils,
-
-  GLS.Logger,
-  GLS.Strings,
-  GLS.Utils;
-
-type
-
-  GCUDAHostElementAccess<TScalar> = class
-  public
-  const
-    ElementSize = SizeOf(TScalar);
-  type
-    TVector2 = array[0..1] of TScalar;
-    TVector3 = array[0..2] of TScalar;
-    TVector4 = array[0..3] of TScalar;
-  private
-    class procedure CheckElementSize(ACNum: Cardinal); inline;
-    class function GetScalar: TScalar;
-    class function GetVector2: TVector2;
-    class function GetVector3: TVector3;
-    class function GetVector4: TVector4;
-    class procedure SetScalar(const AValue: TScalar);
-    class procedure SetVector2(const AValue: TVector2);
-    class procedure SetVector3(const AValue: TVector3);
-    class procedure SetVector4(const AValue: TVector4);
-  public
-    property Scalar: TScalar read GetScalar write SetScalar;
-    property Vector2: TVector2 read GetVector2 write SetVector2;
-    property Vector3: TVector3 read GetVector3 write SetVector3;
-    property Vector4: TVector4 read GetVector4 write SetVector4;
-  end;
-
-  UByteElement = GCUDAHostElementAccess<Byte>;
-  ByteElement = GCUDAHostElementAccess<ShortInt>;
-  UShortElement= GCUDAHostElementAccess<Word>;
-  ShortElement = GCUDAHostElementAccess<SmallInt>;
-  UIntElement = GCUDAHostElementAccess<LongWord>;
-  IntElement = GCUDAHostElementAccess<LongInt>;
-  HalfElement = GCUDAHostElementAccess<THalfFloat>;
-  FloatElement = GCUDAHostElementAccess<Single>;
-  DoubleElement = GCUDAHostElementAccess<Double>;
-
-procedure SetElementAccessAddress(AValue: PByte; ASize: Cardinal);
-function GetElementAccessAddress: PByte;
-function GetElementAccessSize: Cardinal;
-
-//-----------------------------------------------
-implementation
-//-----------------------------------------------
-
-threadvar
-  vElementAccessAddress: PByte;
-  vElementAccessElementSize: Cardinal;
-
-function GetElementAccessAddress: PByte;
-begin
-  Result := vElementAccessAddress;
-end;
-
-function GetElementAccessSize: Cardinal;
-begin
-  Result := vElementAccessElementSize;
-end;
-
-procedure SetElementAccessAddress(AValue: PByte; ASize: Cardinal);
-begin
-  vElementAccessAddress := AValue;
-  vElementAccessElementSize := ASize;
-end;
-
-class procedure GCUDAHostElementAccess<TScalar>.CheckElementSize(ACNum: Cardinal);
-begin
-  if GetElementAccessSize <> ACNum * SizeOf(TScalar) then
-  begin
-    GLSLogger.LogError(strSizeMismatch);
-    Abort;
-  end;
-end;
-
-class function GCUDAHostElementAccess<TScalar>.GetScalar: TScalar;
-begin
-  CheckElementSize(1);
-  Move(GetElementAccessAddress^, Result, SizeOf(TScalar));
-end;
-
-class function GCUDAHostElementAccess<TScalar>.GetVector2: TVector2;
-begin
-  CheckElementSize(2);
-  Move(GetElementAccessAddress^, Result, 2*SizeOf(TScalar));
-end;
-
-class function GCUDAHostElementAccess<TScalar>.GetVector3: TVector3;
-begin
-  CheckElementSize(3);
-  Move(GetElementAccessAddress^, Result, 3*SizeOf(TScalar));
-end;
-
-class function GCUDAHostElementAccess<TScalar>.GetVector4: TVector4;
-begin
-  CheckElementSize(4);
-  Move(GetElementAccessAddress^, Result, 4*SizeOf(TScalar));
-end;
-
-class procedure GCUDAHostElementAccess<TScalar>.SetScalar(const AValue: TScalar);
-begin
-  CheckElementSize(1);
-  Move(AValue, GetElementAccessAddress^, SizeOf(TScalar));
-end;
-
-class procedure GCUDAHostElementAccess<TScalar>.SetVector2(const AValue: TVector2);
-begin
-  CheckElementSize(2);
-  Move(AValue, GetElementAccessAddress^, 2*SizeOf(TScalar));
-end;
-
-class procedure GCUDAHostElementAccess<TScalar>.SetVector3(const AValue: TVector3);
-begin
-  CheckElementSize(3);
-  Move(AValue, GetElementAccessAddress^, 3*SizeOf(TScalar));
-end;
-
-class procedure GCUDAHostElementAccess<TScalar>.SetVector4(const AValue: TVector4);
-begin
-  CheckElementSize(4);
-  Move(AValue, GetElementAccessAddress^, 4*SizeOf(TScalar));
-end;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDADataAccess;
+
+(* CUDA data access implementation *)
+
+interface
+
+uses
+  System.SysUtils,
+
+  GLS.Logger,
+  GLS.Strings,
+  GLS.Utils;
+
+type
+
+  // Typed access to a single host-side data element. The element is the
+  // memory region most recently published on the calling thread through
+  // SetElementAccessAddress; the class properties read or write it as a
+  // scalar or as a 2-, 3- or 4-component vector of TScalar. Every accessor
+  // first verifies that the published element size matches the requested
+  // component count times SizeOf(TScalar).
+  GCUDAHostElementAccess<TScalar> = class
+  public const
+    // Size in bytes of one scalar component.
+    ElementSize = SizeOf(TScalar);
+
+  type
+    TVector2 = array [0 .. 1] of TScalar;
+    TVector3 = array [0 .. 2] of TScalar;
+    TVector4 = array [0 .. 3] of TScalar;
+  private
+    // Aborts when the published element size differs from ACNum scalars.
+    class procedure CheckElementSize(ACNum: Cardinal); inline;
+    class function GetScalar: TScalar;
+    class function GetVector2: TVector2;
+    class function GetVector3: TVector3;
+    class function GetVector4: TVector4;
+    class procedure SetScalar(const AValue: TScalar);
+    class procedure SetVector2(const AValue: TVector2);
+    class procedure SetVector3(const AValue: TVector3);
+    class procedure SetVector4(const AValue: TVector4);
+  public
+    property Scalar: TScalar read GetScalar write SetScalar;
+    property Vector2: TVector2 read GetVector2 write SetVector2;
+    property Vector3: TVector3 read GetVector3 write SetVector3;
+    property Vector4: TVector4 read GetVector4 write SetVector4;
+  end;
+
+  UByteElement = GCUDAHostElementAccess<Byte>;
+  ByteElement = GCUDAHostElementAccess<ShortInt>;
+  UShortElement = GCUDAHostElementAccess<Word>;
+  ShortElement = GCUDAHostElementAccess<SmallInt>;
+  UIntElement = GCUDAHostElementAccess<LongWord>;
+  IntElement = GCUDAHostElementAccess<LongInt>;
+  HalfElement = GCUDAHostElementAccess<THalfFloat>;
+  FloatElement = GCUDAHostElementAccess<Single>;
+  DoubleElement = GCUDAHostElementAccess<Double>;
+
+procedure SetElementAccessAddress(AValue: PByte; ASize: Cardinal);
+function GetElementAccessAddress: PByte;
+function GetElementAccessSize: Cardinal;
+
+// -----------------------------------------------
+implementation
+// -----------------------------------------------
+
+threadvar
+  vElementAccessAddress: PByte;
+  vElementAccessElementSize: Cardinal;
+
+// Returns the element address last published on the calling thread with
+// SetElementAccessAddress (the value is stored in a threadvar).
+function GetElementAccessAddress: PByte;
+begin
+  Result := vElementAccessAddress;
+end;
+
+// Returns the element size last published on the calling thread with
+// SetElementAccessAddress (the value is stored in a threadvar).
+function GetElementAccessSize: Cardinal;
+begin
+  Result := vElementAccessElementSize;
+end;
+
+// Publishes the element that subsequent GCUDAHostElementAccess reads and
+// writes on the calling thread will operate on: AValue is its address and
+// ASize its size in bytes. Both values are kept per thread.
+procedure SetElementAccessAddress(AValue: PByte; ASize: Cardinal);
+begin
+  vElementAccessElementSize := ASize;
+  vElementAccessAddress := AValue;
+end;
+
+// Verifies that the element published for the calling thread is exactly
+// ACNum scalars wide. On a mismatch the error is logged and Abort is called.
+class procedure GCUDAHostElementAccess<TScalar>.CheckElementSize
+  (ACNum: Cardinal);
+begin
+  if GetElementAccessSize = ACNum * SizeOf(TScalar) then
+    Exit;
+
+  GLSLogger.LogError(strSizeMismatch);
+  Abort;
+end;
+
+// Reads the current element as a single scalar (size-checked).
+class function GCUDAHostElementAccess<TScalar>.GetScalar: TScalar;
+begin
+  CheckElementSize(1);
+  Move(GetElementAccessAddress^, Result, SizeOf(Result));
+end;
+
+// Reads the current element as a 2-component vector (size-checked).
+class function GCUDAHostElementAccess<TScalar>.GetVector2: TVector2;
+begin
+  CheckElementSize(2);
+  // The array type is exactly two scalars wide.
+  Move(GetElementAccessAddress^, Result, SizeOf(Result));
+end;
+
+// Reads the current element as a 3-component vector (size-checked).
+class function GCUDAHostElementAccess<TScalar>.GetVector3: TVector3;
+begin
+  CheckElementSize(3);
+  // The array type is exactly three scalars wide.
+  Move(GetElementAccessAddress^, Result, SizeOf(Result));
+end;
+
+// Reads the current element as a 4-component vector (size-checked).
+class function GCUDAHostElementAccess<TScalar>.GetVector4: TVector4;
+begin
+  CheckElementSize(4);
+  // The array type is exactly four scalars wide.
+  Move(GetElementAccessAddress^, Result, SizeOf(Result));
+end;
+
+// Writes a single scalar into the current element (size-checked).
+class procedure GCUDAHostElementAccess<TScalar>.SetScalar
+  (const AValue: TScalar);
+begin
+  CheckElementSize(1);
+  Move(AValue, GetElementAccessAddress^, SizeOf(AValue));
+end;
+
+// Writes a 2-component vector into the current element (size-checked).
+class procedure GCUDAHostElementAccess<TScalar>.SetVector2
+  (const AValue: TVector2);
+begin
+  CheckElementSize(2);
+  // The array type is exactly two scalars wide.
+  Move(AValue, GetElementAccessAddress^, SizeOf(AValue));
+end;
+
+// Writes a 3-component vector into the current element (size-checked).
+class procedure GCUDAHostElementAccess<TScalar>.SetVector3
+  (const AValue: TVector3);
+begin
+  CheckElementSize(3);
+  // The array type is exactly three scalars wide.
+  Move(AValue, GetElementAccessAddress^, SizeOf(AValue));
+end;
+
+// Writes a 4-component vector into the current element (size-checked).
+class procedure GCUDAHostElementAccess<TScalar>.SetVector4
+  (const AValue: TVector4);
+begin
+  CheckElementSize(4);
+  // The array type is exactly four scalars wide.
+  Move(AValue, GetElementAccessAddress^, SizeOf(AValue));
+end;
+
+end.

+ 393 - 393
Source/GLS.CUDAFFTPlan.pas → Source/GPU.CUDAFFTPlan.pas

@@ -1,393 +1,393 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAFFTPlan;
-
-(*  Fast Fourier Transform for CUDA *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  System.Classes,
-  System.SysUtils,
-
-  GLS.CUDAContext,
-  GLS.CUDA,
-  GLS.CUDAApi,
-  GLS.CUDAFourierTransform,
-
-  GLS.Strings,
-  GLS.Logger;
-
-type
-
-  TCUDAFFTransform =
-  (
-    fftRealToComplex,
-    fftComplexToReal,
-    fftComplexToComplex,
-    fftDoubleToDoubleComplex,
-    fftDoubleComplexToDouble,
-    fftDoubleComplexToDoubleComplex
-  );
-
-  TCUDAFFTdir = (fftdForward, fftdInverse);
-
-  TCUDAFFTPlan = class(TCUDAComponent)
-  private
-    FHandle: TcufftHandle;
-    FWidth: Integer;
-    FHeight: Integer;
-    FDepth: Integer;
-    FBatch: Integer;
-    FSize: Integer;
-    FPaddedSize: Integer;
-    FTransform: TCUDAFFTransform;
-    FStatus: TcufftResult;
-    procedure SetWidth(Value: Integer);
-    procedure SetHeight(Value: Integer);
-    procedure SetDepth(Value: Integer);
-    procedure SetBatch(Value: Integer);
-    procedure SetTransform(Value: TCUDAFFTransform);
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    class procedure CheckLib;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure Assign(Source: TPersistent); override;
-    procedure Execute(ASrc: TCUDAMemData; ADst: TCUDAMemData;
-      const ADir: TCUDAFFTdir = fftdForward);
-  published
-    property Width: Integer read fWidth write SetWidth default 256;
-    property Height: Integer read FHeight write SetHeight default 0;
-    property Depth: Integer read FDepth write SetDepth default 0;
-    property Batch: Integer read FBatch write SetBatch default 1;
-    property Transform: TCUDAFFTransform read FTransform write SetTransform
-      default fftRealToComplex;
-  end;
-
-//---------------------------------------------------------------------  
-implementation
-//---------------------------------------------------------------------  
-
-constructor TCUDAFFTPlan.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  FHandle := INVALID_CUFFT_HANDLE;
-  fWidth := 256;
-  FHeight := 0;
-  FDepth := 0;
-  FBatch := 1;
-  FTransform := fftRealToComplex;
-end;
-
-destructor TCUDAFFTPlan.Destroy;
-begin
-  DestroyHandles;
-  inherited;
-end;
-
-class procedure TCUDAFFTPlan.CheckLib;
-begin
-  if not IsCUFFTInitialized then
-    if not InitCUFFT then
-    begin
-      GLSLogger.LogError('Can not initialize CUFFT library');
-      Abort;
-    end;
-end;
-
-procedure TCUDAFFTPlan.Assign(Source: TPersistent);
-var
-  plan: TCUDAFFTPlan;
-begin
-  if Source is TCUDAFFTPlan then
-  begin
-    DestroyHandles;
-    plan := TCUDAFFTPlan(Source);
-    Width := plan.fWidth;
-    Height := plan.FHeight;
-    Depth := plan.FDepth;
-    Transform := plan.FTransform;
-  end;
-  inherited Assign(Source);
-end;
-
-procedure TCUDAFFTPlan.AllocateHandles;
-var
-  LType: TcufftType;
-begin
-  DestroyHandles;
-
-  case FTransform of
-    fftRealToComplex:
-      LType := CUFFT_R2C;
-    fftComplexToReal:
-      LType := CUFFT_C2R;
-    fftComplexToComplex:
-      LType := CUFFT_C2C;
-    fftDoubleToDoubleComplex:
-      LType := CUFFT_D2Z;
-    fftDoubleComplexToDouble:
-      LType := CUFFT_Z2D;
-    fftDoubleComplexToDoubleComplex:
-      LType := CUFFT_Z2Z;
-  else
-    begin
-      Assert(False, strErrorEx + strUnknownType);
-      LType := CUFFT_R2C;
-    end;
-  end;
-
-  Context.Requires;
-
-  if (FHeight = 0) and (FDepth = 0) then
-  begin
-    FStatus := cufftPlan1d(FHandle, fWidth, LType, FBatch);
-    FSize := FWidth;
-    FPaddedSize := FWidth div 2 + 1;
-    if FBatch > 0 then
-    begin
-      FSize := FSize * FBatch;
-      FPaddedSize := FPaddedSize * FBatch;
-    end;
-  end
-  else if FDepth = 0 then
-  begin
-    FStatus := cufftPlan2d(FHandle, fWidth, FHeight, LType);
-    FSize := FWidth * FHeight;
-    FPaddedSize := FWidth * (FHeight div 2 + 1);
-  end
-  else
-  begin
-    FStatus := cufftPlan3d(FHandle, fWidth, FHeight, FDepth, LType);
-    FSize := FWidth * FHeight * FDepth;
-    FPaddedSize := FWidth * FHeight * (FDepth div 2 + 1);
-  end;
-
-  Context.Release;
-
-  if FStatus <> CUFFT_SUCCESS then
-  begin
-    FHandle := INVALID_CUFFT_HANDLE;
-    Abort;
-  end;
-
-  Context.Requires;
-  FStatus := cufftSetCompatibilityMode(FHandle, CUFFT_COMPATIBILITY_FFTW_PADDING);
-  Context.Release;
-
-  fChanges := [];
-  inherited;
-end;
-
-procedure TCUDAFFTPlan.DestroyHandles;
-begin
-  inherited;
-  CheckLib;
-
-  if FHandle <> INVALID_CUFFT_HANDLE then
-  begin
-    Context.Requires;
-    FStatus := cufftDestroy(FHandle);
-    Context.Release;
-    if FStatus <> CUFFT_SUCCESS then
-      Abort;
-    FHandle := 0;
-    FPaddedSize := 0;
-  end;
-end;
-
-procedure TCUDAFFTPlan.SetWidth(Value: Integer);
-begin
-  if Value < 1 then
-    Value := 1;
-  if Value <> fWidth then
-  begin
-    fWidth := Value;
-    CuNotifyChange(cuchSize);
-  end;
-end;
-
-procedure TCUDAFFTPlan.SetHeight(Value: Integer);
-begin
-  if Value < 0 then
-    Value := 0;
-  if Value <> FHeight then
-  begin
-    FHeight := Value;
-    if FHeight > 0 then
-      FBatch := 1;
-    CuNotifyChange(cuchSize);
-  end;
-end;
-
-procedure TCUDAFFTPlan.SetDepth(Value: Integer);
-begin
-  if Value < 0 then
-    Value := 0;
-  if Value <> FDepth then
-  begin
-    FDepth := Value;
-    if FDepth > 0 then
-      FBatch := 1;
-    CuNotifyChange(cuchSize);
-  end;
-end;
-
-procedure TCUDAFFTPlan.SetBatch(Value: Integer);
-begin
-  if Value < 1 then
-    Value := 1;
-  if Value <> FBatch then
-  begin
-    FBatch := Value;
-    if FBatch > 1 then
-    begin
-      FHeight := 0;
-      FDepth := 0;
-    end;
-    CuNotifyChange(cuchSize);
-  end;
-end;
-
-procedure TCUDAFFTPlan.SetTransform(Value: TCUDAFFTransform);
-begin
-  if Value <> FTransform then
-  begin
-    FTransform := Value;
-    CuNotifyChange(cuchSize);
-  end;
-end;
-
-procedure TCUDAFFTPlan.Execute(ASrc: TCUDAMemData; ADst: TCUDAMemData;
-  const ADir: TCUDAFFTdir);
-const
-  sFFTdir: array [TCUDAFFTdir] of Integer = (CUFFT_FORWARD, CUFFT_INVERSE);
-
-  cSourceTypeSize: array[TCUDAFFTransform] of Byte = (
-  SizeOf(TcufftReal),
-  SizeOf(TcufftComplex),
-  SizeOf(TcufftComplex),
-  SizeOf(TcufftDoubleReal),
-  SizeOf(TcufftDoubleComplex),
-  SizeOf(TcufftDoubleComplex));
-
-  cDestinationTypeSize: array[TCUDAFFTransform] of Byte = (
-  SizeOf(TcufftComplex),
-  SizeOf(TcufftReal),
-  SizeOf(TcufftComplex),
-  SizeOf(TcufftDoubleComplex),
-  SizeOf(TcufftDoubleReal),
-  SizeOf(TcufftDoubleComplex));
-var
-  SrcPtr, DstPtr: Pointer;
-  LSrcSize, LDstSize: Integer;
-
-  procedure ForwardCheck;
-  begin
-    if (LSrcSize * FSize > ASrc.DataSize)
-      or (LDstSize * FPaddedSize > ADst.DataSize) then
-    begin
-      GLSLogger.LogError(strBadPlanSize);
-      Abort;
-    end;
-  end;
-
-  procedure InverseCheck;
-  begin
-    if (LSrcSize * FPaddedSize > ASrc.DataSize)
-      or (LDstSize * FSize > ADst.DataSize) then
-    begin
-      GLSLogger.LogError(strBadPlanSize);
-      Abort;
-    end;
-  end;
-
-begin
-  if (FHandle = INVALID_CUFFT_HANDLE) or (fChanges <> []) then
-    AllocateHandles;
-
-  if CUDAContextManager.GetCurrentThreadContext <> nil then
-  begin
-    GLSLogger.LogError(strRequireFreeThread);
-    Abort;
-  end;
-
-  SrcPtr := ASrc.RawData;
-  DstPtr := ADst.RawData;
-
-  LSrcSize := cSourceTypeSize[FTransform];
-  LDstSize := cDestinationTypeSize[FTransform];
-
-  Context.Requires;
-  try
-    case FTransform of
-      fftRealToComplex:
-        begin
-          ForwardCheck;
-          FStatus := cufftExecR2C(FHandle, SrcPtr, DstPtr);
-        end;
-
-      fftComplexToReal:
-        begin
-          InverseCheck;
-          FStatus := cufftExecC2R(FHandle, SrcPtr, DstPtr);
-        end;
-
-      fftComplexToComplex:
-        begin
-          case ADir of
-            fftdForward: ForwardCheck;
-            fftdInverse: InverseCheck;
-          end;
-          FStatus := cufftExecC2C(FHandle, SrcPtr, DstPtr, sFFTdir[ADir]);
-        end;
-
-      fftDoubleToDoubleComplex:
-      begin
-        ForwardCheck;
-        FStatus := cufftExecD2Z(FHandle, SrcPtr, DstPtr);
-      end;
-
-      fftDoubleComplexToDouble:
-      begin
-        InverseCheck;
-        FStatus := cufftExecZ2D(FHandle, SrcPtr, DstPtr);
-      end;
-
-      fftDoubleComplexToDoubleComplex:
-      begin
-        case ADir of
-          fftdForward: ForwardCheck;
-          fftdInverse: InverseCheck;
-        end;
-        FStatus := cufftExecZ2Z(FHandle, SrcPtr, DstPtr, sFFTdir[ADir]);
-      end
-    else
-      FStatus := CUFFT_INVALID_VALUE;
-    end;
-  finally
-    Context.Release;
-  end;
-
-  if FStatus <> CUFFT_SUCCESS then
-    Abort;
-end;
-
-// ------------------------------------------------------------------
-initialization
-// ------------------------------------------------------------------
-
-  RegisterClasses([TCUDAFFTPlan]);
-
-finalization
-
-  CloseCUFFT;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAFFTPlan;
+
+(*  Fast Fourier Transform for CUDA *)
+
+interface
+
+{$I GLScene.inc}
+
+uses
+  System.Classes,
+  System.SysUtils,
+
+  Import.CUDAApi,
+  GPU.CUDAContext,
+  GPU.CUDA,
+  GPU.CUDAFourierTransform,
+
+  GLS.Strings,
+  GLS.Logger;
+
+type
+
+  // Kind of transform a plan performs. AllocateHandles maps each value to
+  // the CUFFT plan type named in the trailing comment.
+  TCUDAFFTransform =
+  (
+    fftRealToComplex,                 // CUFFT_R2C
+    fftComplexToReal,                 // CUFFT_C2R
+    fftComplexToComplex,              // CUFFT_C2C
+    fftDoubleToDoubleComplex,         // CUFFT_D2Z
+    fftDoubleComplexToDouble,         // CUFFT_Z2D
+    fftDoubleComplexToDoubleComplex   // CUFFT_Z2Z
+  );
+
+  // Transform direction. Execute translates it to CUFFT_FORWARD or
+  // CUFFT_INVERSE and only consults it for the two complex-to-complex
+  // transforms.
+  TCUDAFFTdir = (fftdForward, fftdInverse);
+
+  // Component wrapping a CUFFT plan handle together with the dimensions and
+  // transform kind used to create it.
+  TCUDAFFTPlan = class(TCUDAComponent)
+  private
+    // CUFFT plan handle; INVALID_CUFFT_HANDLE until AllocateHandles succeeds.
+    FHandle: TcufftHandle;
+    FWidth: Integer;
+    FHeight: Integer;
+    FDepth: Integer;
+    FBatch: Integer;
+    // Element count of the full data set, computed in AllocateHandles as the
+    // product of the dimensions in use (times Batch for a 1D plan).
+    FSize: Integer;
+    // Element count computed in AllocateHandles with the last dimension in
+    // use replaced by (dimension div 2 + 1); Execute uses it to size-check
+    // the complex side of real<->complex transforms.
+    FPaddedSize: Integer;
+    FTransform: TCUDAFFTransform;
+    // Result code of the most recent CUFFT call made by this component.
+    FStatus: TcufftResult;
+    procedure SetWidth(Value: Integer);
+    procedure SetHeight(Value: Integer);
+    procedure SetDepth(Value: Integer);
+    procedure SetBatch(Value: Integer);
+    procedure SetTransform(Value: TCUDAFFTransform);
+  protected
+    // Destroys any existing plan and creates a new one from the current
+    // property values.
+    procedure AllocateHandles; override;
+    // Destroys the plan held in FHandle, if there is one.
+    procedure DestroyHandles; override;
+    // Loads the CUFFT library on first use; logs an error and aborts when
+    // it cannot be initialized.
+    class procedure CheckLib;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure Assign(Source: TPersistent); override;
+    // Runs the transform from ASrc into ADst, (re)creating the plan first
+    // when it is missing or a property changed. ADir defaults to forward.
+    procedure Execute(ASrc: TCUDAMemData; ADst: TCUDAMemData;
+      const ADir: TCUDAFFTdir = fftdForward);
+  published
+    // First dimension; the setter clamps it to a minimum of 1.
+    property Width: Integer read fWidth write SetWidth default 256;
+    // Second dimension; the setter clamps negatives to 0. A 1D plan is
+    // created when Height and Depth are both 0. Setting a value above 0
+    // resets Batch to 1.
+    property Height: Integer read FHeight write SetHeight default 0;
+    // Third dimension; the setter clamps negatives to 0. A 2D plan is
+    // created when Depth is 0 and Height is not. Setting a value above 0
+    // resets Batch to 1.
+    property Depth: Integer read FDepth write SetDepth default 0;
+    // Batch count passed to cufftPlan1d; the setter clamps it to a minimum
+    // of 1. Setting a value above 1 resets Height and Depth to 0.
+    property Batch: Integer read FBatch write SetBatch default 1;
+    property Transform: TCUDAFFTransform read FTransform write SetTransform
+      default fftRealToComplex;
+  end;
+
+//---------------------------------------------------------------------  
+implementation
+//---------------------------------------------------------------------  
+
+// Creates the component with no CUFFT plan allocated and the same defaults
+// the published properties declare: a 256-point, single-batch, 1D
+// real-to-complex transform.
+constructor TCUDAFFTPlan.Create(AOwner: TComponent);
+begin
+  inherited;
+
+  // No plan exists until AllocateHandles is called.
+  FHandle := INVALID_CUFFT_HANDLE;
+
+  // Transform kind and batch count.
+  FTransform := fftRealToComplex;
+  FBatch := 1;
+
+  // Dimensions: only the first one is in use by default.
+  FDepth := 0;
+  FHeight := 0;
+  fWidth := 256;
+end;
+
+// Releases the CUFFT plan (if any) before the inherited teardown runs.
+destructor TCUDAFFTPlan.Destroy;
+begin
+  DestroyHandles;
+  inherited Destroy;
+end;
+
+// Makes sure the CUFFT library is loaded. Returns silently when it already
+// is, or when loading it now succeeds; otherwise logs an error and raises
+// the silent EAbort exception through Abort.
+class procedure TCUDAFFTPlan.CheckLib;
+begin
+  if IsCUFFTInitialized then
+    Exit;
+
+  // Not loaded yet: try once; InitCUFFT reports success as True.
+  if InitCUFFT then
+    Exit;
+
+  GLSLogger.LogError('Can not initialize CUFFT library');
+  Abort;
+end;
+
+// Copies the plan description (dimensions, batch count and transform kind)
+// from another TCUDAFFTPlan. The current CUFFT plan handle is destroyed
+// first; the values go through the property setters so their clamping and
+// change notifications apply. Whatever Source is, it is then handed to the
+// inherited Assign.
+procedure TCUDAFFTPlan.Assign(Source: TPersistent);
+var
+  plan: TCUDAFFTPlan;
+begin
+  if Source is TCUDAFFTPlan then
+  begin
+    DestroyHandles;
+    plan := TCUDAFFTPlan(Source);
+    Width := plan.fWidth;
+    Height := plan.FHeight;
+    Depth := plan.FDepth;
+    // Batch was previously not copied, so a batched 1D plan lost its batch
+    // count on assignment. It is set after Height and Depth because their
+    // setters reset FBatch to 1 whenever a non-zero value is stored.
+    Batch := plan.FBatch;
+    Transform := plan.FTransform;
+  end;
+  inherited Assign(Source);
+end;
+
+// (Re)creates the CUFFT plan from the current property values.
+//
+// Any existing plan is destroyed first (DestroyHandles also makes sure the
+// CUFFT library is loaded). A 1D plan is created when Height and Depth are
+// both 0, a 2D plan when only Depth is 0, and a 3D plan otherwise. FSize and
+// FPaddedSize are recomputed for the size checks done in Execute.
+// Raises EAbort (via Abort) when plan creation fails; FHandle is then left
+// as INVALID_CUFFT_HANDLE.
+procedure TCUDAFFTPlan.AllocateHandles;
+var
+  LType: TcufftType;
+begin
+  DestroyHandles;
+
+  case FTransform of
+    fftRealToComplex:
+      LType := CUFFT_R2C;
+    fftComplexToReal:
+      LType := CUFFT_C2R;
+    fftComplexToComplex:
+      LType := CUFFT_C2C;
+    fftDoubleToDoubleComplex:
+      LType := CUFFT_D2Z;
+    fftDoubleComplexToDouble:
+      LType := CUFFT_Z2D;
+    fftDoubleComplexToDoubleComplex:
+      LType := CUFFT_Z2Z;
+  else
+    begin
+      Assert(False, 'Error: Unknown Type');
+      LType := CUFFT_R2C;
+    end;
+  end;
+
+  // The context is released in a finally block, as Execute does, so an
+  // exception raised inside a CUFFT call cannot leave it held.
+  Context.Requires;
+  try
+    if (FHeight = 0) and (FDepth = 0) then
+    begin
+      FStatus := cufftPlan1d(FHandle, fWidth, LType, FBatch);
+      FSize := FWidth;
+      FPaddedSize := FWidth div 2 + 1;
+      if FBatch > 0 then
+      begin
+        FSize := FSize * FBatch;
+        FPaddedSize := FPaddedSize * FBatch;
+      end;
+    end
+    else if FDepth = 0 then
+    begin
+      FStatus := cufftPlan2d(FHandle, fWidth, FHeight, LType);
+      FSize := FWidth * FHeight;
+      FPaddedSize := FWidth * (FHeight div 2 + 1);
+    end
+    else
+    begin
+      FStatus := cufftPlan3d(FHandle, fWidth, FHeight, FDepth, LType);
+      FSize := FWidth * FHeight * FDepth;
+      FPaddedSize := FWidth * FHeight * (FDepth div 2 + 1);
+    end;
+  finally
+    Context.Release;
+  end;
+
+  if FStatus <> CUFFT_SUCCESS then
+  begin
+    FHandle := INVALID_CUFFT_HANDLE;
+    Abort;
+  end;
+
+  // The result of this call is stored in FStatus but deliberately not
+  // checked, exactly as before: a failure here does not invalidate the plan.
+  Context.Requires;
+  try
+    FStatus := cufftSetCompatibilityMode(FHandle, CUFFT_COMPATIBILITY_FFTW_PADDING);
+  finally
+    Context.Release;
+  end;
+
+  // The plan now reflects every property; clear the pending change flags.
+  fChanges := [];
+  inherited;
+end;
+
+// Destroys the CUFFT plan held in FHandle, if there is one.
+//
+// CheckLib runs unconditionally, so this method also guarantees that the
+// CUFFT library is loaded (AllocateHandles relies on that). Raises EAbort
+// (via Abort) when the library cannot be loaded or cufftDestroy fails; in
+// the latter case FHandle is left unchanged.
+procedure TCUDAFFTPlan.DestroyHandles;
+begin
+  inherited;
+  CheckLib;
+
+  if FHandle <> INVALID_CUFFT_HANDLE then
+  begin
+    // Release the context even if the CUFFT call raises.
+    Context.Requires;
+    try
+      FStatus := cufftDestroy(FHandle);
+    finally
+      Context.Release;
+    end;
+    if FStatus <> CUFFT_SUCCESS then
+      Abort;
+    // Mark the plan as absent with the same sentinel every other check in
+    // this class uses. Storing 0 here made the next DestroyHandles call
+    // cufftDestroy(0) and hid the missing plan from Execute.
+    FHandle := INVALID_CUFFT_HANDLE;
+    FSize := 0;
+    FPaddedSize := 0;
+  end;
+end;
+
+// Setter for Width. Values below 1 are stored as 1; a size change
+// notification is sent only when the stored value actually changes.
+procedure TCUDAFFTPlan.SetWidth(Value: Integer);
+var
+  LWidth: Integer;
+begin
+  if Value >= 1 then
+    LWidth := Value
+  else
+    LWidth := 1;
+
+  if LWidth = fWidth then
+    Exit;
+
+  fWidth := LWidth;
+  CuNotifyChange(cuchSize);
+end;
+
+// Setter for Height. Negative values are stored as 0. Storing a non-zero
+// height resets the batch count to 1. A size change notification is sent
+// only when the stored value actually changes.
+procedure TCUDAFFTPlan.SetHeight(Value: Integer);
+var
+  LHeight: Integer;
+begin
+  if Value >= 0 then
+    LHeight := Value
+  else
+    LHeight := 0;
+
+  if LHeight = FHeight then
+    Exit;
+
+  FHeight := LHeight;
+  if LHeight > 0 then
+    FBatch := 1;
+  CuNotifyChange(cuchSize);
+end;
+
+// Setter for Depth. Negative values are stored as 0. Storing a non-zero
+// depth resets the batch count to 1. A size change notification is sent
+// only when the stored value actually changes.
+procedure TCUDAFFTPlan.SetDepth(Value: Integer);
+var
+  LDepth: Integer;
+begin
+  if Value >= 0 then
+    LDepth := Value
+  else
+    LDepth := 0;
+
+  if LDepth = FDepth then
+    Exit;
+
+  FDepth := LDepth;
+  if LDepth > 0 then
+    FBatch := 1;
+  CuNotifyChange(cuchSize);
+end;
+
+// Setter for Batch. Values below 1 are stored as 1. Storing a batch count
+// above 1 resets Height and Depth to 0. A size change notification is sent
+// only when the stored value actually changes.
+procedure TCUDAFFTPlan.SetBatch(Value: Integer);
+var
+  LBatch: Integer;
+begin
+  if Value >= 1 then
+    LBatch := Value
+  else
+    LBatch := 1;
+
+  if LBatch = FBatch then
+    Exit;
+
+  FBatch := LBatch;
+  if LBatch > 1 then
+  begin
+    FHeight := 0;
+    FDepth := 0;
+  end;
+  CuNotifyChange(cuchSize);
+end;
+
+// Setter for Transform. Stores the new transform kind and sends a size
+// change notification; does nothing when the value is unchanged.
+procedure TCUDAFFTPlan.SetTransform(Value: TCUDAFFTransform);
+begin
+  if Value = FTransform then
+    Exit;
+
+  FTransform := Value;
+  CuNotifyChange(cuchSize);
+end;
+
+// Runs the planned transform, reading from ASrc and writing to ADst.
+//
+// The plan is (re)created first when it does not exist yet or a property
+// changed since it was created. ADir is only consulted for the two
+// complex-to-complex transforms.
+//
+// Raises EAbort (via Abort) when:
+//   - the calling thread already has a current CUDA context (logged),
+//   - a buffer is too small for the plan (logged),
+//   - the CUFFT call does not return CUFFT_SUCCESS.
+// Buffer sizes are compared against TCUDAMemData.DataSize, which is assumed
+// to be a byte count -- TODO confirm against TCUDAMemData.
+procedure TCUDAFFTPlan.Execute(ASrc: TCUDAMemData; ADst: TCUDAMemData;
+  const ADir: TCUDAFFTdir);
+const
+  sFFTdir: array [TCUDAFFTdir] of Integer = (CUFFT_FORWARD, CUFFT_INVERSE);
+
+  // Size of one source element for each transform kind.
+  cSourceTypeSize: array[TCUDAFFTransform] of Byte = (
+  SizeOf(TcufftReal),
+  SizeOf(TcufftComplex),
+  SizeOf(TcufftComplex),
+  SizeOf(TcufftDoubleReal),
+  SizeOf(TcufftDoubleComplex),
+  SizeOf(TcufftDoubleComplex));
+
+  // Size of one destination element for each transform kind.
+  cDestinationTypeSize: array[TCUDAFFTransform] of Byte = (
+  SizeOf(TcufftComplex),
+  SizeOf(TcufftReal),
+  SizeOf(TcufftComplex),
+  SizeOf(TcufftDoubleComplex),
+  SizeOf(TcufftDoubleReal),
+  SizeOf(TcufftDoubleComplex));
+
+  cBadPlanSize = 'Bad plan size';
+var
+  SrcPtr, DstPtr: Pointer;
+  LSrcSize, LDstSize: Integer;
+
+  // Reports a buffer/plan size mismatch. The failure used to be silent
+  // because the log call had been reduced to a comment.
+  procedure FailBadPlanSize;
+  begin
+    GLSLogger.LogError(cBadPlanSize);
+    Abort;
+  end;
+
+  // Real -> complex: FSize source elements, FPaddedSize destination elements.
+  procedure ForwardCheck;
+  begin
+    if (LSrcSize * FSize > ASrc.DataSize)
+      or (LDstSize * FPaddedSize > ADst.DataSize) then
+      FailBadPlanSize;
+  end;
+
+  // Complex -> real: FPaddedSize source elements, FSize destination elements.
+  procedure InverseCheck;
+  begin
+    if (LSrcSize * FPaddedSize > ASrc.DataSize)
+      or (LDstSize * FSize > ADst.DataSize) then
+      FailBadPlanSize;
+  end;
+
+  // Complex -> complex: both buffers hold the full FSize elements in either
+  // direction. Checking one side against the smaller FPaddedSize let
+  // undersized buffers through.
+  procedure FullSizeCheck;
+  begin
+    if (LSrcSize * FSize > ASrc.DataSize)
+      or (LDstSize * FSize > ADst.DataSize) then
+      FailBadPlanSize;
+  end;
+
+begin
+  if (FHandle = INVALID_CUFFT_HANDLE) or (fChanges <> []) then
+    AllocateHandles;
+
+  if CUDAContextManager.GetCurrentThreadContext <> nil then
+  begin
+    GLSLogger.LogError(strRequireFreeThread);
+    Abort;
+  end;
+
+  SrcPtr := ASrc.RawData;
+  DstPtr := ADst.RawData;
+
+  LSrcSize := cSourceTypeSize[FTransform];
+  LDstSize := cDestinationTypeSize[FTransform];
+
+  Context.Requires;
+  try
+    case FTransform of
+      fftRealToComplex:
+        begin
+          ForwardCheck;
+          FStatus := cufftExecR2C(FHandle, SrcPtr, DstPtr);
+        end;
+
+      fftComplexToReal:
+        begin
+          InverseCheck;
+          FStatus := cufftExecC2R(FHandle, SrcPtr, DstPtr);
+        end;
+
+      fftComplexToComplex:
+        begin
+          FullSizeCheck;
+          FStatus := cufftExecC2C(FHandle, SrcPtr, DstPtr, sFFTdir[ADir]);
+        end;
+
+      fftDoubleToDoubleComplex:
+      begin
+        ForwardCheck;
+        FStatus := cufftExecD2Z(FHandle, SrcPtr, DstPtr);
+      end;
+
+      fftDoubleComplexToDouble:
+      begin
+        InverseCheck;
+        FStatus := cufftExecZ2D(FHandle, SrcPtr, DstPtr);
+      end;
+
+      fftDoubleComplexToDoubleComplex:
+      begin
+        FullSizeCheck;
+        FStatus := cufftExecZ2Z(FHandle, SrcPtr, DstPtr, sFFTdir[ADir]);
+      end
+    else
+      FStatus := CUFFT_INVALID_VALUE;
+    end;
+  finally
+    // Also reached when a size check aborts.
+    Context.Release;
+  end;
+
+  if FStatus <> CUFFT_SUCCESS then
+    Abort;
+end;
+
+// ------------------------------------------------------------------
+initialization
+// ------------------------------------------------------------------
+
+  // Register the component class with the RTL class registry.
+  RegisterClasses([TCUDAFFTPlan]);
+
+finalization
+
+  // Release the CUFFT library when the unit is finalized.
+  CloseCUFFT;
+
+end.

+ 489 - 519
Source/GLS.CUDAFourierTransform.pas → Source/GPU.CUDAFourierTransform.pas

@@ -1,519 +1,489 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAFourierTransform;
-
-(* CUDA Fourier Transform *)
-
-/// *
-// * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
-// *
-// * NOTICE TO USER:
-// *
-// * This source code is subject to NVIDIA ownership rights under U.S. and
-// * international Copyright laws.  Users and possessors of this source code
-// * are hereby granted a nonexclusive, royalty-free license to use this code
-// * in individual and commercial software.
-// *
-// * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
-// * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
-// * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
-// * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
-// * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
-// * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
-// * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-// * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-// * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
-// * OR PERFORMANCE OF THIS SOURCE CODE.
-// *
-// * U.S. Government End Users.   This source code is a "commercial item" as
-// * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
-// * "commercial computer  software"  and "commercial computer software
-// * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
-// * and is provided to the U.S. Government only as a commercial end item.
-// * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
-// * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
-// * source code with only those rights set forth herein.
-// *
-// * Any use of this source code in individual and commercial software must
-// * include, in the user documentation and internal comments to the code,
-// * the above Disclaimer and U.S. Government End Users Notice.
-// */
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  Winapi.Windows,
-
-  GLS.VectorTypes,
-  GLS.Strings,
-
-  GLS.CUDAApi,
-  GLS.CUDARunTime
-
-  {$IFDEF USE_LOGGING},GLSLog;{$ELSE};{$ENDIF}
-
-
-const
-{$IFDEF WIN32}
-  CUFFTDLLNAMES: array [0 .. 9] of string = (
-    'cufft32_42_9', 'cufft32_41_28',
-    'cufft32_40_10', 'cufft32_32_16', 'cufft32_31_4', 'cufft32_30_14',
-    'cufft32_30_9', 'cufft32_30_8', 'cufft32', 'cufft');
-{$ENDIF}
-
-{$IFDEF WIN64}
-  CUFFTDLLNAMES: array [0 .. 7] of string = (
-    'cufft64_42_9', 'cufft64_41_28',
-    'cufft64_40_10', 'cufft64_32_16', 'cufft64_31_4', 'cufft64_30_14',
-    'cufft64_30_9', 'cufft64_30_8');
-{$ENDIF}
-  /// CUFFT API function return values
-
-type
-  /// CUFFT defines and supports the following data types
-
-  /// cufftHandle is a handle type used to store and access CUFFT plans.
-  TcufftHandle = type Cardinal;
-
-  TcufftReal = Single;
-  PcufftReal = ^TcufftReal;
-
-  TcufftRealfloat = Single;
-  PcufftDoubleReal = ^TcufftDoubleReal;
-  TcufftDoubleReal = Double;
-
-  PcufftDoubleComplex = ^TcufftDoubleComplex;
-  TcufftDoubleComplex = TVector2d;
-
-  PcufftComplex = ^TcufftComplex;
-  TcufftComplex = TVector2f;
-
-  TcufftResult = type Byte;
-
-const
-  INVALID_CUFFT_HANDLE = $FFFFFFFF;
-
-  CUFFT_SUCCESS: TcufftResult = $00;
-  CUFFT_INVALID_PLAN: TcufftResult = $01;
-  CUFFT_ALLOC_FAILED: TcufftResult = $02;
-  CUFFT_INVALID_TYPE: TcufftResult = $03;
-  CUFFT_INVALID_VALUE: TcufftResult = $04;
-  CUFFT_INTERNAL_ERROR: TcufftResult = $05;
-  CUFFT_EXEC_FAILED: TcufftResult = $06;
-  CUFFT_SETUP_FAILED: TcufftResult = $07;
-  CUFFT_INVALID_SIZE: TcufftResult = $08;
-
-type
-  TcufftType = type Cardinal;
-
-  TcudaRoundMode = (cudaRoundNearest, cudaRoundZero, cudaRoundPosInf,
-    cudaRoundMinInf);
-
-  /// CUFFT transform directions
-const
-  CUFFT_FORWARD = -1; // Forward FFT
-  CUFFT_INVERSE = 1;  // Inverse FFT
-
-  /// CUFFT supports the following transform types
-  CUFFT_R2C: TcufftType = $2A; // Real to Complex (interleaved)
-  CUFFT_C2R: TcufftType = $2C; // Complex (interleaved) to Real
-  CUFFT_C2C: TcufftType = $29; // Complex to Complex, interleaved
-  CUFFT_D2Z: TcufftType = $6A; // Double to Double-Complex
-  CUFFT_Z2D: TcufftType = $6C; // Double-Complex to Double
-  CUFFT_Z2Z: TcufftType = $69; // Double-Complex to Double-Complex
-
-  (*
-    Certain R2C and C2R transforms go much more slowly when FFTW memory
-    layout and behaviour is required. The default is "best performance",
-    which means not-compatible-with-fftw. Use the cufftSetCompatibilityMode
-    API to enable exact FFTW-like behaviour.
-
-    These flags can be ORed together to select precise FFTW compatibility
-    behaviour. The two levels presently supported are:
-
-    CUFFT_COMPATIBILITY_FFTW_PADDING
-    Inserts extra padding between packed in-place transforms for
-    batched transforms with power-of-2 size.
-
-    CUFFT_COMPATIBILITY_FFTW_C2R_ASYMMETRIC
-    Guarantees FFTW-compatible output for non-symmetric complex inputs
-    for transforms with power-of-2 size. This is only useful for
-    artificial (i.e. random) datasets as actual data will always be
-    symmetric if it has come from the real plane. If you don't
-    understand what this means, you probably don't have to use it.
-
-    CUFFT_COMPATIBILITY_FFTW
-    For convenience, enables all FFTW compatibility modes at once.
-  *)
-
-type
-
-  TcufftCompatibility = type Cardinal;
-
-const
-  CUFFT_COMPATIBILITY_NORMAL: TcufftCompatibility = $00; // The default value
-  CUFFT_COMPATIBILITY_FFTW_PADDING: TcufftCompatibility = $01;
-  CUFFT_COMPATIBILITY_FFTW_C2R_ASYMMETRIC: TcufftCompatibility = $02;
-  CUFFT_COMPATIBILITY_FFTW: TcufftCompatibility = $03;
-
-type
-
-  TcufftPlan1d = function(out plan: TcufftHandle; nx: Integer;
-    atype: TcufftType; batch: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftPlan2d = function(out plan: TcufftHandle; nx: Integer; ny: Integer;
-    atype: TcufftType): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftPlan3d = function(out plan: TcufftHandle; nx: Integer; ny: Integer;
-    nz: Integer; atype: TcufftType): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftDestroy = function(plan: TcufftHandle): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftPlanMany = function(out plan: TcufftHandle; rank: Integer;
-    var n: Integer; var inembed: Integer; istride, idist: Integer;
-    var onembed: Integer; ostride, odist: Integer; ctype: TcufftType;
-    batch: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecC2C = function(plan: TcufftHandle; idata: PcufftComplex;
-    odata: PcufftComplex; direction: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecR2C = function(plan: TcufftHandle; idata: PcufftReal;
-    odata: PcufftComplex): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecC2R = function(plan: TcufftHandle; idata: PcufftComplex;
-    odata: PcufftReal): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecZ2Z = function(plan: TcufftHandle; idata: PcufftDoubleComplex;
-    odata: PcufftDoubleComplex; direction: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecD2Z = function(plan: TcufftHandle; idata: PcufftDoubleReal;
-    odata: PcufftDoubleComplex): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftExecZ2D = function(plan: TcufftHandle; idata: PcufftDoubleComplex;
-    odata: PcufftDoubleReal): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftSetStream = function(p: TcufftHandle; stream: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcufftSetCompatibilityMode = function(plan: TcufftHandle;
-    mode: TcufftCompatibility): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-
-var
-  cufftPlan1d: TcufftPlan1d;
-  cufftPlan2d: TcufftPlan2d;
-  cufftPlan3d: TcufftPlan3d;
-  cufftDestroy: TcufftDestroy;
-  cufftPlanMany: TcufftPlanMany;
-  cufftExecC2C: TcufftExecC2C;
-  cufftExecR2C: TcufftExecR2C;
-  cufftExecC2R: TcufftExecC2R;
-  cufftExecZ2Z: TcufftExecZ2Z;
-  cufftExecD2Z: TcufftExecD2Z;
-  cufftExecZ2D: TcufftExecZ2D;
-  cufftSetStream: TcufftSetStream;
-  cufftSetCompatibilityMode: TcufftSetCompatibilityMode;
-
-function InitCUFFT: Boolean;
-procedure CloseCUFFT;
-function InitCUFFTFromLibrary(const LibName: WideString): Boolean;
-function IsCUFFTInitialized: Boolean;
-function Get_CUDA_FFT_Error_String(AError: TcufftResult): string;
-
-//---------------------------------------------------------
-implementation
-//---------------------------------------------------------
-
-const
-  cufftPlan1dName = 'cufftPlan1d';
-  cufftPlan2dName = 'cufftPlan2d';
-  cufftPlan3dName = 'cufftPlan3d';
-  cufftDestroyName = 'cufftDestroy';
-  cufftPlanManyName = 'cufftPlanMany';
-  cufftExecC2CName = 'cufftExecC2C';
-  cufftExecR2CName = 'cufftExecR2C';
-  cufftExecC2RName = 'cufftExecC2R';
-  cufftExecZ2ZName = 'cufftExecZ2Z';
-  cufftExecD2ZName = 'cufftExecD2Z';
-  cufftExecZ2DName = 'cufftExecZ2D';
-  cufftSetStreamName = 'cufftSetStream';
-  cufftSetCompatibilityModeName = 'cufftSetCompatibilityMode';
-
-const
-  INVALID_MODULEHANDLE = 0;
-
-var
-{$IFDEF MSWINDOWS}
-  CUFFTHandle: HINST = INVALID_MODULEHANDLE;
-{$ENDIF}{$IFDEF LINUX}
-  CUFFTHandle: TLibHandle = INVALID_MODULEHANDLE;
-{$ENDIF}
-
-{$IFDEF USE_CUDA_DEBUG_MODE}
-var
-  cufftPlan1d_: TcufftPlan1d;
-  cufftPlan2d_: TcufftPlan2d;
-  cufftPlan3d_: TcufftPlan3d;
-  cufftDestroy_: TcufftDestroy;
-  cufftPlanMany_: TcufftPlanMany;
-  cufftExecC2C_: TcufftExecC2C;
-  cufftExecR2C_: TcufftExecR2C;
-  cufftExecC2R_: TcufftExecC2R;
-  cufftExecZ2Z_: TcufftExecZ2Z;
-  cufftExecD2Z_: TcufftExecD2Z;
-  cufftExecZ2D_: TcufftExecZ2D;
-  cufftSetStream_: TcufftSetStream;
-  cufftSetCompatibilityMode_: TcufftSetCompatibilityMode;
-
-function cufftPlan1dShell(out plan: TcufftHandle; nx: Integer;
-  atype: TcufftType; batch: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftPlan1d_(plan, nx, atype, batch);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftPlan1dName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftPlan2dShell(out plan: TcufftHandle; nx: Integer; ny: Integer;
-  atype: TcufftType): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftPlan2d_(plan, nx, ny, atype);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftPlan2dName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftPlan3dShell(out plan: TcufftHandle; nx: Integer; ny: Integer;
-  nz: Integer; atype: TcufftType): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftPlan3d_(plan, nx, ny, nz, atype);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftPlan3dName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftDestroyShell(plan: TcufftHandle): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftDestroy_(plan);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftDestroyName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftPlanManyShell(out plan: TcufftHandle; rank: Integer;
-  var n: Integer; var inembed: Integer; istride, idist: Integer;
-  var onembed: Integer; ostride, odist: Integer; ctype: TcufftType;
-  batch: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftPlanMany_(plan, rank, n, inembed, istride, idist, onembed,
-    ostride, odist, ctype, batch);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftPlanManyName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecC2CShell(plan: TcufftHandle; idata: PcufftComplex;
-  odata: PcufftComplex; direction: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecC2C_(plan, idata, odata, direction);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecC2CName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecR2CShell(plan: TcufftHandle; idata: PcufftReal;
-  odata: PcufftComplex): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecR2C_(plan, idata, odata);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecR2CName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecC2RShell(plan: TcufftHandle; idata: PcufftComplex;
-  odata: PcufftReal): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecC2R_(plan, idata, odata);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecC2RName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecZ2ZShell(plan: TcufftHandle; idata: PcufftDoubleComplex;
-  odata: PcufftDoubleComplex; direction: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecZ2Z_(plan, idata, odata, direction);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecZ2ZName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecD2ZShell(plan: TcufftHandle; idata: PcufftDoubleReal;
-  odata: PcufftDoubleComplex): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecD2Z_(plan, idata, odata);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecD2ZName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftExecZ2DShell(plan: TcufftHandle; idata: PcufftDoubleComplex;
-  odata: PcufftDoubleReal): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftExecZ2D_(plan, idata, odata);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftExecZ2DName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftSetStreamShell(p: TcufftHandle; stream: Integer): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftSetStream_(p, stream);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftSetStreamName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-
-function cufftSetCompatibilityModeShell(plan: TcufftHandle;
-  mode: TcufftCompatibility): TcufftResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cufftSetCompatibilityMode_(plan, mode);
-  if Result <> CUFFT_SUCCESS then
-    GLSLogger.LogErrorFmt(strFFTFuncRetErr, [cufftSetCompatibilityModeName,
-      Get_CUDA_FFT_Error_String(Result)]);
-end;
-{$ENDIF GLS_CUDA_DEBUG_MODE}
-
-function CUFFTGetProcAddress(ProcName: PAnsiChar): Pointer;
-begin
-  result := GetProcAddress(CUFFTHandle, ProcName);
-end;
-
-function InitCUFFT: Boolean;
-var
-  I: Integer;
-begin
-  Result := True;
-  if CUFFTHandle = INVALID_MODULEHANDLE then
-  begin
-    for I := 0 to High(CUFFTDLLNAMES) do
-    begin
-      if InitCUFFTFromLibrary(CUFFTDLLNAMES[I] + '.dll') then
-        Exit;
-    end;
-    Result := False;
-  end;
-end;
-
-procedure CloseCUFFT;
-begin
-  if CUFFTHandle <> INVALID_MODULEHANDLE then
-  begin
-    FreeLibrary(Cardinal(CUFFTHandle));
-    CUFFTHandle := INVALID_MODULEHANDLE;
-  end;
-end;
-
-function InitCUFFTFromLibrary(const LibName: WideString): Boolean;
-begin
-  CloseCUFFT;
-  CUFFTHandle := GetModuleHandleW(PWideChar(LibName));
-  if CUFFTHandle = INVALID_MODULEHANDLE then
-    CUFFTHandle := LoadLibraryW(PWideChar(LibName));
-  if CUFFTHandle = INVALID_MODULEHANDLE then
-    Exit(False);
-{$IFNDEF USE_CUDA_DEBUG_MODE}
-  cufftPlan1d := CUFFTGetProcAddress(cufftPlan1dName);
-  cufftPlan2d := CUFFTGetProcAddress(cufftPlan2dName);
-  cufftPlan3d := CUFFTGetProcAddress(cufftPlan3dName);
-  cufftDestroy := CUFFTGetProcAddress(cufftDestroyName);
-  cufftPlanMany := CUFFTGetProcAddress(cufftPlanManyName);
-  cufftExecC2C := CUFFTGetProcAddress(cufftExecC2CName);
-  cufftExecR2C := CUFFTGetProcAddress(cufftExecR2CName);
-  cufftExecC2R := CUFFTGetProcAddress(cufftExecC2RName);
-  cufftExecZ2Z := CUFFTGetProcAddress(cufftExecZ2ZName);
-  cufftExecD2Z := CUFFTGetProcAddress(cufftExecD2ZName);
-  cufftExecZ2D := CUFFTGetProcAddress(cufftExecZ2DName);
-  cufftSetStream := CUFFTGetProcAddress(cufftSetStreamName);
-  cufftSetCompatibilityMode := CUFFTGetProcAddress(cufftSetCompatibilityModeName);
-{$ELSE}
-  cufftPlan1d_ := CUFFTGetProcAddress(cufftPlan1dName);
-  cufftPlan1d := cufftPlan1dShell;
-  cufftPlan2d_ := CUFFTGetProcAddress(cufftPlan2dName);
-  cufftPlan2d := cufftPlan2dShell;
-  cufftPlan3d_ := CUFFTGetProcAddress(cufftPlan3dName);
-  cufftPlan3d := cufftPlan3dShell;
-  cufftDestroy_ := CUFFTGetProcAddress(cufftDestroyName);
-  cufftDestroy := cufftDestroyShell;
-  cufftPlanMany_ := CUFFTGetProcAddress(cufftPlanManyName);
-  cufftPlanMany := cufftPlanManyShell;
-  cufftExecC2C_ := CUFFTGetProcAddress(cufftExecC2CName);
-  cufftExecC2C := cufftExecC2CShell;
-  cufftExecR2C_ := CUFFTGetProcAddress(cufftExecR2CName);
-  cufftExecR2C := cufftExecR2CShell;
-  cufftExecC2R_ := CUFFTGetProcAddress(cufftExecC2RName);
-  cufftExecC2R := cufftExecC2RShell;
-  cufftExecZ2Z_ := CUFFTGetProcAddress(cufftExecZ2ZName);
-  cufftExecZ2Z := cufftExecZ2ZShell;
-  cufftExecD2Z_ := CUFFTGetProcAddress(cufftExecD2ZName);
-  cufftExecD2Z := cufftExecD2ZShell;
-  cufftExecZ2D_ := CUFFTGetProcAddress(cufftExecZ2DName);
-  cufftExecZ2D := cufftExecZ2DShell;
-  cufftSetStream_ := CUFFTGetProcAddress(cufftSetStreamName);
-  cufftSetStream := cufftSetStreamShell;
-  cufftSetCompatibilityMode_ := CUFFTGetProcAddress(cufftSetCompatibilityModeName);
-  cufftSetCompatibilityMode := cufftSetCompatibilityModeShell;
-{$ENDIF}
-
- {$IFDEF USE_LOGGING}
-  LogInfoFmt('%s loaded...', [LibName]);
- {$ENDIF}
-  Result := True;
-end;
-
-function IsCUFFTInitialized: Boolean;
-begin
-  result := (CUFFTHandle <> INVALID_MODULEHANDLE);
-end;
-
-function Get_CUDA_FFT_Error_String(AError: TcufftResult): string;
-begin
-  if AError = CUFFT_SUCCESS then
-    result := 'CUFFT operation is successful.'
-  else if AError = CUFFT_INVALID_PLAN then
-    result := 'CUFFT is passed an invalid plan handle.'
-  else if AError = CUFFT_ALLOC_FAILED then
-    result := 'CUFFT failed to allocate GPU memory.'
-  else if AError = CUFFT_INVALID_TYPE then
-    result := 'The user requests an unsupported type.'
-  else if AError = CUFFT_INVALID_VALUE then
-    result := 'The user specifies a bad memory pointer.'
-  else if AError = CUFFT_INTERNAL_ERROR then
-    result := 'Used for all internal driver errors.'
-  else if AError = CUFFT_EXEC_FAILED then
-    result := 'CUFFT failed to execute an FFT on the GPU.'
-  else if AError = CUFFT_SETUP_FAILED then
-    result := 'The CUFFT library failed to initialize.'
-  else if AError = CUFFT_INVALID_SIZE then
-    result := 'The user specifies an unsupported FFT size.'
-  else
-    result := 'Unknown error.'
-end;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAFourierTransform;
+
+(* CUDA Fourier Transform *)
+
+/// *
+// * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
+// *
+// * NOTICE TO USER:
+// *
+// * This source code is subject to NVIDIA ownership rights under U.S. and
+// * international Copyright laws.  Users and possessors of this source code
+// * are hereby granted a nonexclusive, royalty-free license to use this code
+// * in individual and commercial software.
+// *
+// * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+// * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+// * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+// * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+// * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+// * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+// * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+// * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
+// * OR PERFORMANCE OF THIS SOURCE CODE.
+// *
+// * U.S. Government End Users.   This source code is a "commercial item" as
+// * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
+// * "commercial computer  software"  and "commercial computer software
+// * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
+// * and is provided to the U.S. Government only as a commercial end item.
+// * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+// * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+// * source code with only those rights set forth herein.
+// *
+// * Any use of this source code in individual and commercial software must
+// * include, in the user documentation and internal comments to the code,
+// * the above Disclaimer and U.S. Government End Users Notice.
+// */
+
+interface
+
+uses
+  Winapi.Windows,
+
+  GLS.VectorTypes,
+  GLS.Strings,
+
+  Import.CUDAApi,
+  Import.CUDARunTime;
+
+
+const
+  // Base names (without file extension) of the CUFFT libraries that
+  // InitCUFFT tries to load, in the order listed; InitCUFFT appends '.dll'.
+  // One list is compiled per target. Unlike the WIN32 list, the WIN64 list
+  // contains no unversioned fallback name.
+{$IFDEF WIN32}
+  CUFFTDLLNAMES: array [0 .. 9] of string = (
+    'cufft32_42_9', 'cufft32_41_28',
+    'cufft32_40_10', 'cufft32_32_16', 'cufft32_31_4', 'cufft32_30_14',
+    'cufft32_30_9', 'cufft32_30_8', 'cufft32', 'cufft');
+{$ENDIF}
+
+{$IFDEF WIN64}
+  CUFFTDLLNAMES: array [0 .. 7] of string = (
+    'cufft64_42_9', 'cufft64_41_28',
+    'cufft64_40_10', 'cufft64_32_16', 'cufft64_31_4', 'cufft64_30_14',
+    'cufft64_30_9', 'cufft64_30_8');
+{$ENDIF}
+  /// CUFFT API function return values
+
+type
+  /// CUFFT defines and supports the following data types
+
+  /// cufftHandle is a handle type used to store and access CUFFT plans.
+  TcufftHandle = type Cardinal;
+
+  // Single-precision real element and its pointer type.
+  TcufftReal = Single;
+  PcufftReal = ^TcufftReal;
+
+  // Second alias of Single; not referenced by any declaration in this unit.
+  TcufftRealfloat = Single;
+  // Double-precision real element and its pointer type.
+  PcufftDoubleReal = ^TcufftDoubleReal;
+  TcufftDoubleReal = Double;
+
+  // Double-precision complex element, mapped onto TVector2d.
+  // NOTE(review): presumably (real, imaginary) order - confirm against
+  // the TVector2d declaration.
+  PcufftDoubleComplex = ^TcufftDoubleComplex;
+  TcufftDoubleComplex = TVector2d;
+
+  // Single-precision complex element, mapped onto TVector2f.
+  PcufftComplex = ^TcufftComplex;
+  TcufftComplex = TVector2f;
+
+  // Status code returned by every CUFFT entry point declared in this unit.
+  // NOTE(review): declared as a single byte; confirm that this matches the
+  // size of the result type in the CUFFT header.
+  TcufftResult = type Byte;
+
+const
+  // Sentinel value for a plan handle; not used by the routines in this unit.
+  INVALID_CUFFT_HANDLE = $FFFFFFFF;
+
+  // TcufftResult values. Get_CUDA_FFT_Error_String maps each of them to a
+  // message; the comments below repeat the meaning given there.
+  CUFFT_SUCCESS: TcufftResult = $00;         // operation succeeded
+  CUFFT_INVALID_PLAN: TcufftResult = $01;    // invalid plan handle
+  CUFFT_ALLOC_FAILED: TcufftResult = $02;    // GPU memory allocation failed
+  CUFFT_INVALID_TYPE: TcufftResult = $03;    // unsupported type requested
+  CUFFT_INVALID_VALUE: TcufftResult = $04;   // bad memory pointer
+  CUFFT_INTERNAL_ERROR: TcufftResult = $05;  // internal driver error
+  CUFFT_EXEC_FAILED: TcufftResult = $06;     // FFT execution on the GPU failed
+  CUFFT_SETUP_FAILED: TcufftResult = $07;    // library failed to initialize
+  CUFFT_INVALID_SIZE: TcufftResult = $08;    // unsupported FFT size
+
+type
+  // Transform kind passed to the plan-creation entry points
+  // (see the CUFFT_R2C .. CUFFT_Z2Z constants below).
+  TcufftType = type Cardinal;
+
+  // Rounding modes; not referenced by any other declaration in this unit.
+  TcudaRoundMode = (cudaRoundNearest, cudaRoundZero, cudaRoundPosInf,
+    cudaRoundMinInf);
+
+  /// CUFFT transform directions
+const
+  CUFFT_FORWARD = -1; // Forward FFT
+  CUFFT_INVERSE = 1;  // Inverse FFT
+
+  /// CUFFT supports the following transform types
+  CUFFT_R2C: TcufftType = $2A; // Real to Complex (interleaved)
+  CUFFT_C2R: TcufftType = $2C; // Complex (interleaved) to Real
+  CUFFT_C2C: TcufftType = $29; // Complex to Complex, interleaved
+  CUFFT_D2Z: TcufftType = $6A; // Double to Double-Complex
+  CUFFT_Z2D: TcufftType = $6C; // Double-Complex to Double
+  CUFFT_Z2Z: TcufftType = $69; // Double-Complex to Double-Complex
+
+  (*
+    Certain R2C and C2R transforms go much more slowly when FFTW memory
+    layout and behaviour is required. The default is "best performance",
+    which means not-compatible-with-fftw. Use the cufftSetCompatibilityMode
+    API to enable exact FFTW-like behaviour.
+
+    These flags can be ORed together to select precise FFTW compatibility
+    behaviour. The two levels presently supported are:
+
+    CUFFT_COMPATIBILITY_FFTW_PADDING
+    Inserts extra padding between packed in-place transforms for
+    batched transforms with power-of-2 size.
+
+    CUFFT_COMPATIBILITY_FFTW_C2R_ASYMMETRIC
+    Guarantees FFTW-compatible output for non-symmetric complex inputs
+    for transforms with power-of-2 size. This is only useful for
+    artificial (i.e. random) datasets as actual data will always be
+    symmetric if it has come from the real plane. If you don't
+    understand what this means, you probably don't have to use it.
+
+    CUFFT_COMPATIBILITY_FFTW
+    For convenience, enables all FFTW compatibility modes at once.
+  *)
+
+type
+
+  // Mode value accepted by cufftSetCompatibilityMode.
+  TcufftCompatibility = type Cardinal;
+
+const
+  CUFFT_COMPATIBILITY_NORMAL: TcufftCompatibility = $00; // The default value
+  CUFFT_COMPATIBILITY_FFTW_PADDING: TcufftCompatibility = $01;
+  CUFFT_COMPATIBILITY_FFTW_C2R_ASYMMETRIC: TcufftCompatibility = $02;
+  // Equals FFTW_PADDING or FFTW_C2R_ASYMMETRIC ($01 or $02).
+  CUFFT_COMPATIBILITY_FFTW: TcufftCompatibility = $03;
+
+type
+
+  // Procedural types for the CUFFT entry points that InitCUFFTFromLibrary
+  // resolves by name. All of them use the stdcall calling convention and
+  // return a TcufftResult status code.
+
+  // Plan creation for 1D, 2D and 3D transforms; the new plan handle is
+  // returned through the "plan" out-parameter.
+  TcufftPlan1d = function(out plan: TcufftHandle; nx: Integer;
+    atype: TcufftType; batch: Integer): TcufftResult;stdcall;
+  TcufftPlan2d = function(out plan: TcufftHandle; nx: Integer; ny: Integer;
+    atype: TcufftType): TcufftResult;stdcall;
+  TcufftPlan3d = function(out plan: TcufftHandle; nx: Integer; ny: Integer;
+    nz: Integer; atype: TcufftType): TcufftResult;stdcall;
+  // Releases a plan.
+  TcufftDestroy = function(plan: TcufftHandle): TcufftResult;stdcall;
+  // Plan creation with explicit rank, embedding, stride and distance values.
+  // n, inembed and onembed are passed by reference.
+  // NOTE(review): presumably each refers to the first element of an array
+  // of "rank" integers - confirm against the CUFFT documentation.
+  TcufftPlanMany = function(out plan: TcufftHandle; rank: Integer;
+    var n: Integer; var inembed: Integer; istride, idist: Integer;
+    var onembed: Integer; ostride, odist: Integer; ctype: TcufftType;
+    batch: Integer): TcufftResult;stdcall;
+  // Single-precision execution: complex-to-complex (with direction),
+  // real-to-complex and complex-to-real.
+  TcufftExecC2C = function(plan: TcufftHandle; idata: PcufftComplex;
+    odata: PcufftComplex; direction: Integer): TcufftResult;stdcall;
+  TcufftExecR2C = function(plan: TcufftHandle; idata: PcufftReal;
+    odata: PcufftComplex): TcufftResult;stdcall;
+  TcufftExecC2R = function(plan: TcufftHandle; idata: PcufftComplex;
+    odata: PcufftReal): TcufftResult;stdcall;
+  // Double-precision counterparts of the three execution calls above.
+  TcufftExecZ2Z = function(plan: TcufftHandle; idata: PcufftDoubleComplex;
+    odata: PcufftDoubleComplex; direction: Integer): TcufftResult;stdcall;
+  TcufftExecD2Z = function(plan: TcufftHandle; idata: PcufftDoubleReal;
+    odata: PcufftDoubleComplex): TcufftResult;stdcall;
+  TcufftExecZ2D = function(plan: TcufftHandle; idata: PcufftDoubleComplex;
+    odata: PcufftDoubleReal): TcufftResult;stdcall;
+  // Associates a stream with a plan.
+  // NOTE(review): "stream" is declared as a 32-bit Integer; confirm against
+  // the CUFFT header, where the stream argument may be pointer-sized under
+  // WIN64.
+  TcufftSetStream = function(p: TcufftHandle; stream: Integer): TcufftResult;stdcall;
+  // Selects the FFTW compatibility mode of a plan
+  // (see the CUFFT_COMPATIBILITY_* constants).
+  TcufftSetCompatibilityMode = function(plan: TcufftHandle;
+    mode: TcufftCompatibility): TcufftResult;stdcall;
+
+var
+  // Entry points assigned by InitCUFFTFromLibrary. Without
+  // USE_CUDA_DEBUG_MODE they hold the addresses looked up in the loaded
+  // library; with it they point at the logging *Shell wrappers.
+  // They are not assigned until a library has been loaded successfully.
+  cufftPlan1d: TcufftPlan1d;
+  cufftPlan2d: TcufftPlan2d;
+  cufftPlan3d: TcufftPlan3d;
+  cufftDestroy: TcufftDestroy;
+  cufftPlanMany: TcufftPlanMany;
+  cufftExecC2C: TcufftExecC2C;
+  cufftExecR2C: TcufftExecR2C;
+  cufftExecC2R: TcufftExecC2R;
+  cufftExecZ2Z: TcufftExecZ2Z;
+  cufftExecD2Z: TcufftExecD2Z;
+  cufftExecZ2D: TcufftExecZ2D;
+  cufftSetStream: TcufftSetStream;
+  cufftSetCompatibilityMode: TcufftSetCompatibilityMode;
+
+// Loads the first available library from CUFFTDLLNAMES unless one is
+// already loaded. Returns False only when no candidate could be loaded.
+function InitCUFFT: Boolean;
+// Unloads the currently loaded CUFFT library, if any.
+procedure CloseCUFFT;
+// Unloads any current library, then loads LibName and binds the cufft*
+// entry points. Returns False when the library cannot be loaded.
+function InitCUFFTFromLibrary(const LibName: WideString): Boolean;
+// True while a CUFFT library handle is held by this unit.
+function IsCUFFTInitialized: Boolean;
+// Returns the message text for a TcufftResult value
+// ('Unknown error.' for values without a dedicated message).
+function Get_CUDA_FFT_Error_String(AError: TcufftResult): string;
+
+//---------------------------------------------------------
+implementation
+//---------------------------------------------------------
+
+const
+  // Export names passed to CUFFTGetProcAddress when binding the entry
+  // points; the debug wrappers also use them in their log messages.
+  cufftPlan1dName = 'cufftPlan1d';
+  cufftPlan2dName = 'cufftPlan2d';
+  cufftPlan3dName = 'cufftPlan3d';
+  cufftDestroyName = 'cufftDestroy';
+  cufftPlanManyName = 'cufftPlanMany';
+  cufftExecC2CName = 'cufftExecC2C';
+  cufftExecR2CName = 'cufftExecR2C';
+  cufftExecC2RName = 'cufftExecC2R';
+  cufftExecZ2ZName = 'cufftExecZ2Z';
+  cufftExecD2ZName = 'cufftExecD2Z';
+  cufftExecZ2DName = 'cufftExecZ2D';
+  cufftSetStreamName = 'cufftSetStream';
+  cufftSetCompatibilityModeName = 'cufftSetCompatibilityMode';
+
+const
+  // Value of CUFFTHandle while no library is loaded.
+  INVALID_MODULEHANDLE = 0;
+
+var
+  // Handle of the loaded CUFFT library, or INVALID_MODULEHANDLE.
+  // Set by InitCUFFTFromLibrary and reset by CloseCUFFT.
+{$IFDEF MSWINDOWS}
+  CUFFTHandle: HINST = INVALID_MODULEHANDLE;
+{$ENDIF}{$IFDEF LINUX}
+  CUFFTHandle: TLibHandle = INVALID_MODULEHANDLE;
+{$ENDIF}
+
+{$IFDEF USE_CUDA_DEBUG_MODE}
+var
+  // Debug mode only: addresses looked up in the loaded library. The public
+  // cufft* variables point at the *Shell wrappers below, which call through
+  // these and log every result other than CUFFT_SUCCESS.
+  cufftPlan1d_: TcufftPlan1d;
+  cufftPlan2d_: TcufftPlan2d;
+  cufftPlan3d_: TcufftPlan3d;
+  cufftDestroy_: TcufftDestroy;
+  cufftPlanMany_: TcufftPlanMany;
+  cufftExecC2C_: TcufftExecC2C;
+  cufftExecR2C_: TcufftExecR2C;
+  cufftExecC2R_: TcufftExecC2R;
+  cufftExecZ2Z_: TcufftExecZ2Z;
+  cufftExecD2Z_: TcufftExecD2Z;
+  cufftExecZ2D_: TcufftExecZ2D;
+  cufftSetStream_: TcufftSetStream;
+  cufftSetCompatibilityMode_: TcufftSetCompatibilityMode;
+
+// Debug wrapper: forwards to cufftPlan1d_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftPlan1dShell(out plan: TcufftHandle; nx: Integer;
+  atype: TcufftType; batch: Integer): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftPlan1d_(plan, nx, atype, batch);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftPlan1dName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftPlan2d_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftPlan2dShell(out plan: TcufftHandle; nx: Integer; ny: Integer;
+  atype: TcufftType): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftPlan2d_(plan, nx, ny, atype);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftPlan2dName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftPlan3d_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftPlan3dShell(out plan: TcufftHandle; nx: Integer; ny: Integer;
+  nz: Integer; atype: TcufftType): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftPlan3d_(plan, nx, ny, nz, atype);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftPlan3dName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftDestroy_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftDestroyShell(plan: TcufftHandle): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftDestroy_(plan);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftDestroyName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftPlanMany_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftPlanManyShell(out plan: TcufftHandle; rank: Integer;
+  var n: Integer; var inembed: Integer; istride, idist: Integer;
+  var onembed: Integer; ostride, odist: Integer; ctype: TcufftType;
+  batch: Integer): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftPlanMany_(plan, rank, n,
+    inembed, istride, idist,
+    onembed, ostride, odist,
+    ctype, batch);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftPlanManyName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecC2C_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecC2CShell(plan: TcufftHandle; idata: PcufftComplex;
+  odata: PcufftComplex; direction: Integer): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecC2C_(plan, idata, odata, direction);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecC2CName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecR2C_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecR2CShell(plan: TcufftHandle; idata: PcufftReal;
+  odata: PcufftComplex): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecR2C_(plan, idata, odata);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecR2CName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecC2R_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecC2RShell(plan: TcufftHandle; idata: PcufftComplex;
+  odata: PcufftReal): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecC2R_(plan, idata, odata);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecC2RName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecZ2Z_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecZ2ZShell(plan: TcufftHandle; idata: PcufftDoubleComplex;
+  odata: PcufftDoubleComplex; direction: Integer): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecZ2Z_(plan, idata, odata, direction);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecZ2ZName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecD2Z_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecD2ZShell(plan: TcufftHandle; idata: PcufftDoubleReal;
+  odata: PcufftDoubleComplex): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecD2Z_(plan, idata, odata);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecD2ZName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftExecZ2D_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftExecZ2DShell(plan: TcufftHandle; idata: PcufftDoubleComplex;
+  odata: PcufftDoubleReal): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftExecZ2D_(plan, idata, odata);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftExecZ2DName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftSetStream_ and logs any result other than
+// CUFFT_SUCCESS before handing it back to the caller.
+function cufftSetStreamShell(p: TcufftHandle; stream: Integer): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftSetStream_(p, stream);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftSetStreamName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+
+// Debug wrapper: forwards to cufftSetCompatibilityMode_ and logs any result
+// other than CUFFT_SUCCESS before handing it back to the caller.
+function cufftSetCompatibilityModeShell(plan: TcufftHandle;
+  mode: TcufftCompatibility): TcufftResult;stdcall;
+var
+  Status: TcufftResult;
+begin
+  Status := cufftSetCompatibilityMode_(plan, mode);
+  if Status <> CUFFT_SUCCESS then
+    GLSLogger.LogErrorFmt(strFFTFuncRetErr,
+      [cufftSetCompatibilityModeName, Get_CUDA_FFT_Error_String(Status)]);
+  Result := Status;
+end;
+{$ENDIF USE_CUDA_DEBUG_MODE}
+
+// Looks up the export ProcName in the library referenced by CUFFTHandle
+// and returns whatever GetProcAddress reports for it.
+function CUFFTGetProcAddress(ProcName: PAnsiChar): Pointer;
+begin
+  Exit(GetProcAddress(CUFFTHandle, ProcName));
+end;
+
+// Makes sure a CUFFT library is loaded. When none is loaded yet, every name
+// in CUFFTDLLNAMES is tried in order with '.dll' appended, and the first one
+// accepted by InitCUFFTFromLibrary is kept. Returns False only when every
+// candidate fails.
+function InitCUFFT: Boolean;
+var
+  NameIdx: Integer;
+begin
+  // Already loaded: nothing to do.
+  if CUFFTHandle <> INVALID_MODULEHANDLE then
+    Exit(True);
+
+  for NameIdx := Low(CUFFTDLLNAMES) to High(CUFFTDLLNAMES) do
+    if InitCUFFTFromLibrary(CUFFTDLLNAMES[NameIdx] + '.dll') then
+      Exit(True);
+
+  Result := False;
+end;
+
+// Unloads the CUFFT library if one is currently loaded and resets the
+// module handle, so that IsCUFFTInitialized reports False afterwards.
+// Does nothing when no library is loaded.
+procedure CloseCUFFT;
+begin
+  if CUFFTHandle <> INVALID_MODULEHANDLE then
+  begin
+    // Pass the handle at its declared width: a cast to Cardinal would cut a
+    // 64-bit module handle down to 32 bits under WIN64.
+    FreeLibrary(CUFFTHandle);
+    CUFFTHandle := INVALID_MODULEHANDLE;
+  end;
+end;
+
+// Loads the CUFFT library LibName and binds the cufft* entry points.
+// Any library loaded earlier through this unit is released first.
+// Returns False when the library cannot be loaded and True otherwise.
+// The addresses returned by CUFFTGetProcAddress are stored as they are;
+// no check is made that every entry point was actually found.
+// With USE_CUDA_DEBUG_MODE the looked-up addresses go into the "_"
+// variables and the public variables are pointed at the logging wrappers.
+function InitCUFFTFromLibrary(const LibName: WideString): Boolean;
+begin
+  CloseCUFFT;
+  // Always acquire our own reference with LoadLibraryW. A handle obtained
+  // from GetModuleHandleW is not reference counted, so the FreeLibrary call
+  // in CloseCUFFT would drop a reference that this unit never took.
+  CUFFTHandle := LoadLibraryW(PWideChar(LibName));
+  if CUFFTHandle = INVALID_MODULEHANDLE then
+    Exit(False);
+{$IFNDEF USE_CUDA_DEBUG_MODE}
+  cufftPlan1d := CUFFTGetProcAddress(cufftPlan1dName);
+  cufftPlan2d := CUFFTGetProcAddress(cufftPlan2dName);
+  cufftPlan3d := CUFFTGetProcAddress(cufftPlan3dName);
+  cufftDestroy := CUFFTGetProcAddress(cufftDestroyName);
+  cufftPlanMany := CUFFTGetProcAddress(cufftPlanManyName);
+  cufftExecC2C := CUFFTGetProcAddress(cufftExecC2CName);
+  cufftExecR2C := CUFFTGetProcAddress(cufftExecR2CName);
+  cufftExecC2R := CUFFTGetProcAddress(cufftExecC2RName);
+  cufftExecZ2Z := CUFFTGetProcAddress(cufftExecZ2ZName);
+  cufftExecD2Z := CUFFTGetProcAddress(cufftExecD2ZName);
+  cufftExecZ2D := CUFFTGetProcAddress(cufftExecZ2DName);
+  cufftSetStream := CUFFTGetProcAddress(cufftSetStreamName);
+  cufftSetCompatibilityMode := CUFFTGetProcAddress(cufftSetCompatibilityModeName);
+{$ELSE}
+  cufftPlan1d_ := CUFFTGetProcAddress(cufftPlan1dName);
+  cufftPlan1d := cufftPlan1dShell;
+  cufftPlan2d_ := CUFFTGetProcAddress(cufftPlan2dName);
+  cufftPlan2d := cufftPlan2dShell;
+  cufftPlan3d_ := CUFFTGetProcAddress(cufftPlan3dName);
+  cufftPlan3d := cufftPlan3dShell;
+  cufftDestroy_ := CUFFTGetProcAddress(cufftDestroyName);
+  cufftDestroy := cufftDestroyShell;
+  cufftPlanMany_ := CUFFTGetProcAddress(cufftPlanManyName);
+  cufftPlanMany := cufftPlanManyShell;
+  cufftExecC2C_ := CUFFTGetProcAddress(cufftExecC2CName);
+  cufftExecC2C := cufftExecC2CShell;
+  cufftExecR2C_ := CUFFTGetProcAddress(cufftExecR2CName);
+  cufftExecR2C := cufftExecR2CShell;
+  cufftExecC2R_ := CUFFTGetProcAddress(cufftExecC2RName);
+  cufftExecC2R := cufftExecC2RShell;
+  cufftExecZ2Z_ := CUFFTGetProcAddress(cufftExecZ2ZName);
+  cufftExecZ2Z := cufftExecZ2ZShell;
+  cufftExecD2Z_ := CUFFTGetProcAddress(cufftExecD2ZName);
+  cufftExecD2Z := cufftExecD2ZShell;
+  cufftExecZ2D_ := CUFFTGetProcAddress(cufftExecZ2DName);
+  cufftExecZ2D := cufftExecZ2DShell;
+  cufftSetStream_ := CUFFTGetProcAddress(cufftSetStreamName);
+  cufftSetStream := cufftSetStreamShell;
+  cufftSetCompatibilityMode_ := CUFFTGetProcAddress(cufftSetCompatibilityModeName);
+  cufftSetCompatibilityMode := cufftSetCompatibilityModeShell;
+{$ENDIF}
+
+ {$IFDEF USE_LOGGING}
+  LogInfoFmt('%s loaded...', [LibName]);
+ {$ENDIF}
+  Result := True;
+end;
+
+// Reports whether this unit currently holds a CUFFT library handle.
+function IsCUFFTInitialized: Boolean;
+begin
+  Exit(CUFFTHandle <> INVALID_MODULEHANDLE);
+end;
+
+// Translates a CUFFT status code into its message text. Codes without a
+// dedicated message yield 'Unknown error.'.
+function Get_CUDA_FFT_Error_String(AError: TcufftResult): string;
+begin
+  // The CUFFT_* codes are typed constants, so they are compared one by one.
+  if AError = CUFFT_SUCCESS then
+    Exit('CUFFT operation is successful.');
+  if AError = CUFFT_INVALID_PLAN then
+    Exit('CUFFT is passed an invalid plan handle.');
+  if AError = CUFFT_ALLOC_FAILED then
+    Exit('CUFFT failed to allocate GPU memory.');
+  if AError = CUFFT_INVALID_TYPE then
+    Exit('The user requests an unsupported type.');
+  if AError = CUFFT_INVALID_VALUE then
+    Exit('The user specifies a bad memory pointer.');
+  if AError = CUFFT_INTERNAL_ERROR then
+    Exit('Used for all internal driver errors.');
+  if AError = CUFFT_EXEC_FAILED then
+    Exit('CUFFT failed to execute an FFT on the GPU.');
+  if AError = CUFFT_SETUP_FAILED then
+    Exit('The CUFFT library failed to initialize.');
+  if AError = CUFFT_INVALID_SIZE then
+    Exit('The user specifies an unsupported FFT size.');
+  Result := 'Unknown error.';
+end;
+
+end.

+ 1140 - 1141
Source/GLS.CUDAGraphics.pas → Source/GPU.CUDAGraphics.pas

@@ -1,1141 +1,1140 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAGraphics;
-
-(* CUDA Graphics for GLScene *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  Winapi.OpenGL,
-  Winapi.OpenGLext,
-  System.Classes,
-  System.SysUtils,
-
-  GLS.OpenGLTokens,
-  GLS.CUDAApi,
-  GLS.CUDA,
-
-  GLS.Context,
-  GLS.State,
-  GLS.Scene,
-  GLS.Graphics,
-  GLS.Material,
-  GLS.Strings,
-  GLS.TextureFormat,
-  GLS.Texture,
-  GLSL.Shader,
-  GLSL.ShaderParameter,
-  GLS.PersistentClasses,
-  {$IFDEF USE_LOGGING} GLS.Logger, {$ENDIF}
-  GLS.RenderContextInfo;
-
-
-type
-  TGLVertexAttribute = class;
-  TGLVertexAttributes = class;
-
-  TOnBeforeKernelLaunch = procedure(Sender: TGLVertexAttribute) of object;
-
-  TGLVertexAttribute = class(TCollectionItem)
-  private
-    FName: string;
-    FType: TGLSLDataType;
-    FFunc: TCUDAFunction;
-    FLocation: Integer;
-    FOnBeforeKernelLaunch: TOnBeforeKernelLaunch;
-    procedure SetName(const AName: string);
-    procedure SetType(AType: TGLSLDataType);
-    procedure SetFunc(AFunc: TCUDAFunction);
-    function GetLocation: Integer;
-    function GetOwner: TGLVertexAttributes; reintroduce;
-  public
-    constructor Create(ACollection: TCollection); override;
-    procedure NotifyChange(Sender: TObject);
-    property Location: Integer read GetLocation;
-  published
-    property Name: string read FName write SetName;
-    property GLSLType: TGLSLDataType read FType write SetType;
-    property KernelFunction: TCUDAFunction read FFunc write SetFunc;
-    property OnBeforeKernelLaunch: TOnBeforeKernelLaunch read
-      FOnBeforeKernelLaunch write FOnBeforeKernelLaunch;
-  end;
-
-  TGLVertexAttributes = class(TOwnedCollection)
-  private
-    procedure SetItems(Index: Integer; const AValue: TGLVertexAttribute);
-    function GetItems(Index: Integer): TGLVertexAttribute;
-  public
-    constructor Create(AOwner: TComponent);
-    procedure NotifyChange(Sender: TObject);
-    function MakeUniqueName(const ANameRoot: string): string;
-    function GetAttributeByName(const AName: string): TGLVertexAttribute;
-    function Add: TGLVertexAttribute;
-    property Attributes[Index: Integer]: TGLVertexAttribute read GetItems
-      write SetItems; default;
-  end;
-
-  TFeedBackMeshPrimitive = (fbmpPoint, fbmpLine, fbmpTriangle);
-  TFeedBackMeshLaunching = (fblCommon, fblOnePerAtttribute);
-
-  TGLCustomFeedBackMesh = class(TGLBaseSceneObject)
-  private
-    FGeometryResource: TCUDAGraphicResource;
-    FAttributes: TGLVertexAttributes;
-    FVAO: TGLVertexArrayHandle;
-    FVBO: TGLVBOArrayBufferHandle;
-    FEBO: TGLVBOElementArrayHandle;
-    FPrimitiveType: TFeedBackMeshPrimitive;
-    FVertexNumber: Integer;
-    FElementNumber: Integer;
-    FShader: TGLSLShader;
-    FCommonFunc: TCUDAFunction;
-    FLaunching: TFeedBackMeshLaunching;
-    FBlend: Boolean;
-    procedure SetAttributes(AValue: TGLVertexAttributes);
-    procedure SetPrimitiveType(AValue: TFeedBackMeshPrimitive);
-    procedure SetVertexNumber(AValue: Integer);
-    procedure SetElementNumber(AValue: Integer);
-    procedure SetShader(AShader: TGLSLShader);
-    procedure SetCommonFunc(AFunc: TCUDAFunction);
-  protected
-    procedure Notification(AComponent: TComponent;
-      Operation: TOperation); override;
-    procedure RefreshAttributes;
-    procedure AllocateHandles;
-    procedure LaunchKernels;
-  protected
-    property Attributes: TGLVertexAttributes read FAttributes write SetAttributes;
-    // GLSL shader as material. If it absent or disabled - nothing be drawen.
-    property Shader: TGLSLShader read FShader write SetShader;
-    // Primitive type.
-    property PrimitiveType: TFeedBackMeshPrimitive read FPrimitiveType
-      write SetPrimitiveType default fbmpPoint;
-    // Number of vertexes in array buffer.
-    property VertexNumber: Integer read FVertexNumber
-      write SetVertexNumber default 1;
-    // Number of indexes in element buffer. Zero to disable.
-    property ElementNumber: Integer read FElementNumber
-      write SetElementNumber default 0;
-    (* Used for all attributes and elements if Launching = fblCommon
-       otherwise used own attribute function and this for elements. *)
-    property CommonKernelFunction: TCUDAFunction read FCommonFunc
-      write SetCommonFunc;
-    (* Define mode of manufacturer launching:
-       fblCommon - single launch for all,
-       flOnePerAtttribute - one launch per attribute and elements *)
-    property Launching: TFeedBackMeshLaunching read FLaunching
-      write FLaunching default fblCommon;
-    //Defines if the object uses blending for object sorting purposes.
-    property Blend: Boolean read FBlend write FBlend default False;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure DoRender(var ARci: TGLRenderContextInfo;
-      ARenderSelf, ARenderChildren: Boolean); override;
-    property ArrayBufferHandle: TGLVBOArrayBufferHandle read FVBO;
-    property ElementArrayHandle: TGLVBOElementArrayHandle read FEBO;
-  end;
-
-  TGLFeedbackMesh = class(TGLCustomFeedBackMesh)
-  published
-    property Attributes;
-    property Shader;
-    property PrimitiveType;
-    property VertexNumber;
-    property ElementNumber;
-    property CommonKernelFunction;
-    property Launching;
-    property Blend;
-    property ObjectsSorting;
-    property VisibilityCulling;
-    property Direction;
-    property PitchAngle;
-    property Position;
-    property RollAngle;
-    property Scale;
-    property ShowAxes;
-    property TurnAngle;
-    property Up;
-    property Visible;
-    property Pickable;
-    property OnProgress;
-    property OnPicked;
-    property Behaviours;
-    property Effects;
-  end;
-
-  TCUDAImageResource = class(TCUDAGraphicResource)
-  private
-    fMaterialLibrary: TGLMaterialLibrary;
-    fTextureName: TGLLibMaterialName;
-    procedure SetMaterialLibrary(const Value: TGLMaterialLibrary);
-    procedure SetTextureName(const Value: TGLLibMaterialName);
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    procedure Notification(AComponent: TComponent; Operation: TOperation);
-      override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure MapResources; override;
-    procedure UnMapResources; override;
-    procedure BindArrayToTexture(var cudaArray: TCUDAMemData;
-      ALeyer, ALevel: LOngWord); override;
-  published
-    property TextureName: TGLLibMaterialName read fTextureName write
-      SetTextureName;
-    property MaterialLibrary: TGLMaterialLibrary read fMaterialLibrary write
-      SetMaterialLibrary;
-    property Mapping;
-  end;
-
-  TCUDAGeometryResource = class(TCUDAGraphicResource)
-  private
-    FFeedBackMesh: TGLCustomFeedBackMesh;
-    procedure SetFeedBackMesh(const Value: TGLCustomFeedBackMesh);
-    function GetAttribArraySize(AAttr: TGLVertexAttribute): LongWord;
-  protected
-    procedure AllocateHandles; override;
-    procedure DestroyHandles; override;
-    procedure Notification(AComponent: TComponent; Operation: TOperation); override;
-    function GetAttributeArraySize(const AName: string): LongWord; override;
-    function GetAttributeArrayAddress(const AName: string): Pointer; override;
-    function GetElementArrayDataSize: LongWord; override;
-    function GetElementArrayAddress: Pointer; override;
-  public
-    constructor Create(AOwner: TComponent); override;
-    destructor Destroy; override;
-    procedure MapResources; override;
-    procedure UnMapResources; override;
-    property AttributeDataSize[const AttribName: string]: LongWord read
-      GetAttributeArraySize;
-    property AttributeDataAddress[const AttribName: string]: Pointer read
-      GetAttributeArrayAddress;
-    property IndexDataSize: LongWord read GetElementArrayDataSize;
-    property IndexDataAddress: Pointer read GetElementArrayAddress;
-  published
-    property FeedBackMesh: TGLCustomFeedBackMesh read FFeedBackMesh write
-      SetFeedBackMesh;
-    property Mapping;
-  end;
-
-//---------------------------------------------------------------------------
-implementation
-//---------------------------------------------------------------------------
-
-// ------------------
-// ------------------ TCUDAGLImageResource ------------------
-// ------------------
-
-constructor TCUDAImageResource.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  fHandle[0] := nil;
-  fResourceType := rtTexture;
-  FGLContextHandle := TGLVirtualHandle.Create;
-  FGLContextHandle.OnAllocate := OnGLHandleAllocate;
-  FGLContextHandle.OnDestroy := OnGLHandleDestroy;
-end;
-
-destructor TCUDAImageResource.Destroy;
-begin
-  FGLContextHandle.Destroy;
-  inherited;
-end;
-
-procedure TCUDAImageResource.SetMaterialLibrary(const Value:
-  TGLMaterialLibrary);
-begin
-  if fMaterialLibrary <> Value then
-  begin
-    if Assigned(fMaterialLibrary) then
-      fMaterialLibrary.RemoveFreeNotification(Self);
-    fMaterialLibrary := Value;
-    if Assigned(fMaterialLibrary) then
-    begin
-      fMaterialLibrary.FreeNotification(Self);
-      if fMaterialLibrary.TextureByName(fTextureName) <> nil then
-        DestroyHandles;
-    end;
-  end;
-end;
-
-procedure TCUDAImageResource.SetTextureName(const Value: TGLLibMaterialName);
-begin
-  if fTextureName <> Value then
-  begin
-    fTextureName := Value;
-    DestroyHandles;
-  end;
-end;
-
-procedure TCUDAImageResource.UnMapResources;
-begin
-  if FMapCounter > 0 then
-    Dec(FMapCounter);
-
-  if FMapCounter = 0 then
-  begin
-    if Assigned(FHandle[0]) then
-    begin
-      Context.Requires;
-      FStatus := cuGraphicsUnMapResources(1, @FHandle[0], nil);
-      Context.Release;
-      if FStatus <> CUDA_SUCCESS then
-        Abort;
-    end;
-  end;
-end;
-
-procedure TCUDAImageResource.AllocateHandles;
-const
-  cMapping: array[TCUDAMapping] of TCUgraphicsMapResourceFlags = (
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
-var
-  LTexture: TGLTexture;
-  glHandle: Cardinal;
-begin
-  FGLContextHandle.AllocateHandle;
-
-  if FGLContextHandle.IsDataNeedUpdate
-    and Assigned(FMaterialLibrary)
-    and (Length(FTextureName) > 0) then
-  begin
-    inherited;
-
-    LTexture := FMaterialLibrary.TextureByName(FTextureName);
-    if Assigned(LTexture) then
-    begin
-      glHandle := LTexture.AllocateHandle;
-      if glHandle = 0 then
-        Abort;
-
-      Context.Requires;
-      DestroyHandles;
-
-      FStatus := cuGraphicsGLRegisterImage(
-        FHandle[0],
-        glHandle,
-        DecodeTextureTarget(LTexture.Image.NativeTextureTarget),
-        cMapping[fMapping]);
-
-      Context.Release;
-
-      if FStatus <> CUDA_SUCCESS then
-        Abort;
-
-      FGLContextHandle.NotifyDataUpdated;
-    end;
-  end;
-end;
-
-procedure TCUDAImageResource.DestroyHandles;
-begin
-  if Assigned(FHandle[0]) then
-  begin
-    inherited;
-    Context.Requires;
-    FStatus := cuGraphicsUnregisterResource(FHandle[0]);
-    Context.Release;
-    FHandle[0] := nil;
-    FGLContextHandle.NotifyChangesOfData;
-  end;
-end;
-
-procedure TCUDAImageResource.MapResources;
-begin
-  AllocateHandles;
-
-  if FMapCounter = 0 then
-  begin
-    if Assigned(FHandle[0]) then
-    begin
-      Context.Requires;
-      FStatus := cuGraphicsMapResources(1, @FHandle[0], nil);
-      Context.Release;
-      if FStatus <> CUDA_SUCCESS then
-        Abort;
-    end;
-  end;
-  Inc(FMapCounter);
-end;
-
-procedure TCUDAImageResource.Notification(AComponent: TComponent; Operation:
-  TOperation);
-begin
-  inherited;
-  if (AComponent = fMaterialLibrary) and (Operation = opRemove) then
-  begin
-    fMaterialLibrary := nil;
-    fTextureName := '';
-    DestroyHandles;
-  end;
-end;
-
-procedure TCUDAImageResource.BindArrayToTexture(var cudaArray: TCUDAMemData;
-  ALeyer, ALevel: LOngWord);
-var
-  LTexture: TGLTexture;
-  newArray: PCUarray;
-begin
-  if FMapCounter = 0 then
-  begin
-   {$IFDEF USE_LOGGING}
-    LogError(strFailToBindArrayToTex);
-   {$ENDIF}
-    Abort;
-  end;
-
-  Context.Requires;
-  FStatus := cuGraphicsSubResourceGetMappedArray(
-    newArray, FHandle[0], ALeyer, ALevel);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  LTexture := FMaterialLibrary.TextureByName(FTextureName);
-  SetArray(cudaArray, newArray, True, LTexture.TexDepth > 0);
-end;
-
-
-
-// ------------------
-// ------------------ TCUDAGLGeometryResource ------------------
-// ------------------
-
-constructor TCUDAGeometryResource.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner);
-  FHandle[0] := nil;
-  FHandle[1] := nil;
-  FResourceType := rtBuffer;
-  FMapCounter := 0;
-  FGLContextHandle := TGLVirtualHandle.Create;
-  FGLContextHandle.OnAllocate := OnGLHandleAllocate;
-  FGLContextHandle.OnDestroy := OnGLHandleDestroy;
-end;
-
-destructor TCUDAGeometryResource.Destroy;
-begin
-  FeedBackMesh := nil;
-  FGLContextHandle.Destroy;
-  inherited;
-end;
-
-procedure TCUDAGeometryResource.SetFeedBackMesh(const Value:
-  TGLCustomFeedBackMesh);
-begin
-  if FFeedBackMesh <> Value then
-  begin
-    if Assigned(FFeedBackMesh) then
-    begin
-      FFeedBackMesh.RemoveFreeNotification(Self);
-      FFeedBackMesh.FGeometryResource := nil;
-    end;
-    FFeedBackMesh := Value;
-    if Assigned(FFeedBackMesh) then
-    begin
-      FFeedBackMesh.FreeNotification(Self);
-      FFeedBackMesh.FGeometryResource := Self;
-    end;
-    DestroyHandles;
-  end;
-end;
-
-procedure TCUDAGeometryResource.AllocateHandles;
-const
-  cMapping: array[TCUDAMapping] of TCUgraphicsMapResourceFlags = (
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
-
-begin
-  inherited;
-  FGLContextHandle.AllocateHandle;
-  if FGLContextHandle.IsDataNeedUpdate then
-  begin
-    if FFeedBackMesh.FVBO.IsDataNeedUpdate then
-      FFeedBackMesh.AllocateHandles;
-
-    Context.Requires;
-
-    DestroyHandles;
-
-    // Register vertex array
-    FStatus := cuGraphicsGLRegisterBuffer(
-      FHandle[0],
-      FFeedBackMesh.FVBO.Handle,
-      cMapping[FMapping]);
-
-    // Register element array
-    if FFeedBackMesh.ElementNumber > 0 then
-      CollectStatus(
-        cuGraphicsGLRegisterBuffer(
-          FHandle[1],
-          FFeedBackMesh.FEBO.Handle,
-          cMapping[FMapping]));
-
-    Context.Release;
-
-    if FStatus <> CUDA_SUCCESS then
-      Abort;
-
-    FGLContextHandle.NotifyDataUpdated;
-  end;
-end;
-
-// Unregisters the CUDA graphics resources held in fHandle[0] / fHandle[1].
-// Does nothing when neither handle is assigned. Any outstanding mappings
-// are unwound first, and the GL context handle is flagged as changed so
-// that the next AllocateHandles registers the buffers again.
-procedure TCUDAGeometryResource.DestroyHandles;
-begin
-  if Assigned(fHandle[0]) or Assigned(fHandle[1]) then
-  begin
-    inherited;
-
-    Context.Requires;
-
-    // Unwind every nested MapResources call before unregistering.
-    while FMapCounter > 0 do
-      UnMapResources;
-
-    // Start from a clean status; CollectStatus accumulates the results of
-    // the unregister calls below.
-    FStatus := CUDA_SUCCESS;
-
-    if Assigned(fHandle[0]) then
-    begin
-      CollectStatus(cuGraphicsUnregisterResource(fHandle[0]));
-      fHandle[0] := nil;
-    end;
-
-    if Assigned(fHandle[1]) then
-    begin
-      CollectStatus(cuGraphicsUnregisterResource(fHandle[1]));
-      fHandle[1] := nil;
-    end;
-
-    Context.Release;
-    FGLContextHandle.NotifyChangesOfData;
-  end;
-end;
-
-// Component notification hook: when the linked feedback mesh is being
-// removed, unlink it and release the CUDA handles.
-procedure TCUDAGeometryResource.Notification(AComponent: TComponent;
-  Operation:
-  TOperation);
-begin
-  inherited;
-  if (AComponent = FFeedBackMesh) and (Operation = opRemove) then
-  begin
-    // The setter already calls DestroyHandles; the explicit call below then
-    // finds both handles nil and returns immediately.
-    FeedBackMesh := nil;
-    DestroyHandles;
-  end;
-end;
-
-// Maps the registered buffers for CUDA access. Calls may be nested: only
-// the outermost call (counter = 0) talks to CUDA, every call increments
-// FMapCounter. Raises EAbort when the mapping call fails.
-procedure TCUDAGeometryResource.MapResources;
-var
-  LResourceCount: Integer;
-begin
-  AllocateHandles;
-
-  if (FMapCounter = 0) and Assigned(FHandle[0]) then
-  begin
-    // Map the element buffer together with the vertex buffer when present.
-    if Assigned(FHandle[1]) then
-      LResourceCount := 2
-    else
-      LResourceCount := 1;
-
-    Context.Requires;
-    FStatus := cuGraphicsMapResources(LResourceCount, @FHandle[0], nil);
-    Context.Release;
-    if FStatus <> CUDA_SUCCESS then
-      Abort;
-  end;
-
-  Inc(FMapCounter);
-end;
-
-// Releases one level of mapping. The buffers are actually unmapped when the
-// counter drops from 1 to 0. A call made while nothing is mapped is ignored
-// instead of asking CUDA to unmap resources that were never mapped.
-// Raises EAbort when the unmapping call fails.
-procedure TCUDAGeometryResource.UnMapResources;
-var
-  count: Integer;
-begin
-  // Unbalanced call: there is no mapping to release.
-  if not (FMapCounter > 0) then
-    Exit;
-
-  Dec(FMapCounter);
-
-  if (FMapCounter = 0) and Assigned(FHandle[0]) then
-  begin
-    // Unmap the element buffer together with the vertex buffer when present.
-    count := 1;
-    if Assigned(FHandle[1]) then
-      Inc(count);
-    Context.Requires;
-    FStatus := cuGraphicsUnMapResources(count, @FHandle[0], nil);
-    Context.Release;
-    if FStatus <> CUDA_SUCCESS then
-      Abort;
-  end;
-end;
-
-// Returns the size in bytes of the array holding one attribute for all
-// vertices: (bytes per vertex for the attribute's GLSL type) multiplied by
-// the mesh's VertexNumber. An unknown type trips an assertion and yields 0.
-// NOTE(review): dereferences FFeedBackMesh without a nil check - callers
-// must ensure a mesh is linked.
-function TCUDAGeometryResource.GetAttribArraySize(AAttr: TGLVertexAttribute): LongWord;
-var
-  typeSize: LongWord;
-begin
-  case AAttr.GLSLType of
-    GLSLType1F: typeSize := SizeOf(Single);
-    GLSLType2F: typeSize := 2 * SizeOf(Single);
-    GLSLType3F: typeSize := 3 * SizeOf(Single);
-    GLSLType4F: typeSize := 4 * SizeOf(Single);
-    GLSLType1I: typeSize := SizeOf(Integer);
-    GLSLType2I: typeSize := 2 * SizeOf(Integer);
-    GLSLType3I: typeSize := 3 * SizeOf(Integer);
-    GLSLType4I: typeSize := 4 * SizeOf(Integer);
-    GLSLType1UI: typeSize := SizeOf(Integer);
-    GLSLType2UI: typeSize := 2 * SizeOf(Integer);
-    GLSLType3UI: typeSize := 3 * SizeOf(Integer);
-    GLSLType4UI: typeSize := 4 * SizeOf(Integer);
-    GLSLTypeMat2F: typeSize := 4 * SizeOf(Single);
-    GLSLTypeMat3F: typeSize := 9 * SizeOf(Single);
-    GLSLTypeMat4F: typeSize := 16 * SizeOf(Single);
-  else
-    begin
-      Assert(False, strErrorEx + strUnknownType);
-      // Reached only when assertions are compiled out.
-      typeSize := 0;
-    end;
-  end;
-  Result := Cardinal(FFeedBackMesh.VertexNumber) * typeSize;
-end;
-
-// Returns the byte size of the array of the attribute named AName, or 0
-// when no such attribute exists or its GLSL type is still undefined.
-function TCUDAGeometryResource.GetAttributeArraySize(
-  const AName: string): LongWord;
-var
-  LFound: TGLVertexAttribute;
-begin
-  Result := 0;
-  LFound := FFeedBackMesh.Attributes.GetAttributeByName(AName);
-  if Assigned(LFound) then
-    if LFound.GLSLType <> GLSLTypeUndefined then
-      Result := GetAttribArraySize(LFound);
-end;
-
-// Returns the mapped device address of the array belonging to the attribute
-// named AName, or nil when the resource is not mapped, no mesh is linked, or
-// the attribute does not exist. Attribute arrays are laid out one after
-// another in the vertex buffer, so the address is the mapped base pointer
-// plus the sizes of all preceding attribute arrays.
-// Raises EAbort when CUDA reports an error or the array would exceed the
-// mapped buffer size.
-function TCUDAGeometryResource.GetAttributeArrayAddress(
-  const AName: string): Pointer;
-var
-  i: Integer;
-  Size: Cardinal;
-  Offset: NativeUInt;
-  MapPtr: Pointer;
-  LAttr: TGLVertexAttribute;
-begin
-  Result := nil;
-  if (FMapCounter = 0) or not Assigned(FFeedBackMesh) then
-    exit;
-  LAttr := FFeedBackMesh.Attributes.GetAttributeByName(AName);
-  if not Assigned(LAttr) then
-    exit;
-
-  // Byte offset of this attribute's array inside the vertex buffer. Kept as
-  // a plain integer rather than accumulated in the Result pointer.
-  Offset := 0;
-  for i := 0 to LAttr.Index - 1 do
-    Inc(Offset, GetAttribArraySize(FFeedBackMesh.Attributes[i]));
-
-  Context.Requires;
-  MapPtr := nil;
-  FStatus := cuGraphicsResourceGetMappedPointer(
-    MapPtr, Size, FHandle[0]);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  if Offset + GetAttribArraySize(LAttr) > Size then
-  begin
-    {$IFDEF USE_LOGGING}
-    LogError(strOutOfAttribSize);
-   {$ENDIF}
-    Abort;
-  end;
-
-  // Pointer-sized arithmetic: casting MapPtr to Cardinal would drop the
-  // upper 32 bits of the address on 64-bit targets.
-  Result := Pointer(NativeUInt(MapPtr) + Offset);
-end;
-
-// Returns the element (index) buffer size in bytes: one Cardinal per
-// element of the linked mesh.
-// NOTE(review): dereferences FFeedBackMesh without a nil check.
-function TCUDAGeometryResource.GetElementArrayDataSize: LongWord;
-begin
-  Result := FFeedBackMesh.ElementNumber * SizeOf(Cardinal);
-end;
-
-// Returns the mapped device address of the element (index) buffer, or nil
-// when there is no registered element buffer, the resource is not mapped,
-// or no mesh is linked. Raises EAbort when CUDA reports an error or the
-// mapped buffer is smaller than the expected index data.
-function TCUDAGeometryResource.GetElementArrayAddress: Pointer;
-var
-  Size: Cardinal;
-  MapPtr: Pointer;
-begin
-  Result := nil;
-  // Either condition alone makes the query invalid, so they are combined
-  // with "or": a nil handle must never reach CUDA, mapped or not.
-  if (FHandle[1] = nil) or (FMapCounter = 0)
-    or not Assigned(FFeedBackMesh) then
-    exit;
-
-  Context.Requires;
-  MapPtr := nil;
-  FStatus := cuGraphicsResourceGetMappedPointer(MapPtr, Size, FHandle[1]);
-  Context.Release;
-
-  if FStatus <> CUDA_SUCCESS then
-    Abort;
-
-  if GetElementArrayDataSize > Size then
-  begin
-    {$IFDEF USE_LOGGING}
-    LogError(strOutOfElementSize);
-    {$ENDIF}
-    Abort;
-  end;
-
-  // Return the pointer as is; routing it through Cardinal would drop the
-  // upper 32 bits of the address on 64-bit targets.
-  Result := MapPtr;
-end;
-
-
-
-// -----------------------
-// ----------------------- TGLVertexAttribute -------------------
-// -----------------------
-
-
-// Creates an attribute with a collection-unique name based on 'Attrib', an
-// undefined GLSL type and an unresolved location (-1).
-constructor TGLVertexAttribute.Create(ACollection: TCollection);
-begin
-  inherited;
-  FName := GetOwner.MakeUniqueName('Attrib');
-  FType := GLSLTypeUndefined;
-  // -1 makes GetLocation query the location on first use.
-  FLocation := -1;
-end;
-
-// Assigns the per-attribute kernel function. Free notifications are routed
-// to the mesh that owns the attribute collection, so the mesh is the one
-// told when the function component goes away.
-procedure TGLVertexAttribute.SetFunc(AFunc: TCUDAFunction);
-var
-  LOwnerMesh: TGLCustomFeedBackMesh;
-begin
-  LOwnerMesh := TGLCustomFeedBackMesh(GetOwner.GetOwner);
-
-  // Stop watching the function used so far.
-  if Assigned(FFunc) then
-    FFunc.RemoveFreeNotification(LOwnerMesh);
-
-  FFunc := AFunc;
-
-  // Start watching the new one.
-  if Assigned(AFunc) then
-    AFunc.FreeNotification(LOwnerMesh);
-end;
-
-// Renames the attribute, making the requested name unique within the
-// owning collection, and reports the change.
-procedure TGLVertexAttribute.SetName(const AName: string);
-begin
-  if AName = FName then
-    Exit;
-
-  // Blank the current name first so the uniqueness search cannot collide
-  // with this attribute's own old name.
-  FName := '';
-  FName := GetOwner.MakeUniqueName(AName);
-  NotifyChange(Self);
-end;
-
-// Changes the attribute's GLSL data type and reports the change; assigning
-// the current type is a no-op.
-procedure TGLVertexAttribute.SetType(AType: TGLSLDataType);
-begin
-  if AType = FType then
-    Exit;
-
-  FType := AType;
-  NotifyChange(Self);
-end;
-
-// Returns the attribute location. While the cached value is negative it is
-// queried from the program currently bound in the current GL context, using
-// the attribute name, and the result is cached in FLocation.
-// NOTE(review): relies on CurrentGLContext being valid when the cached
-// location is unresolved - confirm callers only use it inside a context.
-function TGLVertexAttribute.GetLocation: Integer;
-begin
-  if FLocation < 0 then
-    FLocation := gl.GetAttribLocation(
-      CurrentGLContext.GLStates.CurrentProgram,
-      PAnsiChar(AnsiString(FName)));
-  Result := FLocation;
-end;
-
-// Returns the owning collection typed as TGLVertexAttributes.
-function TGLVertexAttribute.GetOwner: TGLVertexAttributes;
-begin
-  Result := TGLVertexAttributes(Collection);
-end;
-
-// Forwards a change notification to the owning collection. The Sender
-// argument is not used; the attribute itself is passed on as sender.
-procedure TGLVertexAttribute.NotifyChange(Sender: TObject);
-begin
-  GetOwner.NotifyChange(Self);
-end;
- 
-
-// -----------------------
-// ----------------------- TGLVertexAttributes -------------------
-// -----------------------
-
-// Appends a new item to the collection and returns it as a
-// TGLVertexAttribute.
-function TGLVertexAttributes.Add: TGLVertexAttribute;
-begin
-  Result := (inherited Add) as TGLVertexAttribute;
-end;
-
-// Creates the collection for AOwner with TGLVertexAttribute as item class.
-constructor TGLVertexAttributes.Create(AOwner: TComponent);
-begin
-  inherited Create(AOwner, TGLVertexAttribute);
-end;
-
-// Returns the first attribute whose Name equals AName, or nil when there
-// is none.
-function TGLVertexAttributes.GetAttributeByName(
-  const AName: string): TGLVertexAttribute;
-var
-  LIndex: Integer;
-  LCandidate: TGLVertexAttribute;
-begin
-  Result := nil;
-  // A linear scan is sufficient here; no optimization is needed.
-  for LIndex := 0 to Count - 1 do
-  begin
-    LCandidate := TGLVertexAttribute(Items[LIndex]);
-    if LCandidate.Name = AName then
-    begin
-      Result := LCandidate;
-      Break;
-    end;
-  end;
-end;
-
-// Typed getter behind the default Attributes[] property.
-function TGLVertexAttributes.GetItems(Index: Integer): TGLVertexAttribute;
-begin
-  Result := TGLVertexAttribute(inherited Items[index]);
-end;
-
-// Returns ANameRoot if no attribute uses it yet; otherwise appends 1, 2,
-// 3, ... to the root until a name is found that is not in use.
-function TGLVertexAttributes.MakeUniqueName(const ANameRoot: string): string;
-var
-  LSuffix: Integer;
-begin
-  Result := ANameRoot;
-  LSuffix := 0;
-  while Assigned(GetAttributeByName(Result)) do
-  begin
-    Inc(LSuffix);
-    Result := ANameRoot + IntToStr(LSuffix);
-  end;
-end;
-
-// Forwards a change notification to the owner, which is cast to
-// TGLCustomFeedBackMesh without a type check. The Sender argument is not
-// used; the collection itself is passed on as sender.
-procedure TGLVertexAttributes.NotifyChange(Sender: TObject);
-begin
-  TGLCustomFeedBackMesh(GetOwner).NotifyChange(Self);
-end;
-
-// Typed setter behind the default Attributes[] property; delegates to the
-// inherited Items[] setter.
-procedure TGLVertexAttributes.SetItems(Index: Integer;
-  const AValue: TGLVertexAttribute);
-begin
-  inherited Items[index] := AValue;
-end;
-
-
-
-// -----------------------
-// ----------------------- TGLCustomFeedBackMesh -------------------
-// -----------------------
-
-// Allocates the VAO/VBO/EBO handles and, when a geometry resource is
-// linked, sizes the buffers and records the vertex attribute layout in the
-// VAO. Attribute arrays are stored one after another in the VBO (stride 0,
-// each array starting at the accumulated size of the preceding ones).
-procedure TGLCustomFeedBackMesh.AllocateHandles;
-var
-  I, L: Integer;
-  Size, Offset: Cardinal;
-  GR: TCUDAGeometryResource;
-  // Indexed by GL attribute location, not by attribute index.
-  EnabledLocations: array[0..GLS_VERTEX_ATTR_NUM - 1] of Boolean;
-begin
-  FVAO.AllocateHandle;
-  FVBO.AllocateHandle;
-  FEBO.AllocateHandle;
-
-  if Assigned(FGeometryResource) then
-  begin
-    GR := TCUDAGeometryResource(FGeometryResource);
-    // Total VBO size is the sum of all attribute arrays.
-    size := 0;
-    for I := 0 to Attributes.Count - 1 do
-      Inc(size, GR.GetAttribArraySize(Attributes[I]));
-
-    FVAO.Bind;
-    FVBO.BindBufferData(nil, size, GL_STREAM_DRAW);
-    if FElementNumber > 0 then
-      FEBO.BindBufferData(nil, GR.GetElementArrayDataSize, GL_STREAM_DRAW)
-    else
-      FEBO.UnBind; // Just in case
-
-    // Predisable attributes
-    for I := 0 to GLS_VERTEX_ATTR_NUM - 1 do
-      EnabledLocations[I] := false;
-
-    Offset := 0;
-    for I := 0 to Attributes.Count - 1 do
-    begin
-      L := Attributes[I].Location;
-      // Skip attributes without a location and locations outside the
-      // range tracked by EnabledLocations.
-      if (L > -1) and (L < GLS_VERTEX_ATTR_NUM) then
-      begin
-        // Mark the GL location (not the attribute index): the enable loop
-        // below passes the array index to gl.EnableVertexAttribArray.
-        EnabledLocations[L] := True;
-        // NOTE(review): the Mat3F/Mat4F cases pass 9 and 16 as component
-        // count, while the OpenGL reference restricts it to 1..4 (or
-        // GL_BGRA) - confirm whether matrix attributes work at all here.
-        case Attributes[I].GLSLType of
-            GLSLType1F:  gl.VertexAttribPointer(L, 1, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLType2F:  gl.VertexAttribPointer(L, 2, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLType3F:  gl.VertexAttribPointer(L, 3, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLType4F:  gl.VertexAttribPointer(L, 4, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLType1I:  gl.VertexAttribIPointer(L, 1, GL_INT, 0, pointer(Offset));
-            GLSLType2I:  gl.VertexAttribIPointer(L, 2, GL_INT, 0, pointer(Offset));
-            GLSLType3I:  gl.VertexAttribIPointer(L, 3, GL_INT, 0, pointer(Offset));
-            GLSLType4I:  gl.VertexAttribIPointer(L, 4, GL_INT, 0, pointer(Offset));
-            GLSLType1UI: gl.VertexAttribIPointer(L, 1, GL_UNSIGNED_INT, 0, pointer(Offset));
-            GLSLType2UI: gl.VertexAttribIPointer(L, 2, GL_UNSIGNED_INT, 0, pointer(Offset));
-            GLSLType3UI: gl.VertexAttribIPointer(L, 3, GL_UNSIGNED_INT, 0, pointer(Offset));
-            GLSLType4UI: gl.VertexAttribIPointer(L, 4, GL_UNSIGNED_INT, 0, pointer(Offset));
-            GLSLTypeMat2F: gl.VertexAttribPointer(L, 4, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLTypeMat3F: gl.VertexAttribPointer(L, 9, GL_FLOAT, false, 0, pointer(Offset));
-            GLSLTypeMat4F: gl.VertexAttribPointer(L, 16, GL_FLOAT, false, 0, pointer(Offset));
-        end; // of case
-      end;
-      // The array still occupies space in the VBO even when not bound.
-      Inc(Offset, GR.GetAttribArraySize(Attributes[I]));
-    end;
-
-    // Enable engagement attributes array
-    begin
-      for I := GLS_VERTEX_ATTR_NUM - 1 downto 0 do
-        if EnabledLocations[I] then
-          gl.EnableVertexAttribArray(I)
-        else
-          gl.DisableVertexAttribArray(I);
-    end;
-
-    FVAO.UnBind;
-    FVAO.NotifyDataUpdated;
-  end;
-end;
-
-// Creates the mesh with direct drawing enabled, an empty attribute
-// collection, its own VAO/VBO/EBO handle objects and the property defaults:
-// point primitives, common launching, one vertex, no elements, no blending.
-constructor TGLCustomFeedBackMesh.Create(AOwner: TComponent);
-begin
-  inherited;
-  ObjectStyle := ObjectStyle + [osDirectDraw];
-  FAttributes := TGLVertexAttributes.Create(Self);
-  FVAO := TGLVertexArrayHandle.Create;
-  FVBO := TGLVBOArrayBufferHandle.Create;
-  FEBO := TGLVBOElementArrayHandle.Create;
-  FPrimitiveType := fbmpPoint;
-  FLaunching := fblCommon;
-  FVertexNumber := 1;
-  FElementNumber := 0;
-  FBlend := False;
-end;
-
-// Detaches the shader through its setter, then destroys the attribute
-// collection and the three GL handle objects before chaining to the
-// inherited destructor.
-destructor TGLCustomFeedBackMesh.Destroy;
-begin
-  Shader := nil;
-  FAttributes.Destroy;
-  FVAO.Destroy;
-  FVBO.Destroy;
-  FEBO.Destroy;
-  inherited;
-end;
-
-// Maps the linked geometry resource, runs the CUDA kernels that fill the
-// vertex (and index) data, and unmaps the resource again. Does nothing
-// when no geometry resource is linked. The resource is unmapped even when
-// a callback or a kernel launch raises, so a failed frame cannot leave the
-// buffers mapped.
-procedure TGLCustomFeedBackMesh.LaunchKernels;
-var
-  i: Integer;
-  GR: TCUDAGeometryResource;
-  LAttr: TGLVertexAttribute;
-//  IR: TCUDAGLImageResource;
-begin
-  if not Assigned(FGeometryResource) then
-    Exit;
-
-  // Produce geometry resource
-  GR := TCUDAGeometryResource(FGeometryResource);
-  GR.MapResources;
-  try
-    // Produce vertex attributes
-    case Launching of
-      fblCommon:
-        begin
-          // Every attribute gets its callback, then one common launch.
-          for I := 0 to FAttributes.Count - 1 do
-          begin
-            LAttr := FAttributes.Attributes[I];
-            if Assigned(LAttr.OnBeforeKernelLaunch) then
-              LAttr.OnBeforeKernelLaunch(LAttr);
-          end;
-          if Assigned(FCommonFunc) then
-            FCommonFunc.Launch;
-        end;
-      fblOnePerAtttribute:
-        begin
-          // Callback and own kernel launch per attribute.
-          for I := 0 to FAttributes.Count - 1 do
-          begin
-            LAttr := FAttributes.Attributes[I];
-            if Assigned(LAttr.OnBeforeKernelLaunch) then
-              LAttr.OnBeforeKernelLaunch(LAttr);
-            if Assigned(LAttr.KernelFunction) then
-              LAttr.KernelFunction.Launch;
-          end;
-        end;
-    else
-      Assert(False, strErrorEx + strUnknownType);
-    end;
-    // Produce indexes
-    // NOTE(review): in fblCommon mode this launches the common function a
-    // second time when the mesh has elements - confirm that is intended.
-    if (GR.GetElementArrayDataSize > 0)
-      and Assigned(FCommonFunc) then
-        FCommonFunc.Launch;
-  finally
-    // Balance MapResources on every exit path.
-    GR.UnMapResources;
-  end;
-end;
-//    // Produce image resource
-//  else if FGLResource is TCUDAGLImageResource then
-//  begin
-//    IR := TCUDAGLImageResource(FGLResource);
-//    IR.MapResources;
-//    if Assigned(FBeforeLaunch) then
-//      FBeforeLaunch(Self, 0);
-//    if Assigned(FManufacturer) then
-//      FManufacturer.Launch;
-//    IR.UnMapResources;
-//  end;
-
-// Renders the mesh: applies the shader, (re)builds the GL handles when the
-// VAO is out of date, launches the CUDA kernels that produce the data and
-// draws with DrawElements (when ElementNumber > 0) or DrawArrays, once per
-// shader pass. Nothing is drawn at design time or without both a shader and
-// a geometry resource. Children are rendered afterwards when requested.
-procedure TGLCustomFeedBackMesh.DoRender(var ARci: TGLRenderContextInfo; ARenderSelf,
-  ARenderChildren: Boolean);
-const
-  // GL primitive mode for each TFeedBackMeshPrimitive value.
-  cPrimitives: array[TFeedBackMeshPrimitive] of Cardinal =
-    (GL_POINTS, GL_LINES, GL_TRIANGLES);
-begin
-  if ARenderSelf
-    and not (csDesigning in ComponentState)
-    and Assigned(FShader)
-    and Assigned(FGeometryResource) then
-    try
-      FShader.Apply(ARci, Self);
-      if FVAO.IsDataNeedUpdate then
-        AllocateHandles;
-
-      // Produce mesh data
-      LaunchKernels;
-      // Draw mesh
-      FVAO.Bind;
-      // Multipass Shader Loop
-      repeat
-        // Render mesh
-        if FElementNumber > 0 then
-        begin
-          gl.DrawElements(
-            cPrimitives[FPrimitiveType],
-            FElementNumber,
-            GL_UNSIGNED_INT,
-            nil);
-        end
-        else
-        begin
-          gl.DrawArrays(
-            cPrimitives[FPrimitiveType],
-            0,
-            FVertexNumber);
-        end;
-      until not FShader.UnApply(ARci);
-      FVAO.UnBind;
-    except
-      // Any failure hides the object instead of propagating the exception.
-      // NOTE(review): the exception is swallowed silently, and the VAO or
-      // shader may still be bound/applied when the failure happened after
-      // Bind/Apply - confirm this best-effort behavior is intended.
-      Visible := False;
-    end;
-
-  if ARenderChildren then
-    Self.RenderChildren(0, Count - 1, ARci);
-end;
-
-// Component notification hook: clears every reference to a component that
-// is being removed - the shader, the common kernel function, or the kernel
-// function of any attribute.
-procedure TGLCustomFeedBackMesh.Notification(AComponent: TComponent;
-  Operation: TOperation);
-var
-  I: Integer;
-begin
-  if Operation = opRemove then
-  begin
-    if AComponent = Shader then
-      Shader := nil
-    else if AComponent = FCommonFunc then
-      CommonKernelFunction := nil
-    else if AComponent is TCUDAFunction then
-    begin
-      // The same function may be assigned to several attributes.
-      for I := 0 to FAttributes.Count - 1  do
-        if FAttributes[I].KernelFunction = AComponent then
-          FAttributes[I].KernelFunction := nil;
-    end;
-  end;
-  inherited;
-end;
-
-// Rebuilds the attribute collection from the active attributes reported by
-// the shader. Does nothing when no shader is assigned or it is disabled.
-// If querying the shader raises, the shader is disabled and the existing
-// attributes are left untouched.
-// NOTE(review): dereferences Scene.CurrentBuffer.RenderingContext without
-// nil checks - confirm this is never reached before the object is part of
-// a scene with a current buffer.
-procedure TGLCustomFeedBackMesh.RefreshAttributes;
-var
-  I: Integer;
-  AttribInfo: TGLActiveAttribArray;
-begin
-  if Assigned(FShader) and FShader.Enabled then
-  begin
-    FShader.FailedInitAction := fiaSilentDisable;
-    // The query below runs with the scene's rendering context activated.
-    Scene.CurrentBuffer.RenderingContext.Activate;
-    try
-      AttribInfo := FShader.GetActiveAttribs;
-    except
-      FShader.Enabled := False;
-      Scene.CurrentBuffer.RenderingContext.Deactivate;
-      exit;
-    end;
-    Scene.CurrentBuffer.RenderingContext.Deactivate;
-    FAttributes.Clear;
-    for I := 0 to High(AttribInfo) do
-    begin
-      with FAttributes.Add do
-      begin
-        Name := AttribInfo[I].Name;
-        GLSLType := AttribInfo[I].AType;
-        // Written directly to the field: Location is a read-only property.
-        FLocation := AttribInfo[I].Location;
-      end;
-    end;
-    // Force the VAO layout to be rebuilt on the next render.
-    FVAO.NotifyChangesOfData;
-  end;
-end;
-
-// Copies AValue into the mesh's own attribute collection (the collection
-// object itself is never replaced).
-procedure TGLCustomFeedBackMesh.SetAttributes(AValue: TGLVertexAttributes);
-begin
-  FAttributes.Assign(AValue);
-end;
-
-// Assigns the common kernel function and moves the free-notification
-// subscription from the old function component to the new one.
-procedure TGLCustomFeedBackMesh.SetCommonFunc(AFunc: TCUDAFunction);
-begin
-  if AFunc = FCommonFunc then
-    Exit;
-
-  if Assigned(FCommonFunc) then
-    FCommonFunc.RemoveFreeNotification(Self);
-
-  FCommonFunc := AFunc;
-
-  if Assigned(AFunc) then
-    AFunc.FreeNotification(Self);
-end;
-
-// Sets the number of indexes in the element buffer. Negative values are
-// clamped to 0. The VAO is flagged as changed on every call.
-procedure TGLCustomFeedBackMesh.SetElementNumber(AValue: Integer);
-begin
-  if AValue < 0 then
-    AValue := 0;
-  FElementNumber := AValue;
-  FVAO.NotifyChangesOfData;
-end;
-
-// Stores the primitive type used for drawing; no buffers are invalidated.
-procedure TGLCustomFeedBackMesh.SetPrimitiveType(AValue: TFeedBackMeshPrimitive);
-begin
-  FPrimitiveType := AValue;
-end;
-
-// Assigns the shader used as material, moves the free-notification
-// subscription to the new shader and, unless the component is still being
-// loaded, rebuilds the attribute list from it.
-procedure TGLCustomFeedBackMesh.SetShader(AShader: TGLSLShader);
-begin
-  if AShader = FShader then
-    Exit;
-
-  if Assigned(FShader) then
-    FShader.RemoveFreeNotification(Self);
-
-  FShader := AShader;
-
-  if Assigned(AShader) then
-    AShader.FreeNotification(Self);
-
-  if not (csLoading in ComponentState) then
-    RefreshAttributes;
-end;
-
-// Sets the number of vertexes in the array buffer. Values below 1 are
-// clamped to 1. The VAO is flagged as changed on every call.
-procedure TGLCustomFeedBackMesh.SetVertexNumber(AValue: Integer);
-begin
-  if AValue < 1 then
-    AValue := 1;
-  FVertexNumber := AValue;
-  FVAO.NotifyChangesOfData;
-end;
-
- 
-
-initialization
-
-  // Register the unit's component classes with the RTL class registry.
-  RegisterClasses([TCUDAImageResource, TCUDAGeometryResource,
-    TGLCustomFeedBackMesh, TGLFeedbackMesh]);
-
-end.
-
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAGraphics;
+
+(* CUDA Graphics for GLScene *)
+
+interface
+
+{$I GLScene.inc}
+
+uses
+  Winapi.OpenGL,
+  Winapi.OpenGLext,
+  System.Classes,
+  System.SysUtils,
+
+  GLS.OpenGLTokens,
+  Import.CUDAApi,
+  GPU.CUDA,
+
+  GLS.Context,
+  GLS.State,
+  GLS.Scene,
+  GLS.Graphics,
+  GLS.Material,
+  GLS.Strings,
+  GLS.TextureFormat,
+  GLS.Texture,
+  GLSL.Shader,
+  GLSL.ShaderParameter,
+  GLS.PersistentClasses,
+  GLS.RenderContextInfo;
+
+
+type
+  TGLVertexAttribute = class;
+  TGLVertexAttributes = class;
+
+  // Event type for a vertex attribute's OnBeforeKernelLaunch handler;
+  // Sender is the attribute concerned.
+  TOnBeforeKernelLaunch = procedure(Sender: TGLVertexAttribute) of object;
+
+  // One vertex attribute of a feedback mesh: a name, a GLSL data type, an
+  // optional CUDA kernel function of its own and an attribute location.
+  TGLVertexAttribute = class(TCollectionItem)
+  private
+    FName: string;
+    FType: TGLSLDataType;
+    FFunc: TCUDAFunction;
+    FLocation: Integer;
+    FOnBeforeKernelLaunch: TOnBeforeKernelLaunch;
+    procedure SetName(const AName: string);
+    procedure SetType(AType: TGLSLDataType);
+    procedure SetFunc(AFunc: TCUDAFunction);
+    function GetLocation: Integer;
+    // Reintroduced to return the owning collection with its concrete type.
+    function GetOwner: TGLVertexAttributes; reintroduce;
+  public
+    constructor Create(ACollection: TCollection); override;
+    procedure NotifyChange(Sender: TObject);
+    // Attribute location (read-only).
+    property Location: Integer read GetLocation;
+  published
+    // Attribute name.
+    property Name: string read FName write SetName;
+    // GLSL data type of the attribute.
+    property GLSLType: TGLSLDataType read FType write SetType;
+    // CUDA kernel function associated with this attribute.
+    property KernelFunction: TCUDAFunction read FFunc write SetFunc;
+    // Handler of type TOnBeforeKernelLaunch for this attribute.
+    property OnBeforeKernelLaunch: TOnBeforeKernelLaunch read
+      FOnBeforeKernelLaunch write FOnBeforeKernelLaunch;
+  end;
+
+  // Owned collection of TGLVertexAttribute items with name-based lookup.
+  TGLVertexAttributes = class(TOwnedCollection)
+  private
+    procedure SetItems(Index: Integer; const AValue: TGLVertexAttribute);
+    function GetItems(Index: Integer): TGLVertexAttribute;
+  public
+    constructor Create(AOwner: TComponent);
+    procedure NotifyChange(Sender: TObject);
+    // Derives a name from ANameRoot (returns a string).
+    function MakeUniqueName(const ANameRoot: string): string;
+    // Looks up an attribute by name.
+    function GetAttributeByName(const AName: string): TGLVertexAttribute;
+    function Add: TGLVertexAttribute;
+    // Typed, default indexed access to the items.
+    property Attributes[Index: Integer]: TGLVertexAttribute read GetItems
+      write SetItems; default;
+  end;
+
+  // Primitive kind a feedback mesh is drawn with.
+  TFeedBackMeshPrimitive = (fbmpPoint, fbmpLine, fbmpTriangle);
+  // Kernel launch mode of a feedback mesh: one common launch, or one
+  // launch per attribute.
+  TFeedBackMeshLaunching = (fblCommon, fblOnePerAtttribute);
+
+  // Scene object base class holding a GLSL shader, a vertex attribute
+  // collection, GL vertex array / array buffer / element buffer handles and
+  // references to CUDA kernel functions and a CUDA graphic resource.
+  // Properties are declared protected here for descendants to publish.
+  TCUDACustomFeedBackMesh = class(TGLBaseSceneObject)
+  private
+    // Linked CUDA graphic resource.
+    FGeometryResource: TCUDAGraphicResource;
+    FAttributes: TGLVertexAttributes;
+    FVAO: TGLVertexArrayHandle;
+    FVBO: TGLVBOArrayBufferHandle;
+    FEBO: TGLVBOElementArrayHandle;
+    FPrimitiveType: TFeedBackMeshPrimitive;
+    FVertexNumber: Integer;
+    FElementNumber: Integer;
+    FShader: TGLSLShader;
+    FCommonFunc: TCUDAFunction;
+    FLaunching: TFeedBackMeshLaunching;
+    FBlend: Boolean;
+    procedure SetAttributes(AValue: TGLVertexAttributes);
+    procedure SetPrimitiveType(AValue: TFeedBackMeshPrimitive);
+    procedure SetVertexNumber(AValue: Integer);
+    procedure SetElementNumber(AValue: Integer);
+    procedure SetShader(AShader: TGLSLShader);
+    procedure SetCommonFunc(AFunc: TCUDAFunction);
+  protected
+    procedure Notification(AComponent: TComponent;
+      Operation: TOperation); override;
+    procedure RefreshAttributes;
+    procedure AllocateHandles;
+    procedure LaunchKernels;
+  protected
+    // Vertex attributes of the mesh.
+    property Attributes: TGLVertexAttributes read FAttributes write SetAttributes;
+    // GLSL shader used as material. If it is absent or disabled, nothing is drawn.
+    property Shader: TGLSLShader read FShader write SetShader;
+    // Primitive type.
+    property PrimitiveType: TFeedBackMeshPrimitive read FPrimitiveType
+      write SetPrimitiveType default fbmpPoint;
+    // Number of vertexes in array buffer.
+    property VertexNumber: Integer read FVertexNumber
+      write SetVertexNumber default 1;
+    // Number of indexes in element buffer. Zero to disable.
+    property ElementNumber: Integer read FElementNumber
+      write SetElementNumber default 0;
+    (* Used for all attributes and elements if Launching = fblCommon;
+       otherwise each attribute uses its own function and this one is used
+       for the elements. *)
+    property CommonKernelFunction: TCUDAFunction read FCommonFunc
+      write SetCommonFunc;
+    (* Defines the kernel launching mode:
+       fblCommon - single launch for all,
+       fblOnePerAtttribute - one launch per attribute and elements *)
+    property Launching: TFeedBackMeshLaunching read FLaunching
+      write FLaunching default fblCommon;
+    // Defines if the object uses blending for object sorting purposes.
+    property Blend: Boolean read FBlend write FBlend default False;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure DoRender(var ARci: TGLRenderContextInfo;
+      ARenderSelf, ARenderChildren: Boolean); override;
+    // Read-only access to the array buffer and element buffer handles.
+    property ArrayBufferHandle: TGLVBOArrayBufferHandle read FVBO;
+    property ElementArrayHandle: TGLVBOElementArrayHandle read FEBO;
+  end;
+
+  // Feedback mesh component: adds no code, only publishes the properties
+  // of TCUDACustomFeedBackMesh and of its ancestors listed below.
+  TCUDAFeedbackMesh = class(TCUDACustomFeedBackMesh)
+  published
+    // Declared in TCUDACustomFeedBackMesh.
+    property Attributes;
+    property Shader;
+    property PrimitiveType;
+    property VertexNumber;
+    property ElementNumber;
+    property CommonKernelFunction;
+    property Launching;
+    property Blend;
+    // Inherited from the ancestor classes.
+    property ObjectsSorting;
+    property VisibilityCulling;
+    property Direction;
+    property PitchAngle;
+    property Position;
+    property RollAngle;
+    property Scale;
+    property ShowAxes;
+    property TurnAngle;
+    property Up;
+    property Visible;
+    property Pickable;
+    property OnProgress;
+    property OnPicked;
+    property Behaviours;
+    property Effects;
+  end;
+
+  // CUDA graphic resource identified by a material library and a texture
+  // name within it.
+  TCUDAImageResource = class(TCUDAGraphicResource)
+  private
+    fMaterialLibrary: TGLMaterialLibrary;
+    fTextureName: TGLLibMaterialName;
+    procedure SetMaterialLibrary(const Value: TGLMaterialLibrary);
+    procedure SetTextureName(const Value: TGLLibMaterialName);
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    procedure Notification(AComponent: TComponent; Operation: TOperation);
+      override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure MapResources; override;
+    procedure UnMapResources; override;
+    // Takes a CUDA memory object plus layer and level numbers.
+    procedure BindArrayToTexture(var cudaArray: TCUDAMemData;
+      ALeyer, ALevel: LOngWord); override;
+  published
+    // Name of the texture inside MaterialLibrary.
+    property TextureName: TGLLibMaterialName read fTextureName write
+      SetTextureName;
+    // Library in which TextureName is looked up.
+    property MaterialLibrary: TGLMaterialLibrary read fMaterialLibrary write
+      SetMaterialLibrary;
+    property Mapping;
+  end;
+
+  // CUDA graphic resource linked to a feedback mesh; it exposes sizes and
+  // addresses for the mesh's attribute and index data.
+  TCUDAGeometryResource = class(TCUDAGraphicResource)
+  private
+    FFeedBackMesh: TCUDACustomFeedBackMesh;
+    procedure SetFeedBackMesh(const Value: TCUDACustomFeedBackMesh);
+    function GetAttribArraySize(AAttr: TGLVertexAttribute): LongWord;
+  protected
+    procedure AllocateHandles; override;
+    procedure DestroyHandles; override;
+    procedure Notification(AComponent: TComponent; Operation: TOperation); override;
+    function GetAttributeArraySize(const AName: string): LongWord; override;
+    function GetAttributeArrayAddress(const AName: string): Pointer; override;
+    function GetElementArrayDataSize: LongWord; override;
+    function GetElementArrayAddress: Pointer; override;
+  public
+    constructor Create(AOwner: TComponent); override;
+    destructor Destroy; override;
+    procedure MapResources; override;
+    procedure UnMapResources; override;
+    // Size of the data of the named attribute.
+    property AttributeDataSize[const AttribName: string]: LongWord read
+      GetAttributeArraySize;
+    // Address of the data of the named attribute.
+    property AttributeDataAddress[const AttribName: string]: Pointer read
+      GetAttributeArrayAddress;
+    // Size and address of the index (element) data.
+    property IndexDataSize: LongWord read GetElementArrayDataSize;
+    property IndexDataAddress: Pointer read GetElementArrayAddress;
+  published
+    // Feedback mesh this resource is linked to.
+    property FeedBackMesh: TCUDACustomFeedBackMesh read FFeedBackMesh write
+      SetFeedBackMesh;
+    property Mapping;
+  end;
+
+//---------------------------------------------------------------------------
+implementation
+//---------------------------------------------------------------------------
+
+// ------------------
+// ------------------ TCUDAImageResource ------------------
+// ------------------
+
+// Creates an unregistered texture resource and the virtual GL context
+// handle whose allocate/destroy events are routed to OnGLHandleAllocate
+// and OnGLHandleDestroy.
+constructor TCUDAImageResource.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  fHandle[0] := nil;
+  fResourceType := rtTexture;
+  FGLContextHandle := TGLVirtualHandle.Create;
+  FGLContextHandle.OnAllocate := OnGLHandleAllocate;
+  FGLContextHandle.OnDestroy := OnGLHandleDestroy;
+end;
+
+// Destroys the GL context handle, then chains to the inherited destructor.
+destructor TCUDAImageResource.Destroy;
+begin
+  FGLContextHandle.Destroy;
+  inherited;
+end;
+
+// Switches the material library the texture is looked up in and moves the
+// free-notification subscription to it. The CUDA handle is dropped when
+// the new library contains a texture with the current TextureName.
+procedure TCUDAImageResource.SetMaterialLibrary(const Value:
+  TGLMaterialLibrary);
+begin
+  if fMaterialLibrary = Value then
+    Exit;
+
+  if Assigned(fMaterialLibrary) then
+    fMaterialLibrary.RemoveFreeNotification(Self);
+
+  fMaterialLibrary := Value;
+  if not Assigned(fMaterialLibrary) then
+    Exit;
+
+  fMaterialLibrary.FreeNotification(Self);
+  if Assigned(fMaterialLibrary.TextureByName(fTextureName)) then
+    DestroyHandles;
+end;
+
+// Changes the name of the texture to register and drops the current CUDA
+// handle; assigning the current name is a no-op.
+procedure TCUDAImageResource.SetTextureName(const Value: TGLLibMaterialName);
+begin
+  if fTextureName = Value then
+    Exit;
+
+  fTextureName := Value;
+  DestroyHandles;
+end;
+
+// Releases one level of mapping. The texture is actually unmapped when the
+// counter drops from 1 to 0. A call made while nothing is mapped is ignored
+// instead of asking CUDA to unmap a resource that was never mapped.
+// Raises EAbort when the unmapping call fails.
+procedure TCUDAImageResource.UnMapResources;
+begin
+  // Unbalanced call: there is no mapping to release.
+  if not (FMapCounter > 0) then
+    Exit;
+
+  Dec(FMapCounter);
+
+  if (FMapCounter = 0) and Assigned(FHandle[0]) then
+  begin
+    Context.Requires;
+    FStatus := cuGraphicsUnMapResources(1, @FHandle[0], nil);
+    Context.Release;
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+  end;
+end;
+
+// Registers the GL texture named TextureName from MaterialLibrary with
+// CUDA. Work is done only when the GL context handle needs an update, a
+// library is assigned and the texture name is not empty; a name that is
+// not found in the library is silently ignored. Raises EAbort when the
+// texture has no GL handle or the registration call fails.
+procedure TCUDAImageResource.AllocateHandles;
+const
+  // CUDA map flags for each TCUDAMapping value.
+  cMapping: array[TCUDAMapping] of TCUgraphicsMapResourceFlags = (
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
+var
+  LTexture: TGLTexture;
+  glHandle: Cardinal;
+begin
+  FGLContextHandle.AllocateHandle;
+
+  if FGLContextHandle.IsDataNeedUpdate
+    and Assigned(FMaterialLibrary)
+    and (Length(FTextureName) > 0) then
+  begin
+    // The inherited implementation runs only when there is work to do.
+    inherited;
+
+    LTexture := FMaterialLibrary.TextureByName(FTextureName);
+    if Assigned(LTexture) then
+    begin
+      glHandle := LTexture.AllocateHandle;
+      if glHandle = 0 then
+        Abort;
+
+      Context.Requires;
+      // Drop any previous registration before creating a new one.
+      DestroyHandles;
+
+      FStatus := cuGraphicsGLRegisterImage(
+        FHandle[0],
+        glHandle,
+        DecodeTextureTarget(LTexture.Image.NativeTextureTarget),
+        cMapping[fMapping]);
+
+      Context.Release;
+
+      if FStatus <> CUDA_SUCCESS then
+        Abort;
+
+      FGLContextHandle.NotifyDataUpdated;
+    end;
+  end;
+end;
+
+// Unregisters the CUDA graphics resource in FHandle[0], if any, and flags
+// the GL context handle as changed so the next AllocateHandles registers
+// the texture again.
+// NOTE(review): FMapCounter is not touched here, so a resource destroyed
+// while mapped keeps a non-zero map counter - confirm this is intended.
+procedure TCUDAImageResource.DestroyHandles;
+begin
+  if Assigned(FHandle[0]) then
+  begin
+    inherited;
+    Context.Requires;
+    FStatus := cuGraphicsUnregisterResource(FHandle[0]);
+    Context.Release;
+    FHandle[0] := nil;
+    FGLContextHandle.NotifyChangesOfData;
+  end;
+end;
+
+// Maps the registered texture for CUDA access. Calls may be nested: only
+// the outermost call (counter = 0) talks to CUDA, every call increments
+// FMapCounter. Raises EAbort when the mapping call fails.
+procedure TCUDAImageResource.MapResources;
+begin
+  AllocateHandles;
+
+  if (FMapCounter = 0) and Assigned(FHandle[0]) then
+  begin
+    Context.Requires;
+    FStatus := cuGraphicsMapResources(1, @FHandle[0], nil);
+    Context.Release;
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+  end;
+
+  Inc(FMapCounter);
+end;
+
+// Component notification hook: when the material library is being removed,
+// forget it together with the texture name and release the CUDA handle.
+procedure TCUDAImageResource.Notification(AComponent: TComponent; Operation:
+  TOperation);
+begin
+  inherited;
+  if (AComponent = fMaterialLibrary) and (Operation = opRemove) then
+  begin
+    fMaterialLibrary := nil;
+    fTextureName := '';
+    DestroyHandles;
+  end;
+end;
+
+// Fetches the CUDA array behind the given layer (ALeyer) and mip level
+// (ALevel) of the mapped texture and hands it to cudaArray via SetArray.
+// The resource must be mapped. Raises EAbort when it is not mapped, when
+// CUDA reports an error, or when the material library or the texture is
+// no longer available.
+procedure TCUDAImageResource.BindArrayToTexture(var cudaArray: TCUDAMemData;
+  ALeyer, ALevel: LOngWord);
+var
+  LTexture: TGLTexture;
+  newArray: PCUarray;
+begin
+  if FMapCounter = 0 then
+  begin
+   {$IFDEF USE_LOGGING}
+    LogError(strFailToBindArrayToTex);
+   {$ENDIF}
+    Abort;
+  end;
+
+  Context.Requires;
+  FStatus := cuGraphicsSubResourceGetMappedArray(
+    newArray, FHandle[0], ALeyer, ALevel);
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  // The library can be set to nil, or the texture removed from it, while
+  // the CUDA registration is still alive; fail the same way as the other
+  // error paths instead of dereferencing nil.
+  LTexture := nil;
+  if Assigned(FMaterialLibrary) then
+    LTexture := FMaterialLibrary.TextureByName(FTextureName);
+  if not Assigned(LTexture) then
+    Abort;
+
+  SetArray(cudaArray, newArray, True, LTexture.TexDepth > 0);
+end;
+
+
+
+// ------------------
+// ------------------ TCUDAGeometryResource ------------------
+// ------------------
+
+// Creates an unregistered, unmapped buffer resource and the virtual GL
+// context handle whose allocate/destroy events are routed to
+// OnGLHandleAllocate and OnGLHandleDestroy.
+constructor TCUDAGeometryResource.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner);
+  // FHandle[0] is the vertex buffer, FHandle[1] the element buffer.
+  FHandle[0] := nil;
+  FHandle[1] := nil;
+  FResourceType := rtBuffer;
+  FMapCounter := 0;
+  FGLContextHandle := TGLVirtualHandle.Create;
+  FGLContextHandle.OnAllocate := OnGLHandleAllocate;
+  FGLContextHandle.OnDestroy := OnGLHandleDestroy;
+end;
+
+// Unlinks the feedback mesh through the FeedBackMesh setter (which also
+// drops the CUDA handles when a mesh was attached), then destroys the GL
+// context handle before chaining to the inherited destructor.
+destructor TCUDAGeometryResource.Destroy;
+begin
+  FeedBackMesh := nil;
+  FGLContextHandle.Destroy;
+  inherited;
+end;
+
+// Links this resource to a feedback mesh, or unlinks it when Value is nil.
+// Both sides of the link are kept in sync and the CUDA handles are dropped
+// whenever the linked mesh actually changes.
+procedure TCUDAGeometryResource.SetFeedBackMesh(const Value:
+  TCUDACustomFeedBackMesh);
+begin
+  if FFeedBackMesh = Value then
+    Exit;
+
+  // Detach from the mesh that was linked so far, if any.
+  if Assigned(FFeedBackMesh) then
+  begin
+    FFeedBackMesh.RemoveFreeNotification(Self);
+    FFeedBackMesh.FGeometryResource := nil;
+  end;
+
+  FFeedBackMesh := Value;
+
+  // Attach to the new mesh and ask to be told when it is freed.
+  if Assigned(FFeedBackMesh) then
+  begin
+    FFeedBackMesh.FreeNotification(Self);
+    FFeedBackMesh.FGeometryResource := Self;
+  end;
+
+  DestroyHandles;
+end;
+
+// Registers the feedback mesh's vertex buffer (and, when ElementNumber > 0,
+// its element buffer) with CUDA. Does nothing when the GL context handle is
+// up to date or when no feedback mesh is linked. Raises EAbort (via Abort)
+// when a registration call reports an error.
+procedure TCUDAGeometryResource.AllocateHandles;
+const
+  cMapping: array[TCUDAMapping] of TCUgraphicsMapResourceFlags = (
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
+
+begin
+  inherited;
+  FGLContextHandle.AllocateHandle;
+  // Without a linked mesh there are no GL buffers to register; skip the
+  // work instead of dereferencing a nil FFeedBackMesh.
+  if FGLContextHandle.IsDataNeedUpdate and Assigned(FFeedBackMesh) then
+  begin
+    if FFeedBackMesh.FVBO.IsDataNeedUpdate then
+      FFeedBackMesh.AllocateHandles;
+
+    Context.Requires;
+    try
+      // Drop any previous registration before creating a new one.
+      DestroyHandles;
+
+      // Register vertex array
+      FStatus := cuGraphicsGLRegisterBuffer(
+        FHandle[0],
+        FFeedBackMesh.FVBO.Handle,
+        cMapping[FMapping]);
+
+      // Register element array
+      if FFeedBackMesh.ElementNumber > 0 then
+        CollectStatus(
+          cuGraphicsGLRegisterBuffer(
+            FHandle[1],
+            FFeedBackMesh.FEBO.Handle,
+            cMapping[FMapping]));
+    finally
+      // Always balance Context.Requires, even if DestroyHandles raises.
+      Context.Release;
+    end;
+
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+
+    FGLContextHandle.NotifyDataUpdated;
+  end;
+end;
+
+// Unregisters the CUDA graphics resources held in fHandle[0] / fHandle[1].
+// Does nothing when neither handle is assigned. Any outstanding mappings
+// are unwound first, and the GL context handle is flagged as changed so
+// that the next AllocateHandles registers the buffers again.
+procedure TCUDAGeometryResource.DestroyHandles;
+begin
+  if Assigned(fHandle[0]) or Assigned(fHandle[1]) then
+  begin
+    inherited;
+
+    Context.Requires;
+
+    // Unwind every nested MapResources call before unregistering.
+    while FMapCounter > 0 do
+      UnMapResources;
+
+    // Start from a clean status; CollectStatus accumulates the results of
+    // the unregister calls below.
+    FStatus := CUDA_SUCCESS;
+
+    if Assigned(fHandle[0]) then
+    begin
+      CollectStatus(cuGraphicsUnregisterResource(fHandle[0]));
+      fHandle[0] := nil;
+    end;
+
+    if Assigned(fHandle[1]) then
+    begin
+      CollectStatus(cuGraphicsUnregisterResource(fHandle[1]));
+      fHandle[1] := nil;
+    end;
+
+    Context.Release;
+    FGLContextHandle.NotifyChangesOfData;
+  end;
+end;
+
+// Reacts to the removal of the linked feedback mesh by clearing the link and
+// destroying the registered handles.
+procedure TCUDAGeometryResource.Notification(AComponent: TComponent;
+  Operation:
+  TOperation);
+begin
+  inherited;
+  if (Operation <> opRemove) or (AComponent <> FFeedBackMesh) then
+    Exit;
+
+  FeedBackMesh := nil;
+  DestroyHandles;
+end;
+
+// Maps the registered buffers for CUDA access. Calls are counted: only the
+// first call (counter at zero) performs the actual mapping.
+// Calls Abort, without incrementing the counter, when mapping fails.
+procedure TCUDAGeometryResource.MapResources;
+var
+  count: Integer;
+begin
+  AllocateHandles;
+
+  if (FMapCounter = 0) and Assigned(FHandle[0]) then
+  begin
+    // The element buffer handle is mapped together with the vertex buffer.
+    if Assigned(FHandle[1]) then
+      count := 2
+    else
+      count := 1;
+
+    Context.Requires;
+    FStatus := cuGraphicsMapResources(count, @FHandle[0], nil);
+    Context.Release;
+
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+  end;
+
+  Inc(FMapCounter);
+end;
+
+// Releases one MapResources reference. The buffers are actually unmapped
+// when the last reference is released.
+// A call made while nothing is mapped is ignored; previously it still issued
+// cuGraphicsUnMapResources on resources that were not mapped.
+// Calls Abort when unmapping does not return CUDA_SUCCESS.
+procedure TCUDAGeometryResource.UnMapResources;
+var
+  count: Integer;
+begin
+  // Unbalanced call: there is nothing to release.
+  if FMapCounter <= 0 then
+    Exit;
+
+  Dec(FMapCounter);
+
+  if (FMapCounter = 0) and Assigned(FHandle[0]) then
+  begin
+    count := 1;
+    if Assigned(FHandle[1]) then
+      Inc(count);
+    Context.Requires;
+    FStatus := cuGraphicsUnMapResources(count, @FHandle[0], nil);
+    Context.Release;
+    if FStatus <> CUDA_SUCCESS then
+      Abort;
+  end;
+end;
+
+// Returns the size in bytes of one attribute's array: the per-vertex size of
+// the attribute's GLSL type multiplied by the mesh's VertexNumber.
+// An unknown type trips an assertion and yields 0.
+function TCUDAGeometryResource.GetAttribArraySize(AAttr: TGLVertexAttribute): LongWord;
+var
+  Components: LongWord;
+begin
+  // Number of scalar components per vertex. Single and Integer are both
+  // 4 bytes wide, so one element size serves float and integer types alike.
+  case AAttr.GLSLType of
+    GLSLType1F, GLSLType1I, GLSLType1UI:
+      Components := 1;
+    GLSLType2F, GLSLType2I, GLSLType2UI:
+      Components := 2;
+    GLSLType3F, GLSLType3I, GLSLType3UI:
+      Components := 3;
+    GLSLType4F, GLSLType4I, GLSLType4UI, GLSLTypeMat2F:
+      Components := 4;
+    GLSLTypeMat3F:
+      Components := 9;
+    GLSLTypeMat4F:
+      Components := 16;
+  else
+    begin
+      Assert(False, strErrorEx + strUnknownType);
+      Components := 0;
+    end;
+  end;
+  Result := Cardinal(FFeedBackMesh.VertexNumber) * (Components * SizeOf(Single));
+end;
+
+// Looks an attribute up by name and returns the byte size of its array.
+// Returns 0 when no such attribute exists or its GLSL type is undefined.
+function TCUDAGeometryResource.GetAttributeArraySize(
+  const AName: string): LongWord;
+var
+  Found: TGLVertexAttribute;
+begin
+  Found := FFeedBackMesh.Attributes.GetAttributeByName(AName);
+  if Assigned(Found) and (Found.GLSLType <> GLSLTypeUndefined) then
+    Result := GetAttribArraySize(Found)
+  else
+    Result := 0;
+end;
+
+// Returns the mapped address of the named attribute's array inside the
+// vertex buffer, or nil when nothing is mapped or the name is unknown.
+// The arrays of the attributes are laid out one after another in attribute
+// index order, so the offset is the summed size of all preceding attributes.
+// Calls Abort when the mapped pointer cannot be obtained or the attribute
+// would extend past the mapped size.
+function TCUDAGeometryResource.GetAttributeArrayAddress(
+  const AName: string): Pointer;
+var
+  i: Integer;
+  // NOTE(review): kept as Cardinal to match the var parameter of
+  // cuGraphicsResourceGetMappedPointer, which is declared outside this view.
+  Size: Cardinal;
+  Offset: NativeUInt;
+  MapPtr: Pointer;
+  LAttr: TGLVertexAttribute;
+begin
+  Result := nil;
+  if FMapCounter = 0 then
+    exit;
+  LAttr := FFeedBackMesh.Attributes.GetAttributeByName(AName);
+  if not Assigned(LAttr) then
+    exit;
+
+  Offset := 0;
+  for i := 0 to LAttr.Index - 1 do
+    Inc(Offset, GetAttribArraySize(FFeedBackMesh.Attributes[i]));
+
+  Context.Requires;
+  MapPtr := nil;
+  FStatus := cuGraphicsResourceGetMappedPointer(
+    MapPtr, Size, FHandle[0]);
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  if Offset + GetAttribArraySize(LAttr) > Size then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strOutOfAttribSize);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // Pointer arithmetic is done in NativeUInt so that the address is not
+  // truncated to 32 bits on 64-bit targets (Cardinal casts did that).
+  Result := Pointer(NativeUInt(MapPtr) + Offset);
+end;
+
+// Returns the byte size of the element (index) array: one Cardinal per
+// element of the feedback mesh.
+function TCUDAGeometryResource.GetElementArrayDataSize: LongWord;
+const
+  cIndexSize = SizeOf(Cardinal);
+begin
+  Result := FFeedBackMesh.ElementNumber * cIndexSize;
+end;
+
+// Returns the mapped address of the element (index) buffer, or nil when
+// there is no element buffer handle or the resources are not mapped.
+// Calls Abort when the mapped pointer cannot be obtained or the mapped size
+// is smaller than the element array.
+function TCUDAGeometryResource.GetElementArrayAddress: Pointer;
+var
+  // NOTE(review): kept as Cardinal to match the var parameter of
+  // cuGraphicsResourceGetMappedPointer, which is declared outside this view.
+  Size: Cardinal;
+  MapPtr: Pointer;
+begin
+  Result := nil;
+  // Either condition alone makes the query meaningless. The former "and"
+  // let a nil handle or an unmapped resource reach the driver call.
+  if (FHandle[1] = nil) or (FMapCounter = 0) then
+    exit;
+
+  Context.Requires;
+  MapPtr := nil;
+  FStatus := cuGraphicsResourceGetMappedPointer(MapPtr, Size, FHandle[1]);
+  Context.Release;
+
+  if FStatus <> CUDA_SUCCESS then
+    Abort;
+
+  if GetElementArrayDataSize > Size then
+  begin
+    {$IFDEF USE_LOGGING}
+    LogError(strOutOfElementSize);
+    {$ENDIF}
+    Abort;
+  end;
+
+  // Hand the pointer back unchanged; going through a Cardinal cast would
+  // truncate it on 64-bit targets.
+  Result := MapPtr;
+end;
+
+
+
+// -----------------------
+// ----------------------- TGLVertexAttribute -------------------
+// -----------------------
+
+
+// Creates an attribute with an undefined GLSL type, an unresolved location
+// (-1) and a name made unique within the owning collection.
+constructor TGLVertexAttribute.Create(ACollection: TCollection);
+begin
+  inherited;
+  FLocation := -1;
+  FType := GLSLTypeUndefined;
+  FName := GetOwner.MakeUniqueName('Attrib');
+end;
+
+// Assigns the kernel function of this attribute. Free notifications are
+// registered on the owning mesh, not on the attribute itself.
+// NOTE(review): the old function's notification is removed even if another
+// attribute of the same mesh still refers to that function - confirm.
+procedure TGLVertexAttribute.SetFunc(AFunc: TCUDAFunction);
+var
+  OwnerMesh: TCUDACustomFeedBackMesh;
+begin
+  OwnerMesh := TCUDACustomFeedBackMesh(GetOwner.GetOwner);
+
+  if FFunc <> nil then
+    FFunc.RemoveFreeNotification(OwnerMesh);
+
+  FFunc := AFunc;
+
+  if FFunc <> nil then
+    FFunc.FreeNotification(OwnerMesh);
+end;
+
+// Renames the attribute; the requested name is made unique within the
+// owning collection and the owner is notified of the change.
+procedure TGLVertexAttribute.SetName(const AName: string);
+begin
+  if AName = FName then
+    Exit;
+
+  // Blank the current name first so it cannot clash with itself while the
+  // collection is searched for duplicates.
+  FName := '';
+  FName := GetOwner.MakeUniqueName(AName);
+  NotifyChange(Self);
+end;
+
+// Changes the GLSL data type and notifies the owner when it really changed.
+procedure TGLVertexAttribute.SetType(AType: TGLSLDataType);
+begin
+  if AType = FType then
+    Exit;
+
+  FType := AType;
+  NotifyChange(Self);
+end;
+
+// Returns the attribute location. While the cached value is negative it is
+// queried by name from the current GL context's current program.
+function TGLVertexAttribute.GetLocation: Integer;
+var
+  AnsiName: AnsiString;
+begin
+  if FLocation < 0 then
+  begin
+    AnsiName := AnsiString(FName);
+    FLocation := gl.GetAttribLocation(
+      CurrentGLContext.GLStates.CurrentProgram,
+      PAnsiChar(AnsiName));
+  end;
+  Result := FLocation;
+end;
+
+// Returns the owning collection, cast (unchecked) to TGLVertexAttributes.
+function TGLVertexAttribute.GetOwner: TGLVertexAttributes;
+begin
+  Result := TGLVertexAttributes(Collection);
+end;
+
+// Forwards a change notification to the owning collection. The Sender
+// argument is not passed on; this attribute is reported as the sender.
+procedure TGLVertexAttribute.NotifyChange(Sender: TObject);
+begin
+  GetOwner.NotifyChange(Self);
+end;
+ 
+
+// -----------------------
+// ----------------------- TGLVertexAttributes -------------------
+// -----------------------
+
+// Appends a new item to the collection and returns it as TGLVertexAttribute
+// (checked cast).
+function TGLVertexAttributes.Add: TGLVertexAttribute;
+begin
+  Result := (inherited Add) as TGLVertexAttribute;
+end;
+
+// Creates the collection for AOwner with TGLVertexAttribute as item class.
+constructor TGLVertexAttributes.Create(AOwner: TComponent);
+begin
+  inherited Create(AOwner, TGLVertexAttribute);
+end;
+
+// Returns the first attribute whose Name equals AName, or nil if none does.
+function TGLVertexAttributes.GetAttributeByName(
+  const AName: string): TGLVertexAttribute;
+var
+  Idx: Integer;
+  Candidate: TGLVertexAttribute;
+begin
+  Result := nil;
+  // A linear scan is sufficient; no optimization is needed here.
+  for Idx := 0 to Count - 1 do
+  begin
+    Candidate := TGLVertexAttribute(Items[Idx]);
+    if Candidate.Name = AName then
+    begin
+      Result := Candidate;
+      Break;
+    end;
+  end;
+end;
+
+// Typed read accessor: returns the item at Index as TGLVertexAttribute
+// (unchecked cast of the inherited Items entry).
+function TGLVertexAttributes.GetItems(Index: Integer): TGLVertexAttribute;
+begin
+  Result := TGLVertexAttribute(inherited Items[index]);
+end;
+
+// Returns ANameRoot if no attribute has that name yet; otherwise appends
+// 1, 2, 3, ... to the root until an unused name is found.
+function TGLVertexAttributes.MakeUniqueName(const ANameRoot: string): string;
+var
+  Suffix: Integer;
+begin
+  Suffix := 0;
+  Result := ANameRoot;
+  while GetAttributeByName(Result) <> nil do
+  begin
+    Inc(Suffix);
+    Result := ANameRoot + IntToStr(Suffix);
+  end;
+end;
+
+// Forwards a change notification to the owner, which is cast (unchecked) to
+// TCUDACustomFeedBackMesh. The collection is reported as the sender.
+procedure TGLVertexAttributes.NotifyChange(Sender: TObject);
+begin
+  TCUDACustomFeedBackMesh(GetOwner).NotifyChange(Self);
+end;
+
+// Typed write accessor: delegates to the inherited Items setter at Index.
+procedure TGLVertexAttributes.SetItems(Index: Integer;
+  const AValue: TGLVertexAttribute);
+begin
+  inherited Items[index] := AValue;
+end;
+
+
+
+// -----------------------
+// ----------------------- TCUDACustomFeedBackMesh -------------------
+// -----------------------
+
+// Allocates the VAO/VBO/EBO handles and, when a geometry resource is linked,
+// sizes the buffers and sets up the vertex attribute pointers inside the VAO.
+// Attribute arrays are stored one after another in the VBO (not interleaved),
+// each sized by TCUDAGeometryResource.GetAttribArraySize.
+//
+// Fixes over the previous version:
+//  * the enabled-array table is indexed by shader location, not by the
+//    attribute's index in the collection, and is bounds checked;
+//  * matrix attributes are bound column by column on consecutive locations,
+//    because glVertexAttribPointer only accepts a size of 1..4.
+procedure TCUDACustomFeedBackMesh.AllocateHandles;
+var
+  I, L: Integer;
+  Size, Offset: Cardinal;
+  GR: TCUDAGeometryResource;
+  EnabledLocations: array[0..GLS_VERTEX_ATTR_NUM - 1] of Boolean;
+
+  // Flags a shader location as in use. Locations outside the tracked range
+  // are ignored instead of writing past the array.
+  procedure MarkEnabled(ALocation: Integer);
+  begin
+    if (ALocation >= 0) and (ALocation < GLS_VERTEX_ATTR_NUM) then
+      EnabledLocations[ALocation] := True;
+  end;
+
+  // Binds an ADim x ADim float matrix that starts at byte AOffset.
+  // Each column takes its own location (ALocation, ALocation + 1, ...).
+  // NOTE(review): assumes each vertex stores its matrix as ADim consecutive
+  // columns of ADim floats - confirm against the kernels that fill it.
+  procedure BindMatrix(ALocation, ADim: Integer; AOffset: Cardinal);
+  var
+    Col: Integer;
+    ColOffset: Cardinal;
+  begin
+    for Col := 0 to ADim - 1 do
+    begin
+      ColOffset := AOffset + Cardinal(Col * ADim * SizeOf(Single));
+      gl.VertexAttribPointer(ALocation + Col, ADim, GL_FLOAT, false,
+        ADim * ADim * SizeOf(Single), pointer(ColOffset));
+      MarkEnabled(ALocation + Col);
+    end;
+  end;
+
+begin
+  FVAO.AllocateHandle;
+  FVBO.AllocateHandle;
+  FEBO.AllocateHandle;
+
+  if Assigned(FGeometryResource) then
+  begin
+    GR := TCUDAGeometryResource(FGeometryResource);
+    Size := 0;
+    for I := 0 to Attributes.Count - 1 do
+      Inc(Size, GR.GetAttribArraySize(Attributes[I]));
+
+    FVAO.Bind;
+    FVBO.BindBufferData(nil, Size, GL_STREAM_DRAW);
+    if FElementNumber > 0 then
+      FEBO.BindBufferData(nil, GR.GetElementArrayDataSize, GL_STREAM_DRAW)
+    else
+      FEBO.UnBind; // Just in case
+
+    // Predisable attributes
+    for I := 0 to GLS_VERTEX_ATTR_NUM - 1 do
+      EnabledLocations[I] := false;
+
+    Offset := 0;
+    for I := 0 to Attributes.Count - 1 do
+    begin
+      L := Attributes[I].Location;
+      if L > -1 then
+      begin
+        MarkEnabled(L);
+        case Attributes[I].GLSLType of
+            GLSLType1F:  gl.VertexAttribPointer(L, 1, GL_FLOAT, false, 0, pointer(Offset));
+            GLSLType2F:  gl.VertexAttribPointer(L, 2, GL_FLOAT, false, 0, pointer(Offset));
+            GLSLType3F:  gl.VertexAttribPointer(L, 3, GL_FLOAT, false, 0, pointer(Offset));
+            GLSLType4F:  gl.VertexAttribPointer(L, 4, GL_FLOAT, false, 0, pointer(Offset));
+            GLSLType1I:  gl.VertexAttribIPointer(L, 1, GL_INT, 0, pointer(Offset));
+            GLSLType2I:  gl.VertexAttribIPointer(L, 2, GL_INT, 0, pointer(Offset));
+            GLSLType3I:  gl.VertexAttribIPointer(L, 3, GL_INT, 0, pointer(Offset));
+            GLSLType4I:  gl.VertexAttribIPointer(L, 4, GL_INT, 0, pointer(Offset));
+            GLSLType1UI: gl.VertexAttribIPointer(L, 1, GL_UNSIGNED_INT, 0, pointer(Offset));
+            GLSLType2UI: gl.VertexAttribIPointer(L, 2, GL_UNSIGNED_INT, 0, pointer(Offset));
+            GLSLType3UI: gl.VertexAttribIPointer(L, 3, GL_UNSIGNED_INT, 0, pointer(Offset));
+            GLSLType4UI: gl.VertexAttribIPointer(L, 4, GL_UNSIGNED_INT, 0, pointer(Offset));
+            GLSLTypeMat2F: BindMatrix(L, 2, Offset);
+            GLSLTypeMat3F: BindMatrix(L, 3, Offset);
+            GLSLTypeMat4F: BindMatrix(L, 4, Offset);
+        end; // of case
+      end;
+      // The next attribute's array follows this one, bound or not.
+      Inc(Offset, GR.GetAttribArraySize(Attributes[I]));
+    end;
+
+    // Enable engagement attributes array
+    for I := GLS_VERTEX_ATTR_NUM - 1 downto 0 do
+      if EnabledLocations[I] then
+        gl.EnableVertexAttribArray(I)
+      else
+        gl.DisableVertexAttribArray(I);
+
+    FVAO.UnBind;
+    FVAO.NotifyDataUpdated;
+  end;
+end;
+
+// Creates the mesh with its attribute collection and GL buffer handle
+// objects. Defaults: point primitives, common launching, one vertex,
+// no elements, blending off. The object is flagged osDirectDraw.
+constructor TCUDACustomFeedBackMesh.Create(AOwner: TComponent);
+begin
+  inherited;
+  ObjectStyle := ObjectStyle + [osDirectDraw];
+
+  // Rendering defaults.
+  FBlend := False;
+  FLaunching := fblCommon;
+  FPrimitiveType := fbmpPoint;
+  FElementNumber := 0;
+  FVertexNumber := 1;
+
+  // Owned helper objects.
+  FEBO := TGLVBOElementArrayHandle.Create;
+  FVBO := TGLVBOArrayBufferHandle.Create;
+  FVAO := TGLVertexArrayHandle.Create;
+  FAttributes := TGLVertexAttributes.Create(Self);
+end;
+
+// Releases the shader link and the owned attribute collection and buffer
+// handle objects.
+// Free is used instead of Destroy so that a partially constructed instance
+// (constructor raised before every field was created) can still be torn
+// down: Free does nothing on a nil reference, Destroy would fault.
+destructor TCUDACustomFeedBackMesh.Destroy;
+begin
+  Shader := nil;
+  FAttributes.Free;
+  FVAO.Free;
+  FVBO.Free;
+  FEBO.Free;
+  inherited;
+end;
+
+// Maps the geometry resource, launches the kernels that produce the mesh
+// data according to the Launching mode, and unmaps the resource again.
+// Does nothing when no geometry resource is linked.
+// The unmap now runs in a finally block: previously an exception raised by
+// an event handler or a kernel launch left the buffers mapped.
+procedure TCUDACustomFeedBackMesh.LaunchKernels;
+var
+  i: Integer;
+  GeomRes: TCUDAGeometryResource;
+//  IR: TCUDAGLImageResource;
+begin
+  if not Assigned(FGeometryResource) then
+    Exit;
+
+  // Produce geometry resource
+  GeomRes := TCUDAGeometryResource(FGeometryResource);
+  GeomRes.MapResources;
+  try
+    // Produce vertex attributes
+    case Launching of
+      fblCommon:
+        begin
+          // Every attribute gets its pre-launch event, then one shared
+          // kernel is launched.
+          for I := 0 to FAttributes.Count - 1 do
+            with FAttributes.Attributes[I] do
+              if Assigned(OnBeforeKernelLaunch) then
+                OnBeforeKernelLaunch(FAttributes.Attributes[I]);
+          if Assigned(FCommonFunc) then
+            FCommonFunc.Launch;
+        end;
+      fblOnePerAtttribute:
+        begin
+          // Each attribute fires its event and launches its own kernel.
+          for I := 0 to FAttributes.Count - 1 do
+            with FAttributes.Attributes[I] do
+            begin
+              if Assigned(OnBeforeKernelLaunch) then
+                OnBeforeKernelLaunch(FAttributes.Attributes[I]);
+              if Assigned(KernelFunction) then
+                KernelFunction.Launch;
+            end;
+        end;
+    else
+      Assert(False, strErrorEx + strUnknownType);
+    end;
+    // Produce indexes
+    // NOTE(review): in fblCommon mode this launches FCommonFunc a second
+    // time when the mesh has elements - confirm that this is intended.
+    if (GeomRes.GetElementArrayDataSize > 0)
+      and Assigned(FCommonFunc) then
+        FCommonFunc.Launch;
+  finally
+    GeomRes.UnMapResources;
+  end;
+end;
+//    // Produce image resource
+//  else if FGLResource is TCUDAImageResource then
+//  begin
+//    IR := TCUDAImageResource(FGLResource);
+//    IR.MapResources;
+//    if Assigned(FBeforeLaunch) then
+//      FBeforeLaunch(Self, 0);
+//    if Assigned(FManufacturer) then
+//      FManufacturer.Launch;
+//    IR.UnMapResources;
+//  end;
+
+// Renders the feedback mesh and, if requested, its children.
+// The mesh itself is drawn only at run time (not while designing) and only
+// when both a shader and a geometry resource are assigned.
+// Any exception raised while rendering is swallowed and the object is made
+// invisible instead.
+procedure TCUDACustomFeedBackMesh.DoRender(var ARci: TGLRenderContextInfo; ARenderSelf,
+  ARenderChildren: Boolean);
+const
+  // GL primitive mode for each TFeedBackMeshPrimitive value.
+  cPrimitives: array[TFeedBackMeshPrimitive] of Cardinal =
+    (GL_POINTS, GL_LINES, GL_TRIANGLES);
+begin
+  if ARenderSelf
+    and not (csDesigning in ComponentState)
+    and Assigned(FShader)
+    and Assigned(FGeometryResource) then
+    try
+      FShader.Apply(ARci, Self);
+      // (Re)build buffers and attribute pointers when flagged as outdated.
+      if FVAO.IsDataNeedUpdate then
+        AllocateHandles;
+
+      // Produce mesh data
+      LaunchKernels;
+      // Draw mesh
+      FVAO.Bind;
+      // Multipass Shader Loop
+      repeat
+        // Render mesh
+        // Indexed drawing when the mesh has elements, plain arrays otherwise.
+        if FElementNumber > 0 then
+        begin
+          gl.DrawElements(
+            cPrimitives[FPrimitiveType],
+            FElementNumber,
+            GL_UNSIGNED_INT,
+            nil);
+        end
+        else
+        begin
+          gl.DrawArrays(
+            cPrimitives[FPrimitiveType],
+            0,
+            FVertexNumber);
+        end;
+      until not FShader.UnApply(ARci);
+      FVAO.UnBind;
+    except
+      // NOTE(review): on failure the VAO is left bound and the shader is not
+      // un-applied; only the visibility flag is cleared.
+      Visible := False;
+    end;
+
+  if ARenderChildren then
+    Self.RenderChildren(0, Count - 1, ARci);
+end;
+
+// Clears references to a component that is being removed: the shader, the
+// common kernel function, or any attribute's kernel function.
+procedure TCUDACustomFeedBackMesh.Notification(AComponent: TComponent;
+  Operation: TOperation);
+var
+  Idx: Integer;
+begin
+  if Operation = opRemove then
+  begin
+    if AComponent = Shader then
+      Shader := nil
+    else if AComponent = FCommonFunc then
+      CommonKernelFunction := nil
+    else if AComponent is TCUDAFunction then
+    begin
+      // Several attributes may share the same kernel function.
+      for Idx := 0 to FAttributes.Count - 1 do
+      begin
+        if FAttributes[Idx].KernelFunction = AComponent then
+          FAttributes[Idx].KernelFunction := nil;
+      end;
+    end;
+  end;
+  inherited;
+end;
+
+// Rebuilds the attribute collection from the active attributes reported by
+// the shader. Does nothing without an enabled shader. If querying the shader
+// fails, the shader is disabled and the collection is left untouched.
+procedure TCUDACustomFeedBackMesh.RefreshAttributes;
+var
+  I: Integer;
+  AttribInfo: TGLActiveAttribArray;
+  NewAttr: TGLVertexAttribute;
+begin
+  if (not Assigned(FShader)) or (not FShader.Enabled) then
+    Exit;
+
+  FShader.FailedInitAction := fiaSilentDisable;
+  // The query needs the rendering context of the current buffer.
+  Scene.CurrentBuffer.RenderingContext.Activate;
+  try
+    AttribInfo := FShader.GetActiveAttribs;
+  except
+    FShader.Enabled := False;
+    Scene.CurrentBuffer.RenderingContext.Deactivate;
+    exit;
+  end;
+  Scene.CurrentBuffer.RenderingContext.Deactivate;
+
+  FAttributes.Clear;
+  for I := 0 to High(AttribInfo) do
+  begin
+    NewAttr := FAttributes.Add;
+    NewAttr.Name := AttribInfo[I].Name;
+    NewAttr.GLSLType := AttribInfo[I].AType;
+    NewAttr.FLocation := AttribInfo[I].Location;
+  end;
+
+  // The VAO layout no longer matches the attribute set.
+  FVAO.NotifyChangesOfData;
+end;
+
+// Copies AValue into the mesh's own attribute collection via Assign; the
+// collection object itself is not replaced.
+procedure TCUDACustomFeedBackMesh.SetAttributes(AValue: TGLVertexAttributes);
+begin
+  FAttributes.Assign(AValue);
+end;
+
+// Assigns the common kernel function and moves the free-notification link
+// from the old function to the new one.
+procedure TCUDACustomFeedBackMesh.SetCommonFunc(AFunc: TCUDAFunction);
+begin
+  if AFunc = FCommonFunc then
+    Exit;
+
+  if FCommonFunc <> nil then
+    FCommonFunc.RemoveFreeNotification(Self);
+
+  FCommonFunc := AFunc;
+
+  if FCommonFunc <> nil then
+    FCommonFunc.FreeNotification(Self);
+end;
+
+// Sets the number of elements (indices); negative values are clamped to 0.
+// The VAO is flagged as outdated on every call.
+procedure TCUDACustomFeedBackMesh.SetElementNumber(AValue: Integer);
+begin
+  if AValue < 0 then
+    FElementNumber := 0
+  else
+    FElementNumber := AValue;
+  FVAO.NotifyChangesOfData;
+end;
+
+// Stores the primitive type used by DoRender; no notification is issued.
+procedure TCUDACustomFeedBackMesh.SetPrimitiveType(AValue: TFeedBackMeshPrimitive);
+begin
+  FPrimitiveType := AValue;
+end;
+
+// Assigns the shader, moves the free-notification link to it and, unless
+// the component is still loading, refreshes the attribute collection.
+procedure TCUDACustomFeedBackMesh.SetShader(AShader: TGLSLShader);
+begin
+  if AShader = FShader then
+    Exit;
+
+  if FShader <> nil then
+    FShader.RemoveFreeNotification(Self);
+
+  FShader := AShader;
+
+  if FShader <> nil then
+    FShader.FreeNotification(Self);
+
+  if not (csLoading in ComponentState) then
+    RefreshAttributes;
+end;
+
+// Sets the number of vertices; values below 1 are clamped to 1.
+// The VAO is flagged as outdated on every call.
+procedure TCUDACustomFeedBackMesh.SetVertexNumber(AValue: Integer);
+begin
+  if AValue < 1 then
+    FVertexNumber := 1
+  else
+    FVertexNumber := AValue;
+  FVAO.NotifyChangesOfData;
+end;
+
+//------------------------------------------
+initialization
+//------------------------------------------
+
+  // Register the component classes of this unit with the class registry.
+  RegisterClasses([TCUDAImageResource, TCUDAGeometryResource,
+    TCUDACustomFeedBackMesh, TCUDAFeedbackMesh]);
+
+end.
+

+ 515 - 515
Source/GLS.CUDAParser.pas → Source/GPU.CUDAParser.pas

@@ -1,515 +1,515 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAParser;
-
-(*
-   Helper unit for parsing CU modules and get information about.
-   kernel's functions, textures, shared and constants memory.
-*)
-
-interface
-
-uses
-  System.Classes,
-  System.SysUtils,
-  GLS.CUDARunTime;
-
-type
-
-  TCUDAType =
-    (
-    customType,
-    char1,
-    uchar1,
-    char2,
-    uchar2,
-    char3,
-    uchar3,
-    char4,
-    uchar4,
-    short1,
-    ushort1,
-    short2,
-    ushort2,
-    short3,
-    ushort3,
-    short4,
-    ushort4,
-    int1,
-    uint1,
-    int2,
-    uint2,
-    int3,
-    uint3,
-    int4,
-    uint4,
-    long1,
-    ulong1,
-    long2,
-    ulong2,
-    long3,
-    ulong3,
-    long4,
-    ulong4,
-    float1,
-    float2,
-    float3,
-    float4,
-    longlong1,
-    ulonglong1,
-    longlong2,
-    ulonglong2,
-    longlong3,
-    ulonglong3,
-    longlong4,
-    ulonglong4,
-    double1,
-    double2,
-    double3,
-    double4,
-    int8,
-    int16,
-    int32,
-    uint8,
-    uint16,
-    uint32
-    );
-
-  TCUDATexRefInfo = record
-    Name: string;
-    DataType: TCUDAType;
-    Dim: Byte;
-    ReadMode: TcudaTextureReadMode;
-  end;
-
-  TCUDAFuncArgInfo = record
-    Name: string;
-    DataType: TCUDAType;
-    CustomType: string;
-    Ref: Boolean;
-  end;
-
-  TCUDAFuncInfo = record
-    Name: string;
-    KernelName: string;
-    Args: array of TCUDAFuncArgInfo;
-  end;
-
-  TCUDAConstantInfo = record
-    Name: string;
-    DataType: TCUDAType;
-    CustomType: string;
-    Ref: Boolean;
-    DefValue: Boolean;
-  end;
-
-  TCUDAModuleInfo = class(TObject)
-  private
-    ping, pong: TStrings;
-    procedure Reset;
-    procedure BreakStrings(inlist, outlist: TStrings);
-    procedure RemoveComents(inlist, outlist: TStrings);
-    procedure RemoveSpaces(inlist, outlist: TStrings);
-    procedure ReplaceUnsigned(inlist, outlist: TStrings);
-    procedure FindTexRef(inlist: TStrings);
-    procedure FindConst(inlist: TStrings);
-    procedure FindFunc(inlist: TStrings);
-    procedure FindFuncKernelName(inlist: TStrings);
-  public
-    Owner: TComponent;
-    TexRef: array of TCUDATexRefInfo;
-    Func: array of TCUDAFuncInfo;
-    Constant: array of TCUDAConstantInfo;
-    constructor Create;
-    destructor Destroy; override;
-    procedure ParseModule(ASource, AProduct: TStrings);
-  end;
-
-//-------------------------------------------
-implementation
-//-------------------------------------------
-
-uses
-  GLS.Strings;
-
-const
-  WordDelimiters: set of AnsiChar = [#0..#255] - ['a'..'z','A'..'Z','1'..'9','0','_'];
-  sCUDAType: array[TCUDAType] of string =
-  (
-    '',
-    'char',
-    'uchar',
-    'char2',
-    'uchar2',
-    'char3',
-    'uchar3',
-    'char4',
-    'uchar4',
-    'short',
-    'ushort',
-    'short2',
-    'ushort2',
-    'short3',
-    'ushort3',
-    'short4',
-    'ushort4',
-    'int',
-    'uint',
-    'int2',
-    'uint2',
-    'int3',
-    'uint3',
-    'int4',
-    'uint4',
-    'long',
-    'ulong',
-    'long2',
-    'ulong2',
-    'long3',
-    'ulong3',
-    'long4',
-    'ulong4',
-    'float',
-    'float2',
-    'float3',
-    'float4',
-    'longlong',
-    'ulonglong',
-    'longlong2',
-    'ulonglong2',
-    'longlong3',
-    'ulonglong3',
-    'longlong4',
-    'ulonglong4',
-    'double',
-    'double2',
-    'double3',
-    'double4',
-    'int8',
-    'int16',
-    'int32',
-    'uint8',
-    'uint16',
-    'uint32'
-    );
-
-function StrToCUDAType(const AToken: string): TCUDAType;
-var
-  T: TCUDAType;
-begin
-  for T := char1 to uint32 do
-    if AToken = sCUDAType[T] then
-    begin
-      exit(T);
-    end;
-  Result := customType;
-end;
-
-procedure TCUDAModuleInfo.BreakStrings(inlist, outlist: TStrings);
-var
-  i: Integer;
-  str, accum: string;
-  c: Char;
-begin
-  str := inlist.Text;
-  outlist.Clear;
-  accum := '';
-
-  for I := 1 to Length(str) do
-  begin
-    c := str[I];
-    if CharInSet(c, WordDelimiters) then
-    begin
-      if Length(accum) > 0 then
-      begin
-        outlist.Add(accum);
-        accum := '';
-      end;
-      outlist.Add(c);
-    end
-    else
-      accum := accum + str[I];
-  end;
-end;
-
-procedure TCUDAModuleInfo.RemoveComents(inlist, outlist: TStrings);
-var
-  bSkipToLineBreak: Boolean;
-  bSkipToRemarkEnd: Boolean;
-  i: Integer;
-  str1, str2: string;
-begin
-  outlist.Clear;
-  bSkipToLineBreak := False;
-  bSkipToRemarkEnd := False;
-  for I := 0 to inlist.Count - 2 do
-  begin
-    str1 := inlist[I];
-    str2 := inlist[I+1];
-
-    if bSkipToLineBreak then
-    begin
-      if (str1 = #13) then
-        bSkipToLineBreak := False;
-      continue;
-    end;
-
-    if bSkipToRemarkEnd then
-    begin
-      if (str1 = '*') and (str2 = '/')  then
-        bSkipToRemarkEnd := False;
-      continue;
-    end;
-
-    if (str1 = '/') and (str2 = '/') then
-    begin
-      bSkipToLineBreak := True;
-      continue;
-    end
-    else if (str1 = '/') and (str2 = '*') then
-    begin
-      bSkipToRemarkEnd := True;
-      continue;
-    end;
-
-    outlist.Add(str1);
-  end;
-end;
-
-procedure TCUDAModuleInfo.RemoveSpaces(inlist, outlist: TStrings);
-var
-  i: Integer;
-begin
-  outlist.Clear;
-  for I := 0 to inlist.Count - 2 do
-    if inlist[i] > #32 then
-      outlist.Add(inlist[i]);
-end;
-
-procedure TCUDAModuleInfo.ReplaceUnsigned(inlist, outlist: TStrings);
-var
-  I: Integer;
-begin
-  outlist.Clear;
-  I := 0;
-  repeat
-    if (inlist[I] = 'unsigned') and (inlist[I+1] = 'int') then
-    begin
-      outlist.Add('uint32');
-      Inc(I);
-    end
-    else
-      outlist.Add(inlist[I]);
-   Inc(I);
-  until I >= inlist.Count;
-end;
-
-procedure TCUDAModuleInfo.FindTexRef(inlist: TStrings);
-var
-  i, p, e: Integer;
-  texInfo: TCUDATexRefInfo;
-begin
-  for I := 0 to inlist.Count - 1 do
-  begin
-    if UpperCase(inlist[i]) = 'TEXTURE' then
-    begin
-      if inlist[i+1] <> '<' then
-        continue;
-      texInfo.DataType := StrToCUDAType(inlist[i+2]);
-      if inlist[i+3] <> ',' then
-        continue;
-      Val(inlist[i+4], texInfo.Dim, e);
-      if e <> 0 then
-        Continue;
-
-      p := 5;
-      if inlist[i+5] = ',' then
-      begin
-        if inlist[i+6] = 'cudaReadModeElementType' then
-          texInfo.ReadMode := cudaReadModeElementType
-        else if inlist[i+6] = 'cudaReadModeNormalizedFloat' then
-          texInfo.ReadMode := cudaReadModeNormalizedFloat
-        else
-          Continue;
-        p := 7;
-      end;
-      if inlist[i+p] <> '>' then
-        continue;
-      texInfo.Name := inlist[i+p+1];
-      SetLength(TexRef, Length(TexRef)+1);
-      TexRef[High(TexRef)] := texInfo;
-    end;
-  end;
-end;
-
-constructor TCUDAModuleInfo.Create;
-begin
-  ping := TStringList.Create;
-  pong := TStringList.Create;
-end;
-
-destructor TCUDAModuleInfo.Destroy;
-begin
-  ping.Destroy;
-  pong.Destroy;
-end;
-
-procedure TCUDAModuleInfo.FindConst(inlist: TStrings);
-var
-  i, p: Integer;
-  constInfo: TCUDAConstantInfo;
-begin
-  for I := 0 to inlist.Count - 1 do
-  begin
-    if UpperCase(inlist[i]) = '__CONSTANT__' then
-    begin
-      p := i+1;
-      if inlist[p] = 'static' then
-        Inc(p);
-      constInfo.DataType := StrToCUDAType(inlist[p]);
-      if constInfo.DataType = customType then
-        constInfo.CustomType := inlist[p]
-      else
-        constInfo.CustomType := '';
-      Inc(p);
-
-      if inlist[p] = '*' then
-      begin
-        constInfo.Ref := True;
-        Inc(p);
-      end
-      else
-        constInfo.Ref := False;
-
-      constInfo.Name := inlist[p];
-      Inc(p);
-      constInfo.DefValue := False;
-      while p < inlist.Count do
-      begin
-        if inlist[p] = '=' then
-        begin
-          constInfo.DefValue := True;
-          break;
-        end
-        else if inlist[p] = ';' then
-          break;
-        Inc(p);
-      end;
-      SetLength(Constant, Length(Constant)+1);
-      Constant[High(Constant)] := constInfo;
-    end;
-  end;
-end;
-
-procedure TCUDAModuleInfo.FindFunc(inlist: TStrings);
-var
-  i, p: Integer;
-  funcInfo: TCUDAFuncInfo;
-  argInfo: TCUDAFuncArgInfo;
-begin
-  for I := 0 to inlist.Count - 1 do
-  begin
-    if UpperCase(inlist[i]) = '__GLOBAL__' then
-    begin
-      if inlist[i+1] <> 'void' then
-        Continue;
-      funcInfo.Name := inlist[i+2];
-      funcInfo.KernelName := '';
-      if inlist[i+3] <> '(' then
-        Continue;
-
-      p := 4;
-      funcInfo.Args := nil;
-      while inlist[i+p] <> ')' do
-      begin
-        if inlist[i+p] = ',' then
-        begin
-          inc(p);
-          Continue;
-        end;
-        argInfo.DataType := StrToCUDAType(inlist[i+p]);
-        if argInfo.DataType = customType then
-          argInfo.CustomType := inlist[i+p]
-        else
-          argInfo.CustomType := '';
-        Inc(p);
-
-        if inlist[i+p] = '*' then
-        begin
-          argInfo.Ref := True;
-          Inc(p);
-        end
-        else
-          argInfo.Ref := False;
-
-        argInfo.Name := inlist[i+p];
-        SetLength(funcInfo.Args, Length(funcInfo.Args)+1);
-        funcInfo.Args[High(funcInfo.Args)] := argInfo;
-        inc(p);
-      end;
-      SetLength(Func, Length(Func)+1);
-      Func[High(Func)] := funcInfo;
-    end;
-  end;
-end;
-
-procedure TCUDAModuleInfo.FindFuncKernelName(inlist: TStrings);
-var
-  I, J, P: Integer;
-  LStr: string;
-begin
-  for J := 0 to inlist.Count - 1 do
-  begin
-    LStr := inlist[J];
-    P := Pos('.entry', LStr);
-    if P > 0 then
-    begin
-      Delete(LStr, 1, P+6);
-      P := Pos(' ', LStr);
-      if P < 1 then
-        continue;
-      LStr := Copy(LStr, 1, P-1);
-      for I := 0 to High(Func) do
-      begin
-        if Pos(Func[I].Name, LStr) > 0 then
-        begin
-          if Length(Func[I].KernelName) > Length(LStr) then
-            continue;
-          Func[I].KernelName := LStr;
-          break;
-        end;
-      end;
-    end;
-  end;
-end;
-
-procedure TCUDAModuleInfo.Reset;
-var
-  i: Integer;
-begin
-  TexRef := nil;
-  Constant:= nil;
-  for I := 0 to High(Func) do
-    Func[I].Args := nil;
-  Func := nil;
-end;
-
-procedure TCUDAModuleInfo.ParseModule(ASource, AProduct: TStrings);
-begin
-  Reset;
-  BreakStrings(ASource, ping);
-  RemoveComents(ping, pong);
-  RemoveSpaces(pong, ping);
-  ReplaceUnsigned(ping, pong);
-  FindTexRef(pong);
-  FindConst(pong);
-  FindFunc(pong);
-  // Double call to confidence
-  FindFuncKernelName(AProduct);
-  FindFuncKernelName(AProduct);
-end;
-
-end.
-
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAParser;
+
+(*
+   Helper unit that parses CU modules to obtain information about
+   their kernel functions, textures, shared memory and constant memory.
+*)
+
+interface
+
+uses
+  System.Classes,
+  System.SysUtils,
+  Import.CUDARunTime;
+
+type
+
+  // Data types the parser recognises in CU source. customType stands for any
+  // token that is not one of the known type names (see StrToCUDAType).
+  // The order must match the sCUDAType name table in the implementation.
+  TCUDAType =
+    (
+    customType,
+    char1,
+    uchar1,
+    char2,
+    uchar2,
+    char3,
+    uchar3,
+    char4,
+    uchar4,
+    short1,
+    ushort1,
+    short2,
+    ushort2,
+    short3,
+    ushort3,
+    short4,
+    ushort4,
+    int1,
+    uint1,
+    int2,
+    uint2,
+    int3,
+    uint3,
+    int4,
+    uint4,
+    long1,
+    ulong1,
+    long2,
+    ulong2,
+    long3,
+    ulong3,
+    long4,
+    ulong4,
+    float1,
+    float2,
+    float3,
+    float4,
+    longlong1,
+    ulonglong1,
+    longlong2,
+    ulonglong2,
+    longlong3,
+    ulonglong3,
+    longlong4,
+    ulonglong4,
+    double1,
+    double2,
+    double3,
+    double4,
+    int8,
+    int16,
+    int32,
+    uint8,
+    uint16,
+    uint32
+    );
+
+  // Description of one texture reference found by FindTexRef.
+  TCUDATexRefInfo = record
+    Name: string;                   // identifier of the texture reference
+    DataType: TCUDAType;            // element type given in the declaration
+    Dim: Byte;                      // numeric value parsed from the declaration
+    ReadMode: TcudaTextureReadMode; // read mode named in the declaration
+  end;
+
+  // Description of one kernel function argument.
+  TCUDAFuncArgInfo = record
+    Name: string;        // argument identifier
+    DataType: TCUDAType; // recognised type, or customType
+    CustomType: string;  // type token when DataType = customType, else ''
+    Ref: Boolean;        // True when the type is followed by '*'
+  end;
+
+  // Description of one kernel (__global__) function.
+  TCUDAFuncInfo = record
+    Name: string;                    // name as written in the source
+    KernelName: string;              // entry name, filled by FindFuncKernelName
+    Args: array of TCUDAFuncArgInfo; // arguments in declaration order
+  end;
+
+  // Description of one __constant__ declaration.
+  TCUDAConstantInfo = record
+    Name: string;        // identifier of the constant
+    DataType: TCUDAType; // recognised type, or customType
+    CustomType: string;  // type token when DataType = customType, else ''
+    Ref: Boolean;        // True when the type is followed by '*'
+    DefValue: Boolean;   // True when the declaration has an initialiser
+  end;
+
+  // Parses CU module source and collects its texture references, kernel
+  // functions and constants into the public arrays.
+  TCUDAModuleInfo = class(TObject)
+  private
+    // Scratch token lists.
+    ping, pong: TStrings;
+    procedure Reset;
+    procedure BreakStrings(inlist, outlist: TStrings);
+    procedure RemoveComents(inlist, outlist: TStrings);
+    procedure RemoveSpaces(inlist, outlist: TStrings);
+    procedure ReplaceUnsigned(inlist, outlist: TStrings);
+    procedure FindTexRef(inlist: TStrings);
+    procedure FindConst(inlist: TStrings);
+    procedure FindFunc(inlist: TStrings);
+    procedure FindFuncKernelName(inlist: TStrings);
+  public
+    Owner: TComponent;
+    // Results of the last parse.
+    TexRef: array of TCUDATexRefInfo;
+    Func: array of TCUDAFuncInfo;
+    Constant: array of TCUDAConstantInfo;
+    constructor Create;
+    destructor Destroy; override;
+    procedure ParseModule(ASource, AProduct: TStrings);
+  end;
+
+//-------------------------------------------
+implementation
+//-------------------------------------------
+
+uses
+  GLS.Strings;
+
+const
+  // Every character that is not a letter, a digit or '_' separates words.
+  WordDelimiters: set of AnsiChar = [#0..#255] - ['a'..'z','A'..'Z','1'..'9','0','_'];
+  // Source spelling of each TCUDAType value, in enumeration order.
+  // customType has no spelling and maps to the empty string.
+  sCUDAType: array[TCUDAType] of string =
+  (
+    '',
+    'char',
+    'uchar',
+    'char2',
+    'uchar2',
+    'char3',
+    'uchar3',
+    'char4',
+    'uchar4',
+    'short',
+    'ushort',
+    'short2',
+    'ushort2',
+    'short3',
+    'ushort3',
+    'short4',
+    'ushort4',
+    'int',
+    'uint',
+    'int2',
+    'uint2',
+    'int3',
+    'uint3',
+    'int4',
+    'uint4',
+    'long',
+    'ulong',
+    'long2',
+    'ulong2',
+    'long3',
+    'ulong3',
+    'long4',
+    'ulong4',
+    'float',
+    'float2',
+    'float3',
+    'float4',
+    'longlong',
+    'ulonglong',
+    'longlong2',
+    'ulonglong2',
+    'longlong3',
+    'ulonglong3',
+    'longlong4',
+    'ulonglong4',
+    'double',
+    'double2',
+    'double3',
+    'double4',
+    'int8',
+    'int16',
+    'int32',
+    'uint8',
+    'uint16',
+    'uint32'
+    );
+
+// Maps a source token to its TCUDAType by exact, case-sensitive comparison
+// with the sCUDAType table. Unknown tokens yield customType.
+function StrToCUDAType(const AToken: string): TCUDAType;
+var
+  Candidate: TCUDAType;
+begin
+  Result := customType;
+  for Candidate := char1 to uint32 do
+    if sCUDAType[Candidate] = AToken then
+    begin
+      Result := Candidate;
+      Break;
+    end;
+end;
+
+// Splits the text of inlist into tokens and stores them in outlist, one per
+// entry: each run of word characters becomes one token and every delimiter
+// character (see WordDelimiters) becomes a token of its own.
+// A word that ends the text without a following delimiter is now emitted
+// too; it used to be dropped.
+procedure TCUDAModuleInfo.BreakStrings(inlist, outlist: TStrings);
+var
+  i: Integer;
+  str, accum: string;
+  c: Char;
+begin
+  str := inlist.Text;
+  outlist.Clear;
+  accum := '';
+
+  for I := 1 to Length(str) do
+  begin
+    c := str[I];
+    if CharInSet(c, WordDelimiters) then
+    begin
+      // A delimiter closes the word collected so far.
+      if Length(accum) > 0 then
+      begin
+        outlist.Add(accum);
+        accum := '';
+      end;
+      outlist.Add(c);
+    end
+    else
+      accum := accum + c;
+  end;
+
+  // Flush a word that reaches the end of the text.
+  if Length(accum) > 0 then
+    outlist.Add(accum);
+end;
+
+// Copies the token list inlist to outlist without C-style comments.
+// Expects one token per entry as produced by BreakStrings, so '//', '/*'
+// and '*/' each appear as two consecutive single-character tokens.
+//
+// Fixes over the previous version:
+//  * both tokens of '*/' are consumed (the closing '/' used to be copied
+//    to the output);
+//  * both tokens of '/*' are consumed, so '/*/' no longer closes the comment
+//    it has just opened;
+//  * a line comment ends at #13 or #10, not only at #13;
+//  * the last token of the list is processed instead of being dropped.
+procedure TCUDAModuleInfo.RemoveComents(inlist, outlist: TStrings);
+var
+  bSkipToLineBreak: Boolean;
+  bSkipToRemarkEnd: Boolean;
+  i: Integer;
+  str1, str2: string;
+begin
+  outlist.Clear;
+  bSkipToLineBreak := False;
+  bSkipToRemarkEnd := False;
+  I := 0;
+  while I < inlist.Count do
+  begin
+    str1 := inlist[I];
+    // Look-ahead token; empty when str1 is the last one.
+    if I + 1 < inlist.Count then
+      str2 := inlist[I + 1]
+    else
+      str2 := '';
+
+    if bSkipToLineBreak then
+    begin
+      // Inside '// ...': discard everything up to and including the break.
+      if (str1 = #13) or (str1 = #10) then
+        bSkipToLineBreak := False;
+      Inc(I);
+    end
+    else if bSkipToRemarkEnd then
+    begin
+      // Inside '/* ... */': discard up to and including the terminator.
+      if (str1 = '*') and (str2 = '/') then
+      begin
+        bSkipToRemarkEnd := False;
+        Inc(I, 2);
+      end
+      else
+        Inc(I);
+    end
+    else if (str1 = '/') and (str2 = '/') then
+    begin
+      bSkipToLineBreak := True;
+      Inc(I, 2);
+    end
+    else if (str1 = '/') and (str2 = '*') then
+    begin
+      bSkipToRemarkEnd := True;
+      Inc(I, 2);
+    end
+    else
+    begin
+      outlist.Add(str1);
+      Inc(I);
+    end;
+  end;
+end;
+
+// Copies to outlist every token of inlist that compares greater than #32,
+// which removes the space, tab and line-break tokens. outlist is cleared first.
+procedure TCUDAModuleInfo.RemoveSpaces(inlist, outlist: TStrings);
+var
+  i: Integer;
+begin
+  outlist.Clear;
+  // Visit every token, including the last one: the previous upper bound of
+  // Count - 2 silently discarded the final token even when it was not blank.
+  for I := 0 to inlist.Count - 1 do
+    if inlist[i] > #32 then
+      outlist.Add(inlist[i]);
+end;
+
+// Copies inlist to outlist, merging each 'unsigned' 'int' token pair into the
+// single token 'uint32'. All other tokens are copied unchanged.
+// outlist is cleared first.
+procedure TCUDAModuleInfo.ReplaceUnsigned(inlist, outlist: TStrings);
+var
+  I: Integer;
+begin
+  outlist.Clear;
+  I := 0;
+  // A pre-tested loop keeps an empty list from being indexed at all.
+  while I < inlist.Count do
+  begin
+    // The look-ahead is bounds-checked so that a trailing 'unsigned' token
+    // does not index past the end of the list.
+    if (inlist[I] = 'unsigned') and (I + 1 < inlist.Count)
+      and (inlist[I + 1] = 'int') then
+    begin
+      outlist.Add('uint32');
+      Inc(I, 2);
+    end
+    else
+    begin
+      outlist.Add(inlist[I]);
+      Inc(I);
+    end;
+  end;
+end;
+
+// Scans the token list for texture reference declarations of the form
+//   texture < type , dim [, readmode] > name
+// and appends one TCUDATexRefInfo entry to TexRef for every match.
+// Declarations that do not follow this shape are skipped.
+procedure TCUDAModuleInfo.FindTexRef(inlist: TStrings);
+var
+  i, p, e: Integer;
+  texInfo: TCUDATexRefInfo;
+
+  // Returns the token at AIndex, or '' when AIndex lies outside the list, so
+  // a declaration cut off by the end of the input is rejected, not an error.
+  function TokenAt(AIndex: Integer): string;
+  begin
+    if (AIndex >= 0) and (AIndex < inlist.Count) then
+      Result := inlist[AIndex]
+    else
+      Result := '';
+  end;
+
+begin
+  for I := 0 to inlist.Count - 1 do
+  begin
+    if UpperCase(inlist[i]) <> 'TEXTURE' then
+      Continue;
+
+    if TokenAt(i+1) <> '<' then
+      Continue;
+    texInfo.DataType := StrToCUDAType(TokenAt(i+2));
+    if TokenAt(i+3) <> ',' then
+      Continue;
+    Val(TokenAt(i+4), texInfo.Dim, e);
+    if e <> 0 then
+      Continue;
+
+    // When the read mode is omitted CUDA uses cudaReadModeElementType.
+    // Setting it here also stops a value from an earlier match being reused.
+    texInfo.ReadMode := cudaReadModeElementType;
+    p := 5;
+    if TokenAt(i+5) = ',' then
+    begin
+      if TokenAt(i+6) = 'cudaReadModeElementType' then
+        texInfo.ReadMode := cudaReadModeElementType
+      else if TokenAt(i+6) = 'cudaReadModeNormalizedFloat' then
+        texInfo.ReadMode := cudaReadModeNormalizedFloat
+      else
+        Continue;
+      p := 7;
+    end;
+    if TokenAt(i+p) <> '>' then
+      Continue;
+    // The reference name must follow the closing bracket.
+    if i+p+1 >= inlist.Count then
+      Continue;
+    texInfo.Name := inlist[i+p+1];
+    SetLength(TexRef, Length(TexRef)+1);
+    TexRef[High(TexRef)] := texInfo;
+  end;
+end;
+
+// Creates the two scratch string lists used by ParseModule.
+constructor TCUDAModuleInfo.Create;
+begin
+  // Let the ancestor initialise itself before the fields are set up.
+  inherited;
+  ping := TStringList.Create;
+  pong := TStringList.Create;
+end;
+
+// Releases the two scratch string lists and then runs the ancestor destructor.
+destructor TCUDAModuleInfo.Destroy;
+begin
+  // Free is nil-safe, which matters when the constructor failed part-way.
+  ping.Free;
+  pong.Free;
+  // The ancestor destructor was previously never called.
+  inherited;
+end;
+
+// Scans the token list for declarations of the form
+//   __constant__ [static] type [*] name [= ...] ;
+// and appends one TCUDAConstantInfo entry to Constant for every match.
+// DefValue records whether an '=' was found before the terminating ';'.
+procedure TCUDAModuleInfo.FindConst(inlist: TStrings);
+var
+  i, p: Integer;
+  constInfo: TCUDAConstantInfo;
+
+  // Returns the token at AIndex, or '' when AIndex lies outside the list.
+  function TokenAt(AIndex: Integer): string;
+  begin
+    if (AIndex >= 0) and (AIndex < inlist.Count) then
+      Result := inlist[AIndex]
+    else
+      Result := '';
+  end;
+
+begin
+  for I := 0 to inlist.Count - 1 do
+  begin
+    if UpperCase(inlist[i]) <> '__CONSTANT__' then
+      Continue;
+
+    p := i+1;
+    if TokenAt(p) = 'static' then
+      Inc(p);
+    // A declaration needs at least a type token and a name token; skip one
+    // that is cut off by the end of the input instead of indexing past it.
+    if p + 1 >= inlist.Count then
+      Continue;
+
+    constInfo.DataType := StrToCUDAType(inlist[p]);
+    if constInfo.DataType = customType then
+      constInfo.CustomType := inlist[p]
+    else
+      constInfo.CustomType := '';
+    Inc(p);
+
+    if inlist[p] = '*' then
+    begin
+      constInfo.Ref := True;
+      Inc(p);
+    end
+    else
+      constInfo.Ref := False;
+
+    // The '*' may have been the last token.
+    if p >= inlist.Count then
+      Continue;
+    constInfo.Name := inlist[p];
+    Inc(p);
+
+    // Look for an initialiser before the end of the declaration.
+    constInfo.DefValue := False;
+    while p < inlist.Count do
+    begin
+      if inlist[p] = '=' then
+      begin
+        constInfo.DefValue := True;
+        break;
+      end
+      else if inlist[p] = ';' then
+        break;
+      Inc(p);
+    end;
+    SetLength(Constant, Length(Constant)+1);
+    Constant[High(Constant)] := constInfo;
+  end;
+end;
+
+// Scans the token list for kernel declarations of the form
+//   __global__ void name ( type [*] arg , ... )
+// and appends one TCUDAFuncInfo entry, with its argument list, to Func for
+// every match. KernelName is left empty here.
+procedure TCUDAModuleInfo.FindFunc(inlist: TStrings);
+var
+  i, p: Integer;
+  bClosed: Boolean;
+  funcInfo: TCUDAFuncInfo;
+  argInfo: TCUDAFuncArgInfo;
+
+  // Returns the token at AIndex, or '' when AIndex lies outside the list.
+  function TokenAt(AIndex: Integer): string;
+  begin
+    if (AIndex >= 0) and (AIndex < inlist.Count) then
+      Result := inlist[AIndex]
+    else
+      Result := '';
+  end;
+
+begin
+  for I := 0 to inlist.Count - 1 do
+  begin
+    if UpperCase(inlist[i]) <> '__GLOBAL__' then
+      Continue;
+    if TokenAt(i+1) <> 'void' then
+      Continue;
+    if TokenAt(i+3) <> '(' then
+      Continue;
+    funcInfo.Name := inlist[i+2];
+    funcInfo.KernelName := '';
+    funcInfo.Args := nil;
+
+    // p is the absolute index of the token being examined. The scan is
+    // bounded by the list length so an unterminated parameter list cannot
+    // index past the end.
+    p := i + 4;
+    bClosed := False;
+    while p < inlist.Count do
+    begin
+      if inlist[p] = ')' then
+      begin
+        bClosed := True;
+        Break;
+      end;
+      if inlist[p] = ',' then
+      begin
+        Inc(p);
+        Continue;
+      end;
+
+      argInfo.DataType := StrToCUDAType(inlist[p]);
+      if argInfo.DataType = customType then
+        argInfo.CustomType := inlist[p]
+      else
+        argInfo.CustomType := '';
+      Inc(p);
+
+      if TokenAt(p) = '*' then
+      begin
+        argInfo.Ref := True;
+        Inc(p);
+      end
+      else
+        argInfo.Ref := False;
+
+      // Input ended in the middle of a parameter.
+      if p >= inlist.Count then
+        Break;
+      // A type with no name, as in '(void)': the ')' closes the list and
+      // must not be stored as a parameter name.
+      if inlist[p] = ')' then
+      begin
+        bClosed := True;
+        Break;
+      end;
+
+      argInfo.Name := inlist[p];
+      SetLength(funcInfo.Args, Length(funcInfo.Args)+1);
+      funcInfo.Args[High(funcInfo.Args)] := argInfo;
+      Inc(p);
+    end;
+
+    // Ignore a declaration whose parameter list never closes.
+    if not bClosed then
+      Continue;
+    SetLength(Func, Length(Func)+1);
+    Func[High(Func)] := funcInfo;
+  end;
+end;
+
+// Looks through the lines of inlist for '.entry' markers and takes the text
+// between the marker and the next space as a candidate kernel name. The
+// first Func entry whose Name occurs inside the candidate, and whose current
+// KernelName is not longer than the candidate, receives it as KernelName.
+procedure TCUDAModuleInfo.FindFuncKernelName(inlist: TStrings);
+var
+  LineIdx, FuncIdx, MarkPos: Integer;
+  Candidate: string;
+begin
+  for LineIdx := 0 to inlist.Count - 1 do
+  begin
+    Candidate := inlist[LineIdx];
+    MarkPos := Pos('.entry', Candidate);
+    if MarkPos <= 0 then
+      Continue;
+
+    // Drop everything up to and including '.entry' and the character after it.
+    Candidate := Copy(Candidate, MarkPos + 7, MaxInt);
+    MarkPos := Pos(' ', Candidate);
+    if MarkPos < 1 then
+      Continue;
+    SetLength(Candidate, MarkPos - 1);
+
+    for FuncIdx := 0 to High(Func) do
+      if Pos(Func[FuncIdx].Name, Candidate) > 0 then
+        if Length(Func[FuncIdx].KernelName) <= Length(Candidate) then
+        begin
+          Func[FuncIdx].KernelName := Candidate;
+          Break;
+        end;
+  end;
+end;
+
+// Empties the TexRef, Constant and Func arrays, releasing the argument array
+// of every function entry first.
+procedure TCUDAModuleInfo.Reset;
+var
+  FuncIdx: Integer;
+begin
+  SetLength(TexRef, 0);
+  SetLength(Constant, 0);
+  for FuncIdx := Low(Func) to High(Func) do
+    SetLength(Func[FuncIdx].Args, 0);
+  SetLength(Func, 0);
+end;
+
+// Rebuilds the module description from scratch.
+// ASource is tokenised and cleaned in stages, each stage reading one of the
+// ping/pong scratch lists and writing the other; the cleaned tokens are then
+// searched for texture references, constants and kernel functions.
+// AProduct is searched for '.entry' markers to fill in the kernel names.
+procedure TCUDAModuleInfo.ParseModule(ASource, AProduct: TStrings);
+begin
+  Reset;
+  BreakStrings(ASource, ping);
+  RemoveComents(ping, pong);
+  RemoveSpaces(pong, ping);
+  ReplaceUnsigned(ping, pong);
+  FindTexRef(pong);
+  FindConst(pong);
+  FindFunc(pong);
+  // The kernel name lookup is deliberately run twice.
+  // NOTE(review): presumably so a second pass can refine names assigned by
+  // the first one - confirm against FindFuncKernelName before removing.
+  FindFuncKernelName(AProduct);
+  FindFuncKernelName(AProduct);
+end;
+
+end.
+

+ 416 - 447
Source/GLS.ParallelRegister.pas → Source/GPU.CUDAPropEditors.pas

@@ -1,447 +1,416 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.ParallelRegister;
-
-(*  Registration unit for GPU Computing package *)
-
-interface
-
-uses
-  System.Classes,
-  System.SysUtils,
-  DesignIntf,
-  DesignEditors,
-  StrEdit,
-  ToolsAPI,
-   
-  GLS.SceneRegister;
-
-procedure Register;
-
-type
-
-  TGLSCUDAEditor = class(TComponentEditor)
-  public
-    procedure Edit; override;
-    procedure ExecuteVerb(Index: Integer); override;
-    function GetVerb(Index: Integer): string; override;
-    function GetVerbCount: Integer; override;
-  end;
-
-  TGLSCUDACompilerEditor = class(TComponentEditor)
-  public
-    procedure Edit; override;
-    procedure ExecuteVerb(Index: Integer); override;
-    function GetVerb(Index: Integer): string; override;
-    function GetVerbCount: Integer; override;
-  end;
-
-  TGLSCUDACompilerSourceProperty = class(TStringProperty)
-  private
-    FModuleList: TStringList;
-    procedure RefreshModuleList;
-  public
-    constructor Create(const ADesigner: IDesigner; APropCount: Integer); override;
-    destructor Destroy; override;
-    function GetAttributes: TPropertyAttributes; override;
-    procedure GetValues(Proc: TGetStrProc); override;
-    procedure SetValue(const Value: String); override;
-  end;
-
-  TGLSCUDADeviceProperty = class(TStringProperty)
-  private
-    FDeviceList: TStringList;
-  public
-    constructor Create(const ADesigner: IDesigner; APropCount: Integer); override;
-    destructor Destroy; override;
-    function GetAttributes: TPropertyAttributes; override;
-    procedure GetValues(Proc: TGetStrProc); override;
-    procedure SetValue(const Value: String); override;
-  end;
-
-//--------------------------------------------------------------------
-implementation
-//--------------------------------------------------------------------
-
-uses
-  GLS.CUDARunTime,
-  GLS.CUDAContext,
-  GLS.CUDA,
-  GLS.CUDACompiler,
-  GLS.CUDAFFTPlan,
-  GLS.CUDAGraphics,
-  GLS.CUDAParser,
-  FCUDAEditor;
-
-function FindCuFile(var AModuleName: string): Boolean;
-var
-  proj: IOTAProject;
-  I: Integer;
-  LModule: IOTAModuleInfo;
-  LName: string;
-begin
-  proj := GetActiveProject;
-  if proj <> nil then
-  begin
-    for I := 0 to proj.GetModuleCount - 1 do
-    begin
-      LModule := proj.GetModule(I);
-      LName := ExtractFileName(LModule.FileName);
-      if LName = AModuleName then
-      begin
-        AModuleName := LModule.FileName;
-        exit(True);
-      end;
-    end;
-  end;
-  Result := False;
-end;
-
-// ------------------
-// ------------------ TGLSCUDAEditor ------------------
-// ------------------
-
-procedure TGLSCUDAEditor.Edit;
-begin
-  with GLSCUDAEditorForm do
-  begin
-    SetCUDAEditorClient(TGLSCUDA(Self.Component), Self.Designer);
-    Show;
-  end;
-end;
-
-procedure TGLSCUDAEditor.ExecuteVerb(Index: Integer);
-begin
-  case Index of
-    0: Edit;
-  end;
-end;
-
-function TGLSCUDAEditor.GetVerb(Index: Integer): string;
-begin
-  case Index of
-    0: Result := 'Show CUDA Items Editor';
-  end;
-end;
-
-function TGLSCUDAEditor.GetVerbCount: Integer;
-begin
-  Result := 1;
-end;
-
-// ------------------
-// ------------------ TGLSCUDACompilerEditor ------------------
-// ------------------
-
-procedure TGLSCUDACompilerEditor.Edit;
-var
-  CUDACompiler: TGLSCUDACompiler;
-  I, J: Integer;
-  func: TCUDAFunction;
-  tex: TCUDATexture;
-  cnst: TCUDAConstant;
-  param: TCUDAFuncParam;
-  parent: TCUDAModule;
-  info: TCUDAModuleInfo;
-  bUseless: Boolean;
-  useless: array of TCUDAComponent;
-  CTN: TChannelTypeAndNum;
-
-  procedure CreateFuncParams;
-  var
-    K: Integer;
-  begin
-    for K := 0 to High(info.Func[I].Args) do
-    begin
-      param := TCUDAFuncParam(Designer.CreateComponent(TCUDAFuncParam,
-        func, 0, 0, 0, 0));
-      param.Master := TCUDAComponent(func);
-      param.KernelName := info.Func[I].Args[K].Name;
-      param.Name := func.KernelName+'_'+param.KernelName;
-      param.DataType := info.Func[I].Args[K].DataType;
-      param.CustomType := info.Func[I].Args[K].CustomType;
-      param.Reference := info.Func[I].Args[K].Ref;
-    end;
-  end;
-
-begin
-  CUDACompiler := TGLSCUDACompiler(Self.Component);
-  if CUDACompiler.Compile then
-  begin
-    info := CUDACompiler.ModuleInfo;
-    parent := TCUDAModule(info.Owner);
-
-    // Creates kernel's functions
-    for I := 0 to High(info.Func) do
-    begin
-      func := parent.KernelFunction[info.Func[I].KernelName];
-      if not Assigned(func) then
-      begin
-        func := TCUDAFunction(Designer.CreateComponent(TCUDAFunction,
-          info.Owner, 0, 0, 0, 0));
-        func.Master := TCUDAComponent(info.Owner);
-        func.KernelName := info.Func[I].KernelName;
-        func.Name := TCUDAComponent(info.Owner).MakeUniqueName(info.Func[I].Name);
-      end
-      else
-      begin
-        // for old parameters
-        while func.ItemsCount > 0 do
-          func.Items[0].Destroy;
-      end;
-
-      try
-        bUseless := func.Handle = nil;
-      except
-        bUseless := True;
-      end;
-      if bUseless then
-      begin
-        Designer.SelectComponent(func);
-        Designer.DeleteSelection(True);
-        func := nil;
-      end
-      else
-        CreateFuncParams;
-    end;
-
-    // Creates kernel's textures
-    for I := 0 to High(info.TexRef) do
-    begin
-      tex := parent.KernelTexture[info.TexRef[I].Name];
-      if not Assigned(tex) then
-      begin
-        tex := TCUDATexture(Designer.CreateComponent(TCUDATexture,
-          info.Owner, 0, 0, 0, 0));
-        tex.Master := TCUDAComponent(info.Owner);
-        tex.KernelName := info.TexRef[I].Name;
-        tex.Name := tex.KernelName;
-        tex.ReadAsInteger := (info.TexRef[I].ReadMode = cudaReadModeElementType);
-        CTN := GetChannelTypeAndNum(info.TexRef[I].DataType);
-        tex.Format := CTN.F;
-      end;
-
-      tex.ChannelNum := CTN.C;
-      try
-        bUseless := tex.Handle = nil;
-      except
-        bUseless := True;
-      end;
-      if bUseless then
-      begin
-        Designer.SelectComponent(tex);
-        Designer.DeleteSelection(True);
-      end;
-    end;
-    // Creates kernel's constants
-    for I := 0 to High(info.Constant) do
-    begin
-      cnst := parent.KernelConstant[info.Constant[I].Name];
-      if not Assigned(cnst) then
-      begin
-        cnst := TCUDAConstant(Designer.CreateComponent(TCUDAConstant,
-          info.Owner, 0, 0, 0, 0));
-        cnst.Master := TCUDAComponent(info.Owner);
-        cnst.KernelName := info.Constant[I].Name;
-        cnst.Name := cnst.KernelName;
-        cnst.DataType := info.Constant[I].DataType;
-        cnst.CustomType := info.Constant[I].CustomType;
-        cnst.IsValueDefined := info.Constant[I].DefValue;
-      end;
-
-      try
-        bUseless := cnst.DeviceAddress = nil;
-      except
-        bUseless := True;
-      end;
-      if bUseless then
-      begin
-        Designer.SelectComponent(cnst);
-        Designer.DeleteSelection(True);
-      end;
-    end;
-
-    // Delete useless components
-    SetLength(useless, parent.ItemsCount);
-    j := 0;
-    for i := 0 to parent.ItemsCount - 1 do
-    begin
-      if not TCUDAComponent(parent.Items[i]).IsAllocated then
-        begin
-          useless[j] := parent.Items[i];
-          inc(j);
-        end;
-    end;
-
-    for i := 0 to j - 1 do
-      useless[i].Destroy;
-  end;
-  Designer.Modified;
-end;
-
-procedure TGLSCUDACompilerEditor.ExecuteVerb(Index: Integer);
-begin
-  case Index of
-    0: Edit;
-  end;
-end;
-
-function TGLSCUDACompilerEditor.GetVerb(Index: Integer): string;
-begin
-  case Index of
-    0: Result := 'Compile Module';
-  end;
-end;
-
-function TGLSCUDACompilerEditor.GetVerbCount: Integer;
-begin
-  Result := 1;
-end;
-
-// ------------------
-// ------------------ TGLSCUDACompilerSourceProperty ------------------
-// ------------------
-
-constructor TGLSCUDACompilerSourceProperty.Create(
-    const ADesigner: IDesigner; APropCount: Integer);
-begin
-  inherited;
-  FModuleList := TStringList.Create;
-end;
-
- 
-destructor TGLSCUDACompilerSourceProperty.Destroy;
-begin
-  FModuleList.Destroy;
-  inherited;
-end;
-
-procedure TGLSCUDACompilerSourceProperty.RefreshModuleList;
-var
-  proj: IOTAProject;
-  I: Integer;
-  LModule: IOTAModuleInfo;
-  LName: string;
-begin
-  FModuleList.Clear;
-  FModuleList.Add('none');
-  proj := GetActiveProject;
-  if proj <> nil then
-  begin
-    for I := 0 to proj.GetModuleCount - 1 do
-    begin
-      LModule := proj.GetModule(I);
-      LName := UpperCase(ExtractFileExt(LModule.FileName));
-      if LName = '.CU' then
-        FModuleList.Add(LModule.FileName);
-    end;
-  end;
-end;
-
-
-function TGLSCUDACompilerSourceProperty.GetAttributes;
-begin
-  Result := [paValueList];
-end;
-
-procedure TGLSCUDACompilerSourceProperty.GetValues(Proc: TGetStrProc);
-var
-   I : Integer;
-begin
-  RefreshModuleList;
-  for I := 0 to FModuleList.Count - 1 do
-      Proc(ExtractFileName(FModuleList[I]));
-end;
-
-procedure TGLSCUDACompilerSourceProperty.SetValue(const Value: String);
-var
-  I, J: Integer;
-begin
-  RefreshModuleList;
-  J := -1;
-  for I := 1 to FModuleList.Count - 1 do
-    if Value = ExtractFileName(FModuleList[I]) then
-    begin
-      J := I;
-      Break;
-    end;
-
-  if J > 0 then
-  begin
-    TGLSCUDACompiler(GetComponent(0)).SetSourceCodeFile(FModuleList[J]);
-    SetStrValue(ExtractFileName(Value));
-  end
-  else
-  begin
-    SetStrValue('none');
-  end;
-	Modified;
-end;
-
-// ------------------
-// ------------------ TGLSCUDADeviceProperty ------------------
-// ------------------
-
-constructor TGLSCUDADeviceProperty.Create(const ADesigner: IDesigner; APropCount: Integer);
-begin
-  inherited;
-  FDeviceList := TStringList.Create;
-end;
-
-destructor TGLSCUDADeviceProperty.Destroy;
-begin
-  FDeviceList.Destroy;
-  inherited;
-end;
-
-function TGLSCUDADeviceProperty.GetAttributes: TPropertyAttributes;
-begin
-  Result := [paValueList];
-end;
-
-procedure TGLSCUDADeviceProperty.GetValues(Proc: TGetStrProc);
-begin
-  CUDAContextManager.FillUnusedDeviceList(FDeviceList);
-end;
-
-procedure TGLSCUDADeviceProperty.SetValue(const Value: String);
-var
-  I: Integer;
-begin
-  for I := 0 to FDeviceList.Count - 1 do
-    if Value = FDeviceList[I] then
-    begin
-      SetStrValue(Value);
-      Break;
-    end;
-  Modified;
-end;
-
-procedure Register;
-begin
-  RegisterComponents('GLScene GPU Computing', [TGLSCUDA, TGLSCUDADevice,
-    TGLSCUDACompiler]);
-  RegisterComponentEditor(TGLSCUDA, TGLSCUDAEditor);
-  RegisterComponentEditor(TGLSCUDACompiler, TGLSCUDACompilerEditor);
-  RegisterPropertyEditor(TypeInfo(string), TGLSCUDACompiler, 'ProjectModule',
-    TGLSCUDACompilerSourceProperty);
-  RegisterPropertyEditor(TypeInfo(string), TGLSCUDADevice, 'SelectDevice',
-    TGLSCUDADeviceProperty);
-  RegisterNoIcon([TCUDAModule, TCUDAMemData, TCUDAFunction, TCUDATexture,
-    TCUDAFFTPlan, TCUDAImageResource, TCUDAGeometryResource, TCUDAConstant,
-    TCUDAFuncParam]);
-
-  ObjectManager.RegisterSceneObject(TGLFeedbackMesh, 'GPU generated mesh', 'GPU Computing', HInstance);
-end;
-
-
-//------------------------------------------------------
-initialization
-//------------------------------------------------------
-
-  vFindCuFileFunc := FindCuFile;
-
-end.
-
+unit GPU.CUDAPropEditors;
+
+interface
+
+uses
+  System.Classes,
+  System.SysUtils,
+
+  ToolsAPI,
+  StrEdit,
+  DesignEditors,
+  DesignIntf,
+
+  GPU.CUDA,
+  GPU.CUDAContext,
+  GPU.CUDACompiler,
+  GPU.CUDAParser,
+
+  FCUDAEditor;
+
+type
+  // Component editor for TGLCUDA; its single verb shows the CUDA items
+  // editor form.
+  TGLCUDAEditor = class(TComponentEditor)
+  public
+    procedure Edit; override;
+    procedure ExecuteVerb(Index: Integer); override;
+    function GetVerb(Index: Integer): string; override;
+    function GetVerbCount: Integer; override;
+  end;
+
+  // Component editor for TGLCUDACompiler; its single verb compiles the module
+  // and synchronises the function, texture and constant components with the
+  // parsed module info.
+  TGLCUDACompilerEditor = class(TComponentEditor)
+  public
+    procedure Edit; override;
+    procedure ExecuteVerb(Index: Integer); override;
+    function GetVerb(Index: Integer): string; override;
+    function GetVerbCount: Integer; override;
+  end;
+
+  // String property editor that offers the '.cu' modules of the active
+  // project (plus 'none') as a value list.
+  TGLCUDACompilerSourceProperty = class(TStringProperty)
+  private
+    // 'none' followed by the full file names of the project's '.cu' modules.
+    FModuleList: TStringList;
+    procedure RefreshModuleList;
+  public
+    constructor Create(const ADesigner: IDesigner; APropCount: Integer); override;
+    destructor Destroy; override;
+    function GetAttributes: TPropertyAttributes; override;
+    procedure GetValues(Proc: TGetStrProc); override;
+    procedure SetValue(const Value: String); override;
+  end;
+
+  // String property editor for selecting a CUDA device by name.
+  TGLCUDADeviceProperty = class(TStringProperty)
+  private
+    // Filled by GetValues through CUDAContextManager.FillUnusedDeviceList;
+    // SetValue only accepts names found in this list.
+    FDeviceList: TStringList;
+  public
+    constructor Create(const ADesigner: IDesigner; APropCount: Integer); override;
+    destructor Destroy; override;
+    function GetAttributes: TPropertyAttributes; override;
+    procedure GetValues(Proc: TGetStrProc); override;
+    procedure SetValue(const Value: String); override;
+  end;
+
+
+function FindCuFile(var AModuleName: string): Boolean;
+
+//-----------------------------------------------
+implementation
+//-----------------------------------------------
+
+uses
+  Import.CUDARunTime;
+
+// Searches the modules of the active IDE project for one whose bare file name
+// equals AModuleName (case-sensitive). On success AModuleName is replaced by
+// the module's file name as reported by the project and True is returned;
+// otherwise AModuleName is left untouched and the result is False.
+function FindCuFile(var AModuleName: string): Boolean;
+var
+  ActiveProject: IOTAProject;
+  ModuleIdx: Integer;
+  ModuleInfo: IOTAModuleInfo;
+begin
+  Result := False;
+  ActiveProject := GetActiveProject;
+  if ActiveProject = nil then
+    Exit;
+
+  for ModuleIdx := 0 to ActiveProject.GetModuleCount - 1 do
+  begin
+    ModuleInfo := ActiveProject.GetModule(ModuleIdx);
+    if ExtractFileName(ModuleInfo.FileName) = AModuleName then
+    begin
+      AModuleName := ModuleInfo.FileName;
+      Result := True;
+      Exit;
+    end;
+  end;
+end;
+
+// ------------------
+// ------------------ TGLCUDAEditor ------------------
+// ------------------
+
+// Hands the edited TGLCUDA component and the current designer to the CUDA
+// items editor form and shows the form.
+procedure TGLCUDAEditor.Edit;
+begin
+  // GLSCUDAEditorForm is evaluated once by the 'with' statement.
+  // NOTE(review): it is declared in FCUDAEditor - confirm whether it is a
+  // variable or an accessor function before unrolling the 'with'.
+  with GLSCUDAEditorForm do
+  begin
+    SetCUDAEditorClient(TGLCUDA(Self.Component), Self.Designer);
+    Show;
+  end;
+end;
+
+// Runs the verb with the given index; index 0 opens the editor and every
+// other index is ignored.
+procedure TGLCUDAEditor.ExecuteVerb(Index: Integer);
+begin
+  if Index = 0 then
+    Edit;
+end;
+
+// Returns the menu caption of the verb with the given index.
+// Index 0 is the only verb; any other index yields an empty string.
+function TGLCUDAEditor.GetVerb(Index: Integer): string;
+begin
+  // Always assign Result: it was left undefined for indices other than 0.
+  if Index = 0 then
+    Result := 'Show CUDA Items Editor'
+  else
+    Result := '';
+end;
+
+// Number of verbs this editor provides: only verb 0 is handled by GetVerb
+// and ExecuteVerb.
+function TGLCUDAEditor.GetVerbCount: Integer;
+begin
+  Result := 1;
+end;
+
+
+// ------------------
+// ------------------ TGLCUDACompilerEditor ------------------
+// ------------------
+
+// Compiles the module of the edited TGLCUDACompiler and, when compilation
+// succeeds, brings the design-time components of the owning TCUDAModule in
+// line with the parsed module info:
+//   * one TCUDAFunction (with its TCUDAFuncParam children) per kernel,
+//   * one TCUDATexture per texture reference,
+//   * one TCUDAConstant per constant.
+// Components whose handle or device address cannot be obtained are deleted,
+// and finally every item of the module that is not allocated is destroyed.
+// Designer.Modified is called whether or not compilation succeeded.
+procedure TGLCUDACompilerEditor.Edit;
+var
+  CUDACompiler: TGLCUDACompiler;
+  I, J: Integer;
+  func: TCUDAFunction;
+  tex: TCUDATexture;
+  cnst: TCUDAConstant;
+  param: TCUDAFuncParam;
+  parent: TCUDAModule;
+  info: TCUDAModuleInfo;
+  bUseless: Boolean;
+  useless: array of TCUDAComponent;
+  CTN: TChannelTypeAndNum;
+
+  // Creates one TCUDAFuncParam under func for every argument of the kernel
+  // currently indexed by I.
+  procedure CreateFuncParams;
+  var
+    K: Integer;
+  begin
+    for K := 0 to High(info.Func[I].Args) do
+    begin
+      param := TCUDAFuncParam(Designer.CreateComponent(TCUDAFuncParam,
+        func, 0, 0, 0, 0));
+      param.Master := TCUDAComponent(func);
+      param.KernelName := info.Func[I].Args[K].Name;
+      param.Name := func.KernelName+'_'+param.KernelName;
+      param.DataType := info.Func[I].Args[K].DataType;
+      param.CustomType := info.Func[I].Args[K].CustomType;
+      param.Reference := info.Func[I].Args[K].Ref;
+    end;
+  end;
+
+begin
+  CUDACompiler := TGLCUDACompiler(Self.Component);
+  if CUDACompiler.Compile then
+  begin
+    info := CUDACompiler.ModuleInfo;
+    parent := TCUDAModule(info.Owner);
+
+    // Creates kernel's functions
+    for I := 0 to High(info.Func) do
+    begin
+      func := parent.KernelFunction[info.Func[I].KernelName];
+      if not Assigned(func) then
+      begin
+        func := TCUDAFunction(Designer.CreateComponent(TCUDAFunction,
+          info.Owner, 0, 0, 0, 0));
+        func.Master := TCUDAComponent(info.Owner);
+        func.KernelName := info.Func[I].KernelName;
+        func.Name := TCUDAComponent(info.Owner).MakeUniqueName(info.Func[I].Name);
+      end
+      else
+      begin
+        // Drop the old parameters; they are recreated below.
+        while func.ItemsCount > 0 do
+          func.Items[0].Destroy;
+      end;
+
+      // A function whose handle cannot be obtained is of no use.
+      try
+        bUseless := func.Handle = nil;
+      except
+        bUseless := True;
+      end;
+      if bUseless then
+      begin
+        Designer.SelectComponent(func);
+        Designer.DeleteSelection(True);
+        func := nil;
+      end
+      else
+        CreateFuncParams;
+    end;
+
+    // Creates kernel's textures
+    for I := 0 to High(info.TexRef) do
+    begin
+      // Resolve the channel layout up front: it is needed for ChannelNum
+      // below even when the texture component already exists. Previously CTN
+      // was only assigned for new textures and read uninitialised otherwise.
+      CTN := GetChannelTypeAndNum(info.TexRef[I].DataType);
+
+      tex := parent.KernelTexture[info.TexRef[I].Name];
+      if not Assigned(tex) then
+      begin
+        tex := TCUDATexture(Designer.CreateComponent(TCUDATexture,
+          info.Owner, 0, 0, 0, 0));
+        tex.Master := TCUDAComponent(info.Owner);
+        tex.KernelName := info.TexRef[I].Name;
+        tex.Name := tex.KernelName;
+        tex.ReadAsInteger := (info.TexRef[I].ReadMode = cudaReadModeElementType);
+        tex.Format := CTN.F;
+      end;
+
+      tex.ChannelNum := CTN.C;
+      try
+        bUseless := tex.Handle = nil;
+      except
+        bUseless := True;
+      end;
+      if bUseless then
+      begin
+        Designer.SelectComponent(tex);
+        Designer.DeleteSelection(True);
+      end;
+    end;
+
+    // Creates kernel's constants
+    for I := 0 to High(info.Constant) do
+    begin
+      cnst := parent.KernelConstant[info.Constant[I].Name];
+      if not Assigned(cnst) then
+      begin
+        cnst := TCUDAConstant(Designer.CreateComponent(TCUDAConstant,
+          info.Owner, 0, 0, 0, 0));
+        cnst.Master := TCUDAComponent(info.Owner);
+        cnst.KernelName := info.Constant[I].Name;
+        cnst.Name := cnst.KernelName;
+        cnst.DataType := info.Constant[I].DataType;
+        cnst.CustomType := info.Constant[I].CustomType;
+        cnst.IsValueDefined := info.Constant[I].DefValue;
+      end;
+
+      try
+        bUseless := cnst.DeviceAddress = nil;
+      except
+        bUseless := True;
+      end;
+      if bUseless then
+      begin
+        Designer.SelectComponent(cnst);
+        Designer.DeleteSelection(True);
+      end;
+    end;
+
+    // Delete useless components. They are collected first so that the item
+    // list is not modified while it is being walked.
+    SetLength(useless, parent.ItemsCount);
+    j := 0;
+    for i := 0 to parent.ItemsCount - 1 do
+    begin
+      if not TCUDAComponent(parent.Items[i]).IsAllocated then
+        begin
+          useless[j] := parent.Items[i];
+          inc(j);
+        end;
+    end;
+
+    for i := 0 to j - 1 do
+      useless[i].Destroy;
+  end;
+  Designer.Modified;
+end;
+
+// Runs the verb with the given index; index 0 compiles the module and every
+// other index is ignored.
+procedure TGLCUDACompilerEditor.ExecuteVerb(Index: Integer);
+begin
+  if Index = 0 then
+    Edit;
+end;
+
+// Returns the menu caption of the verb with the given index.
+// Index 0 is the only verb; any other index yields an empty string.
+function TGLCUDACompilerEditor.GetVerb(Index: Integer): string;
+begin
+  // Always assign Result: it was left undefined for indices other than 0.
+  if Index = 0 then
+    Result := 'Compile Module'
+  else
+    Result := '';
+end;
+
+// Number of verbs this editor provides: only verb 0 is handled by GetVerb
+// and ExecuteVerb.
+function TGLCUDACompilerEditor.GetVerbCount: Integer;
+begin
+  Result := 1;
+end;
+
+// ------------------
+// ------------------ TGLCUDACompilerSourceProperty ------------------
+// ------------------
+
+// Runs the inherited constructor and creates the module list that backs the
+// value drop-down.
+constructor TGLCUDACompilerSourceProperty.Create(
+    const ADesigner: IDesigner; APropCount: Integer);
+begin
+  inherited Create(ADesigner, APropCount);
+  FModuleList := TStringList.Create;
+end;
+
+
+// Releases the module list and then runs the inherited destructor.
+destructor TGLCUDACompilerSourceProperty.Destroy;
+begin
+  // Free is nil-safe: the destructor also runs when the constructor raised
+  // before FModuleList was created.
+  FModuleList.Free;
+  inherited;
+end;
+
+// Rebuilds FModuleList: the first entry is always 'none', followed by the
+// file name of every module of the active project whose extension is '.cu'
+// (compared without regard to letter case).
+procedure TGLCUDACompilerSourceProperty.RefreshModuleList;
+var
+  ActiveProject: IOTAProject;
+  ModuleIdx: Integer;
+  ModuleInfo: IOTAModuleInfo;
+begin
+  FModuleList.Clear;
+  FModuleList.Add('none');
+
+  ActiveProject := GetActiveProject;
+  if ActiveProject = nil then
+    Exit;
+
+  for ModuleIdx := 0 to ActiveProject.GetModuleCount - 1 do
+  begin
+    ModuleInfo := ActiveProject.GetModule(ModuleIdx);
+    if SameText(ExtractFileExt(ModuleInfo.FileName), '.CU') then
+      FModuleList.Add(ModuleInfo.FileName);
+  end;
+end;
+
+
+// Reports paValueList so the Object Inspector presents the property as a
+// drop-down list fed by GetValues.
+function TGLCUDACompilerSourceProperty.GetAttributes;
+begin
+  Result := [paValueList];
+end;
+
+// Refreshes the module list and passes the bare file name of each entry
+// (including the leading 'none') to Proc.
+procedure TGLCUDACompilerSourceProperty.GetValues(Proc: TGetStrProc);
+var
+  ModuleFile: string;
+begin
+  RefreshModuleList;
+  for ModuleFile in FModuleList do
+    Proc(ExtractFileName(ModuleFile));
+end;
+
+// Applies a value chosen in the Object Inspector. When Value matches the bare
+// file name of a '.cu' module of the active project, that module becomes the
+// compiler's source file and the property stores the bare file name;
+// otherwise the property is set to 'none'. Modified is called in both cases.
+procedure TGLCUDACompilerSourceProperty.SetValue(const Value: String);
+var
+  Idx, FoundAt: Integer;
+begin
+  RefreshModuleList;
+
+  // Entry 0 is the 'none' placeholder, so the search starts at 1.
+  FoundAt := -1;
+  Idx := 1;
+  while (FoundAt < 0) and (Idx < FModuleList.Count) do
+  begin
+    if ExtractFileName(FModuleList[Idx]) = Value then
+      FoundAt := Idx;
+    Inc(Idx);
+  end;
+
+  if FoundAt <= 0 then
+    SetStrValue('none')
+  else
+  begin
+    TGLCUDACompiler(GetComponent(0)).SetSourceCodeFile(FModuleList[FoundAt]);
+    SetStrValue(ExtractFileName(Value));
+  end;
+  Modified;
+end;
+
+// ------------------
+// ------------------ TGLCUDADeviceProperty ------------------
+// ------------------
+
+// Runs the inherited constructor and creates the list that holds the
+// selectable device names.
+constructor TGLCUDADeviceProperty.Create(const ADesigner: IDesigner; APropCount: Integer);
+begin
+  inherited Create(ADesigner, APropCount);
+  FDeviceList := TStringList.Create;
+end;
+
+// Releases the device list and then runs the inherited destructor.
+destructor TGLCUDADeviceProperty.Destroy;
+begin
+  // Free is nil-safe: the destructor also runs when the constructor raised
+  // before FDeviceList was created.
+  FDeviceList.Free;
+  inherited;
+end;
+
+// Reports paValueList so the Object Inspector presents the property as a
+// drop-down list fed by GetValues.
+function TGLCUDADeviceProperty.GetAttributes: TPropertyAttributes;
+begin
+  Result := [paValueList];
+end;
+
+// Refreshes FDeviceList through CUDAContextManager.FillUnusedDeviceList and
+// passes every entry to Proc so it appears in the property's drop-down list.
+procedure TGLCUDADeviceProperty.GetValues(Proc: TGetStrProc);
+var
+  I: Integer;
+begin
+  CUDAContextManager.FillUnusedDeviceList(FDeviceList);
+  // Proc was previously never called, which left the value list empty even
+  // though GetAttributes announces paValueList.
+  for I := 0 to FDeviceList.Count - 1 do
+    Proc(FDeviceList[I]);
+end;
+
+// Stores Value in the property only when it equals (case-sensitively) one of
+// the names currently held in FDeviceList. Modified is called either way.
+procedure TGLCUDADeviceProperty.SetValue(const Value: String);
+var
+  Idx: Integer;
+  Known: Boolean;
+begin
+  Known := False;
+  Idx := 0;
+  while (not Known) and (Idx < FDeviceList.Count) do
+  begin
+    Known := FDeviceList[Idx] = Value;
+    Inc(Idx);
+  end;
+
+  if Known then
+    SetStrValue(Value);
+  Modified;
+end;
+
+end.

+ 65 - 0
Source/GPU.CUDARegister.pas

@@ -0,0 +1,65 @@
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDARegister;
+
+(*  Registration unit for GPU Computing package *)
+
+interface
+
+uses
+  System.Classes,
+  System.SysUtils,
+  DesignIntf,
+  DesignEditors,
+  ToolsAPI,
+  StrEdit,
+
+  GLS.SceneRegister,
+
+  GPU.CUDA,
+  GPU.CUDAContext,
+
+
+  GPU.CUDAPropEditors;
+
+
+procedure Register;
+
+//--------------------------------------------------------------------
+implementation
+//--------------------------------------------------------------------
+
+uses
+//  Import.CUDARunTime,
+  GPU.CUDAGraphics,
+  GPU.CUDACompiler,
+  GPU.CUDAFFTPlan,
+  GPU.CUDAParser;
+
+
+// Design-time registration entry point of the GPU Computing package.
+procedure Register;
+begin
+  // Components shown on the 'GLScene GPU Computing' palette page.
+  RegisterComponents('GLScene GPU Computing', [TGLCUDA, TGLCUDADevice, TGLCUDACompiler]);
+  // Component editors.
+  RegisterComponentEditor(TGLCUDA, TGLCUDAEditor);
+  RegisterComponentEditor(TGLCUDACompiler, TGLCUDACompilerEditor);
+  // Drop-down property editors for the module and device selection strings.
+  RegisterPropertyEditor(TypeInfo(string), TGLCUDACompiler, 'ProjectModule',
+    TGLCUDACompilerSourceProperty);
+  RegisterPropertyEditor(TypeInfo(string), TGLCUDADevice, 'SelectDevice',
+    TGLCUDADeviceProperty);
+  // Classes registered without a palette icon.
+  RegisterNoIcon([TCUDAModule, TCUDAMemData, TCUDAFunction, TCUDATexture,
+    TCUDAFFTPlan, TCUDAImageResource, TCUDAGeometryResource, TCUDAConstant, TCUDAFuncParam]);
+
+  // Makes the feedback mesh available as a scene object in the
+  // 'GPU Computing' category.
+  ObjectManager.RegisterSceneObject(TCUDAFeedbackMesh, 'GPU generated mesh', 'GPU Computing', HInstance);
+end;
+
+
+//------------------------------------------------------
+initialization
+//------------------------------------------------------
+
+  vFindCuFileFunc := FindCuFile;
+
+end.
+

+ 786 - 786
Source/GLS.CUDARuntime.pas → Source/GPU.CUDARuntime.pas

@@ -1,786 +1,786 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDARuntime;
-
-(* GLScene CUDA Runtime *)
-
-(*
- * Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
- *
- * NOTICE TO USER:
- *
- * This source code is subject to NVIDIA ownership rights under U.S. and
- * international Copyright laws.  Users and possessors of this source code
- * are hereby granted a nonexclusive, royalty-free license to use this code
- * in individual and commercial software.
- *
- * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
- * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
- * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
- * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
- * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
- * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
- * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
- * OR PERFORMANCE OF THIS SOURCE CODE.
- *
- * U.S. Government End Users.   This source code is a "commercial item" as
- * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
- * "commercial computer  software"  and "commercial computer software
- * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
- * and is provided to the U.S. Government only as a commercial end item.
- * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
- * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
- * source code with only those rights set forth herein.
- *
- * Any use of this source code in individual and commercial software must
- * include, in the user documentation and internal comments to the code,
- * the above Disclaimer and U.S. Government End Users Notice.
- *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  Winapi.Windows,
-
-  GLS.CUDAApi,
-  GLS.Logger;
-
-const
-  // Candidate base names of the CUDA runtime library. InitCUDART tries them in
-  // array order and appends '.dll' to each name.
-  // The WIN32 list additionally ends with the unversioned names 'cudart32' and
-  // 'cudart'; the WIN64 list contains versioned names only.
-{$IFDEF WIN32}
-  CUDARTDLLNAMES: array [0 .. 9] of string = (
-    'cudart32_42_9', 'cudart32_41_28',
-    'cudart32_40_10', 'cudart32_32_16', 'cudart32_31_4',
-    'cudart32_30_14', 'cudart32_30_9', 'cudart32_30_8', 'cudart32', 'cudart');
-{$ENDIF}
-
-{$IFDEF WIN64}
-  CUDARTDLLNAMES: array [0 .. 7] of string = (
-    'cudart64_42_9', 'cudart64_41_28',
-    'cudart64_40_10', 'cudart64_32_16', 'cudart64_31_4',
-    'cudart64_30_14', 'cudart64_30_9', 'cudart64_30_8');
-{$ENDIF}
-
-const
-  // single precision constants
-  // The special values are written as floating-point expressions. A hex integer
-  // literal in a typed Single constant is converted numerically (for example
-  // $7F800000 would become 2139095040.0); it is not reinterpreted as an
-  // IEEE-754 bit pattern.
-  CUDART_INF_F: Single = 1.0 / 0.0;              // +infinity (bit pattern $7F800000)
-  CUDART_NAN_F: Single = 0.0 / 0.0;              // a NaN value
-  CUDART_MIN_DENORM_F: Single = 1.401298464E-45; // smallest positive denormal (bit pattern $00000001)
-  CUDART_MAX_NORMAL_F: Single = 3.402823466E38;  // largest finite value (bit pattern $7F7FFFFF)
-  // NOTE(review): intended bit pattern is $80000000; confirm the compiler keeps
-  // the sign bit when folding -0.0.
-  CUDART_NEG_ZERO_F: Single = -0.0;
-  CUDART_ZERO_F = 0.0;
-  CUDART_ONE_F = 1.0;
-  CUDART_SQRT_HALF_F = 0.707106781;
-  CUDART_SQRT_TWO_F = 1.414213562;
-  CUDART_THIRD_F = 0.333333333;
-  CUDART_PIO4_F = 0.785398163;
-  CUDART_PIO2_F = 1.570796327;
-  CUDART_3PIO4_F = 2.356194490;
-  CUDART_2_OVER_PI_F = 0.636619772;
-  CUDART_PI_F = 3.141592654;
-  CUDART_L2E_F = 1.442695041;
-  CUDART_L2T_F = 3.321928094;
-  CUDART_LG2_F = 0.301029996;
-  CUDART_LGE_F = 0.434294482;
-  CUDART_LN2_F = 0.693147181;
-  CUDART_LNT_F = 2.302585093;
-  CUDART_LNPI_F = 1.144729886;
-  CUDART_TWO_TO_M126_F = 1.175494351E-38;
-  CUDART_TWO_TO_126_F = 8.507059173E37;
-  CUDART_NORM_HUGE_F = 3.402823466E38;
-  CUDART_TWO_TO_23_F = 8388608.0;
-  CUDART_TWO_TO_24_F = 16777216.0;
-  CUDART_TWO_TO_31_F = 2147483648.0;
-  CUDART_TWO_TO_32_F = 4294967296.0;
-  // Number of quotient bits.
-  CUDART_REMQUO_BITS_F = 3;
-  // Mask covering the low CUDART_REMQUO_BITS_F bits (7 for 3 bits). The CUDA
-  // header defines it as ~((~0) << CUDART_REMQUO_BITS_F), not as the bit count.
-  CUDART_REMQUO_MASK_F = (1 shl CUDART_REMQUO_BITS_F) - 1;
-  CUDART_TRIG_PLOSS_F = 48039.0;
-
-  // double precision constants
-  // The special values are written as floating-point expressions. A hex integer
-  // literal in a typed Double constant is converted numerically (for example
-  // $7FF0000000000000 would become roughly 9.22E18); it is not reinterpreted as
-  // an IEEE-754 bit pattern.
-  // The former CUDA_NO_SM_13_DOUBLE_INTRINSICS conditional had two identical
-  // branches, so a single set of declarations is kept.
-  CUDART_INF: Double = 1.0 / 0.0;                      // +infinity (bit pattern $7FF0000000000000)
-  CUDART_NAN: Double = 0.0 / 0.0;                      // a NaN value
-  // NOTE(review): intended bit pattern is $8000000000000000; confirm the
-  // compiler keeps the sign bit when folding -0.0.
-  CUDART_NEG_ZERO: Double = -0.0;
-  CUDART_MIN_DENORM: Double = 4.9406564584124654E-324; // smallest positive denormal (bit pattern $0000000000000001)
-  CUDART_ZERO = 0.0;
-  CUDART_ONE = 1.0;
-  CUDART_SQRT_TWO = 1.4142135623730951E+0;
-  CUDART_SQRT_HALF = 7.0710678118654757E-1;
-  CUDART_THIRD = 3.3333333333333333E-1;
-  CUDART_TWOTHIRD = 6.6666666666666667E-1;
-  CUDART_PIO4 = 7.8539816339744828E-1;
-  CUDART_PIO4_HI = 7.8539816339744828E-1;
-  CUDART_PIO4_LO = 3.0616169978683830E-17;
-  CUDART_PIO2 = 1.5707963267948966E+0;
-  CUDART_PIO2_HI = 1.5707963267948966E+0;
-  CUDART_PIO2_LO = 6.1232339957367660E-17;
-  CUDART_3PIO4 = 2.3561944901923448E+0;
-  CUDART_2_OVER_PI = 6.3661977236758138E-1;
-  CUDART_PI = 3.1415926535897931E+0;
-  CUDART_PI_HI = 3.1415926535897931E+0;
-  CUDART_PI_LO = 1.2246467991473532E-16;
-  CUDART_SQRT_2PI_HI = 2.5066282746310007E+0;
-  CUDART_SQRT_2PI_LO = -1.8328579980459167E-16;
-  CUDART_SQRT_PIO2_HI = 1.2533141373155003E+0;
-  CUDART_SQRT_PIO2_LO = -9.1642899902295834E-17;
-  CUDART_L2E = 1.4426950408889634E+0;
-  CUDART_L2E_HI = 1.4426950408889634E+0;
-  CUDART_L2E_LO = 2.0355273740931033E-17;
-  CUDART_L2T = 3.3219280948873622E+0;
-  CUDART_LG2 = 3.0102999566398120E-1;
-  CUDART_LG2_HI = 3.0102999566398120E-1;
-  CUDART_LG2_LO = -2.8037281277851704E-18;
-  CUDART_LGE = 4.3429448190325182E-1;
-  CUDART_LGE_HI = 4.3429448190325182E-1;
-  CUDART_LGE_LO = 1.09831965021676510E-17;
-  CUDART_LN2 = 6.9314718055994529E-1;
-  CUDART_LN2_HI = 6.9314718055994529E-1;
-  CUDART_LN2_LO = 2.3190468138462996E-17;
-  CUDART_LNT = 2.3025850929940459E+0;
-  CUDART_LNT_HI = 2.3025850929940459E+0;
-  CUDART_LNT_LO = -2.1707562233822494E-16;
-  CUDART_LNPI = 1.1447298858494002E+0;
-  CUDART_LN2_X_1024 = 7.0978271289338397E+2;
-  CUDART_LN2_X_1025 = 7.1047586007394398E+2;
-  CUDART_LN2_X_1075 = 7.4513321910194122E+2;
-  CUDART_LG2_X_1024 = 3.0825471555991675E+2;
-  CUDART_LG2_X_1075 = 3.2360724533877976E+2;
-  CUDART_TWO_TO_23 = 8388608.0;
-  CUDART_TWO_TO_52 = 4503599627370496.0;
-  CUDART_TWO_TO_54 = 18014398509481984.0;
-  CUDART_TWO_TO_M54 = 5.5511151231257827E-17;
-  CUDART_TWO_TO_M1022 = 2.22507385850720140E-308;
-  CUDART_TRIG_PLOSS = 2147483648.0;
-
-type
-  // Result codes of the runtime API; aliased below as cudaError_t, the return
-  // type of almost every entry point declared in this unit.
-  // Members take consecutive ordinals starting at cudaSuccess = 0.
-  // NOTE(review): the CUDA headers appear to give the last two members explicit
-  // values (cudaErrorStartupFailure = $7F, cudaErrorApiFailureBase = 10000);
-  // here they are simply the next ordinals - confirm against the header.
-  TcudaError = (cudaSuccess, cudaErrorMissingConfiguration,
-    cudaErrorMemoryAllocation, cudaErrorInitializationError,
-    cudaErrorLaunchFailure, cudaErrorPriorLaunchFailure, cudaErrorLaunchTimeout,
-    cudaErrorLaunchOutOfResources, cudaErrorInvalidDeviceFunction,
-    cudaErrorInvalidConfiguration, cudaErrorInvalidDevice,
-    cudaErrorInvalidValue, cudaErrorInvalidPitchValue, cudaErrorInvalidSymbol,
-    cudaErrorMapBufferObjectFailed, cudaErrorUnmapBufferObjectFailed,
-    cudaErrorInvalidHostPointer, cudaErrorInvalidDevicePointer,
-    cudaErrorInvalidTexture, cudaErrorInvalidTextureBinding,
-    cudaErrorInvalidChannelDescriptor, cudaErrorInvalidMemcpyDirection,
-    cudaErrorAddressOfConstant, cudaErrorTextureFetchFailed,
-    cudaErrorTextureNotBound, cudaErrorSynchronizationError,
-    cudaErrorInvalidFilterSetting, cudaErrorInvalidNormSetting,
-    cudaErrorMixedDeviceExecution, cudaErrorCudartUnloading, cudaErrorUnknown,
-    cudaErrorNotYetImplemented, cudaErrorMemoryValueTooLarge,
-    cudaErrorInvalidResourceHandle, cudaErrorNotReady, cudaErrorStartupFailure,
-    cudaErrorApiFailureBase);
-
-  { +//DEVICE_BUILTIN*/ }
-  TCudaChannelFormatKind = (cudaChannelFormatKindSigned,
-    cudaChannelFormatKindUnsigned, cudaChannelFormatKindFloat);
-
-  // Map flags accepted by cudaGLSetBufferObjectMapFlags.
-  // Each "/// <" comment below describes the member that precedes it.
-  TCudaGLMapFlags = (cudaGLMapFlagsNone,
-    /// < Default; assume the resource can be read and written
-    cudaGLMapFlagsReadOnly,
-    /// < CUDA kernels will not write to this resource
-    cudaGLMapFlagsWriteDiscard);
-  /// < CUDA kernels will only write to and will not read from this resource
-
-  { +//DEVICE_BUILTIN*/ }
-  PcudaChannelFormatDesc = ^TCudaChannelFormatDesc;
-
-  TCudaChannelFormatDesc = record
-    x: Integer;
-    y: Integer;
-    z: Integer;
-    w: Integer;
-    f: TCudaChannelFormatKind;
-  end;
-
-  { +//DEVICE_BUILTIN*/ }
-  // Placeholder for the runtime's array type: declared without fields and used
-  // through PcudaArray pointers in the declarations below.
-  TcudaArray = record
-  end; // ATTENTION: should this be a forward declaration?
-
-  { +//DEVICE_BUILTIN*/ }
-  TcudaMemcpyKind = (cudaMemcpyHostToHost { = 0 } , cudaMemcpyHostToDevice,
-    cudaMemcpyDeviceToHost, cudaMemcpyDeviceToDevice);
-
-  { +//DEVICE_BUILTIN*/ }
-  TcudaPitchedPtr = record
-    ptr: Pointer;
-    pitch: NativeUInt;
-    xsize: NativeUInt;
-    ysize: NativeUInt;
-  end;
-
-  { +//DEVICE_BUILTIN*/ }
-  TcudaExtent = record
-    width: NativeUInt;
-    height: NativeUInt;
-    depth: NativeUInt;
-  end;
-
-  { +//DEVICE_BUILTIN*/ }
-  TcudaPos = record
-    x: NativeUInt;
-    y: NativeUInt;
-    z: NativeUInt;
-  end;
-
-  { +//DEVICE_BUILTIN*/ }
-  TcudaMemcpy3DParms = record
-    srcArray: Pointer;
-    srcPos: TcudaPos;
-    srcPtr: TcudaPitchedPtr;
-    dstArray: Pointer;
-    dstPos: TcudaPos;
-    dstPtr: TcudaPitchedPtr;
-    extent: TcudaExtent;
-    kind: TcudaMemcpyKind;
-  end;
-
-  { +//DEVICE_BUILTIN*/ }
-  PCudaDeviceProp = ^TCudaDeviceProp;
-
-  // Device description record; passed by reference to cudaGetDeviceProperties
-  // and through PCudaDeviceProp to cudaChooseDevice.
-  TCudaDeviceProp = record
-    name: array [0 .. 256 - 1] of AnsiChar;
-    totalGlobalMem: NativeUInt;
-    sharedMemPerBlock: NativeUInt;
-    regsPerBlock: Integer;
-    warpSize: Integer;
-    memPitch: NativeUInt;
-    maxThreadsPerBlock: Integer;
-    maxThreadsDim: array [0 .. 3 - 1] of Integer;
-    maxGridSize: array [0 .. 3 - 1] of Integer;
-    clockRate: Integer;
-    totalConstMem: NativeUInt;
-    major: Integer;
-    minor: Integer;
-    textureAlignment: NativeUInt;
-    deviceOverlap: Integer;
-    multiProcessorCount: Integer;
-    // Specifies whether there is a run time limit on kernels
-    kernelExecTimeoutEnabled: Integer;
-    // Device is integrated as opposed to discrete
-    // NOTE(review): the field name looks truncated from "integrated"; it is
-    // left as is because callers may reference it.
-    egrated: Integer;
-    // Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
-    canMapHostMemory: Integer;
-    // Compute mode (See ::cudaComputeMode)
-    computeMode: Integer;
-    // Maximum 1D texture size
-    maxTexture1D: Integer;
-    // Maximum 2D texture dimensions
-    maxTexture2D: array[0..1] of Integer;
-    // Maximum 3D texture dimensions
-    maxTexture3D: array[0..2] of Integer;
-    // Maximum 2D texture array dimensions
-    maxTexture2DArray: array[0..2] of Integer;
-    // Alignment requirements for surfaces
-    surfaceAlignment: NativeUInt;
-    // Device can possibly execute multiple kernels concurrently
-    concurrentKernels: Integer;
-    // Device has ECC support enabled
-    ECCEnabled: Integer;
-    // PCI bus ID of the device
-    pciBusID: Integer;
-    // PCI device ID of the device
-    pciDeviceID: Integer;
-    // 1 if device is a Tesla device using TCC driver, 0 otherwise
-    tccDriver: Integer;
-    // Reserved trailing space; not given a meaning in this unit
-    __cudaReserved: array [0 .. 20] of Integer;
-  end;
-
-  TcudaTextureAddressMode = (cudaAddressModeWrap, cudaAddressModeClamp, cudaAddressModeMirror);
-
-  TcudaTextureFilterMode = (cudaFilterModePoint, cudaFilterModeLinear);
-
-  TcudaTextureReadMode = (cudaReadModeElementType, cudaReadModeNormalizedFloat);
-
-  PTextureReference = ^TTextureReference;
-
-  TTextureReference = record
-    normalized: Integer;
-    filterMode: TcudaTextureFilterMode;
-    addressMode: array [0 .. 2] of TcudaTextureAddressMode;
-    channelDesc: TCudaChannelFormatDesc;
-    __cudaReserved: array [0 .. 15] of Integer;
-  end;
-
-  PcudaArray = ^TcudaArray;
-
-  { +//****************************************************************************** }
-  { -** }
-  { -* SHORTHAND TYPE DEFINITION USED BY RUNTIME API* }
-  { -** }
-  { =*******************************************************************************/ }
-
-  // Shorthand aliases used in the signatures of the runtime entry points below.
-  { +//DEVICE_BUILTIN*/ }
-  cudaError_t = TcudaError;
-  // NOTE(review): stream and event handles are declared as 32-bit Integer here.
-  // Later CUDA runtimes appear to declare them as opaque pointers; confirm the
-  // width is sufficient when compiling for WIN64.
-  { +//DEVICE_BUILTIN*/ }
-  cudaStream_t = Integer;
-  { +//DEVICE_BUILTIN*/ }
-  cudaEvent_t = Integer;
-
-(*******************************************************************************)
-
-var
-
-cudaBindTexture: function(var offset: NativeUInt; const texref: PTextureReference;
-    var devPtr: Pointer; var desc: TCudaChannelFormatDesc; size: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaBindTexture2D: function(var offset: NativeUInt; const texref: PTextureReference; const devPtr: Pointer;
-    var desc: TCudaChannelFormatDesc; width, height, pitch: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaBindTextureToArray: function(const texref: PTextureReference; const cudaArray: PcudaArray): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaUnbindTexture: function(const texref: PTextureReference): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetTextureAlignmentOffset: function(offset: NativeUInt; const texref: PTextureReference): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetTextureReference: function(const texref: PTextureReference; const symbol: PAnsiChar): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetChannelDesc: function(var desc: TCudaChannelFormatDesc; const array_: Pointer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaCreateChannelDesc: function(x, y, z, w: Integer; f: TCudaChannelFormatKind): TCudaChannelFormatDesc;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(* ******************************************************************************
-  *                                                                              *
-  *                                                                              *
-  *                                                                              *
-  ****************************************************************************** *)
-
-cudaMalloc3D: function(var pitchedDevPtr: TcudaPitchedPtr; extent: TcudaExtent): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMalloc3DArray: function(var arrayPtr: PcudaArray; const desc: TCudaChannelFormatDesc; extent: TcudaExtent; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemset3D: function(pitchedDevPtr: TcudaPitchedPtr; value: Integer; extent: TcudaExtent): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy3D: function(const p: TcudaMemcpy3DParms): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy3DAsync: function(const p: TcudaMemcpy3DParms; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMalloc: function(var devPtr; size: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMallocHost: function(var ptr: Pointer; size: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  cudaMallocPitch: function(var devPtr; var pitch: NativeUInt; width: NativeUInt; height: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMallocArray: function(var aarray: Pointer; var desc: TCudaChannelFormatDesc; width: NativeUInt; height: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaFree: function(devPtr: Pointer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaFreeHost: function(ptr: Pointer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaFreeArray: function(const aarray: Pointer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaHostAlloc: function(var pHost: Pointer; bytes: NativeUInt; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaHostGetDevicePointer: function(var pDevice: Pointer; pHost: Pointer; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaHostGetFlags: function(var pFlags: Cardinal; pHost: Pointer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemGetInfo: function(var free: NativeUInt; var total: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy: function(dst: Pointer; src: Pointer; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyToArray: function(var dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; var src; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyFromArray: function(var dst; const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt; hOffsetDst: NativeUInt; const src: PcudaArray; wOffsetSrc: NativeUInt;
-    hOffsetSrc: NativeUInt; count: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2D: function(var dst; dpitch: NativeUInt; var src; spitch: NativeUInt;
-    width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DToArray: function(dst: PcudaArray; wOffset: NativeUInt;
-  hOffset: NativeUInt; var src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DFromArray: function(var dst; dpitch: NativeUInt; src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt; height: NativeUInt;
-    kind: TcudaMemcpyKind): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt;
-    hOffsetDst: NativeUInt; src: PcudaArray; wOffsetSrc: NativeUInt; hOffsetSrc: NativeUInt;
-    width: NativeUInt; height: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyToSymbol: function(symbol: PAnsiChar; var src; count: NativeUInt; const offset: NativeUInt = 0;
-    const kind: TcudaMemcpyKind = cudaMemcpyHostToDevice): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyFromSymbol: function(var dst; symbol: PAnsiChar; count: NativeUInt; const offset: NativeUInt = 0;
-    const kind: TcudaMemcpyKind = cudaMemcpyDeviceToHost): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-
-{ +//*************************************************************************** }
-{ -** }
-{ -** }
-{ -** }
-{ =***************************************************************************** }
-
-cudaMemcpyAsync: function(var dst; const src; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; const src; count: NativeUInt; kind: TcudaMemcpyKind;
-    stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyFromArrayAsync: function(var dst; const src: PcudaArray;
-    wOffset: NativeUInt; hOffset: NativeUInt; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DAsync: function(var dst; dpitch: NativeUInt; const src;
-    spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt;
-    hOffset: NativeUInt; const src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt;
-    kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpy2DFromArrayAsync: function(var dst; dpitch: NativeUInt;
-    const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt;
-    height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyToSymbolAsync: function(const symbol: PAnsiChar; const src;
-    count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemcpyFromSymbolAsync: function(var dst; const symbol: PAnsiChar;
-  count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-
-(******************************************************************************
- *                                                                            *
- *                                                                            *
- *                                                                            *
- *****************************************************************************)
-
-cudaMemset: function(var devPtr; value: Integer; count: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaMemset2D: function(var devPtr; pitch: NativeUInt; value: Integer;
-    width: NativeUInt; height: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(*****************************************************************************
- *                                                                           *
- *                                                                           *
- *                                                                           *
- *****************************************************************************)
-
-cudaGetSymbolAddress: function(var devPtr: Pointer; const symbol: PAnsiChar): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetSymbolSize: function(var size: NativeUInt; const symbol: PAnsiChar): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-
-{ +//*************************************************************************** }
-{ -** }
-{ -** }
-{ -** }
-{ =***************************************************************************** }
-
-cudaGetDeviceCount: function(var count: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetDeviceProperties: function(var prop: TCudaDeviceProp; device: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaChooseDevice: function(var device: Integer; const prop: PCudaDeviceProp): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaSetDevice: function(device: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetDevice: function(var device: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaSetDeviceFlags: function(flags: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaSetValidDevices: function(device_arr: PInteger; len: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(******************************************************************************
- *
- *
- *
- *****************************************************************************)
-
-cudaConfigureCall: function(gridDim, blockDim: TDim3; sharedMem: NativeUInt; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaSetupArgument: function(const arg: Pointer; size: NativeUInt; offset: NativeUInt): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaFuncSetCacheConfig: function(const func: PAnsiChar; cacheConfig: TcudaFuncCache): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaLaunch: function(const entry: PAnsiChar): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaFuncGetAttributes: function(var attr: TcudaFuncAttributes; const func: PAnsiChar): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  { +//****************************************************************************** }
-  { -** }
-  { -** }
-  { -** }
-  { =*******************************************************************************/ }
-
-cudaGetLastError: function: cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  { +//****************************************************************************** }
-  { -** }
-  { -** }
-  { -** }
-  { =*******************************************************************************/ }
-cudaGLSetGLDevice: function(device: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLRegisterBufferObject: function(bufObj: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsGLRegisterImage: function(const resource: PCUgraphicsResource; image: Cardinal; target: Cardinal; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsGLRegisterBuffer: function(const resource: PCUgraphicsResource; buffer: Cardinal; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLMapBufferObject: function(devPtr: Pointer; bufObj: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLUnmapBufferObject: function(bufObj: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLUnregisterBufferObject: function(bufObj: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLSetBufferObjectMapFlags: function(bufObj: Cardinal; flags: TCudaGLMapFlags): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLMapBufferObjectAsync: function(var devPtr: Pointer; bufObj: Cardinal; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGLUnmapBufferObjectAsync: function(bufObj: Cardinal; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsUnregisterResource: function(resource: PCUgraphicsResource): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsResourceSetMapFlags: function(resource: PCUgraphicsResource; flags: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsMapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsUnmapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsResourceGetMappedPointer: function(var pDevPtr: TCUdeviceptr;
-    var pSize: Cardinal; resource: PCUgraphicsResource): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGraphicsSubResourceGetMappedArray: function(var pArray: PCUarray;
-    resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaGetErrorString: function(error: cudaError_t): PAnsiChar;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaDriverGetVersion: function(out driverVersion: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaRuntimeGetVersion: function(out runtimeVersion: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(* ******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- ****************************************************************************** *)
-
-cudaSetDoubleForDevice: function(var d: Double): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaSetDoubleForHost: function(var d: Double): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(* ******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- ****************************************************************************** *)
-
-cudaStreamCreate: function(var pStream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaStreamDestroy: function(stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaStreamSynchronize: function(stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaStreamQuery: function(stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(* ******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- ****************************************************************************** *)
-
-cudaEventCreate: function(var event: cudaEvent_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventCreateWithFlags: function(var event: cudaEvent_t; flags: Integer): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventRecord: function(event: cudaEvent_t; stream: cudaStream_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventQuery: function(event: cudaEvent_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventSynchronize: function(event: cudaEvent_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventDestroy: function(event: cudaEvent_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaEventElapsedTime: function(var ms: Single; start: cudaEvent_t; ending: cudaEvent_t): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaWGLGetDevice: function(var device: Integer; hGpu: HGPUNV): cudaError_t;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-(* ******************************************************************************
- *                                                                              *
- *                                                                              *
- *                                                                              *
- ****************************************************************************** *)
-
-cudaThreadExit: function(): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaThreadSynchronize: function(): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaThreadSetLimit: function(limit: TcudaLimit; value: NativeUInt): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-cudaThreadGetLimit: function(var value: NativeUInt; limit: TcudaLimit): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-function cudaGetLastErrorString: string;
-function InitCUDART: Boolean;
-procedure CloseCUDART;
-function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
-function IsCUDARTInitialized: Boolean;
-
-//--------------------------------------------
-implementation
-//--------------------------------------------
-
-// Returns the text that cudaGetErrorString reports for the code returned by
-// cudaGetLastError, converted to a Delphi string.
-function cudaGetLastErrorString: string;
-var
-  LastCode: cudaError_t;
-  Text: PAnsiChar;
-begin
-  LastCode := cudaGetLastError();
-  Text := cudaGetErrorString(LastCode);
-  Result := string(Text);
-end;
-
-const
-  INVALID_MODULEHANDLE = 0;
-
-var
-{$IFDEF MSWINDOWS}
-  CUDARTHandle: HINST = INVALID_MODULEHANDLE;
-{$ELSE}
-  CUDARTHandle: TLibHandle = INVALID_MODULEHANDLE;
-{$ENDIF}
-
-// Looks up ProcName in the library identified by CUDARTHandle and returns
-// whatever GetProcAddress returns for it. The result is not checked here.
-function CUDARTGetProcAddress(ProcName: PAnsiChar): Pointer;
-begin
-  Result := GetProcAddress(CUDARTHandle, ProcName);
-end;
-
-// Loads the CUDA runtime library.
-// Returns True when the runtime is already loaded, or when one of the names in
-// CUDARTDLLNAMES (tried in array order, with '.dll' appended) is accepted by
-// InitCUDARTFromLibrary. Returns False when no candidate could be loaded.
-function InitCUDART: Boolean;
-var
-  I: Integer;
-begin
-  // Already loaded: report success. Previously this case skipped the loop and
-  // fell through to the failure result, so every repeated call returned False.
-  if CUDARTHandle <> INVALID_MODULEHANDLE then
-    Exit(True);
-
-  for I := Low(CUDARTDLLNAMES) to High(CUDARTDLLNAMES) do
-    if InitCUDARTFromLibrary(CUDARTDLLNAMES[I] + '.dll') then
-      Exit(True);
-
-  Result := False;
-end;
-
-procedure CloseCUDART;
-begin
-  // Nothing to release when no library handle is held.
-  if CUDARTHandle = INVALID_MODULEHANDLE then
-    Exit;
-
-  // Release the library and mark the unit as uninitialised again.
-  FreeLibrary(CUDARTHandle);
-  CUDARTHandle := INVALID_MODULEHANDLE;
-end;
-
-// Loads the CUDA runtime library LibName and resolves the entry points listed
-// below into this unit's procedural variables.
-// Any library held from an earlier call is released first (CloseCUDART).
-// Returns False when LibName cannot be loaded and True otherwise; an entry
-// point that the library does not export keeps whatever GetProcAddress returned.
-function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
-var
-  V: Integer;
-begin
-  CloseCUDART;
-
-  // Always acquire our own reference with LoadLibraryW. A handle obtained from
-  // GetModuleHandleW is not reference counted, so releasing it later through
-  // FreeLibrary in CloseCUDART would drop a reference this unit never took.
-  CUDARTHandle := LoadLibraryW(PWideChar(LibName));
-  if CUDARTHandle = INVALID_MODULEHANDLE then
-    Exit(False);
-
-  // NOTE(review): the list starts at cudaFreeHost; entry points such as
-  // cudaFree or cudaBindTexture are not resolved anywhere in this routine -
-  // confirm whether that is intentional.
-  cudaFreeHost := CUDARTGetProcAddress('cudaFreeHost');
-  cudaFuncGetAttributes := CUDARTGetProcAddress('cudaFuncGetAttributes');
-  cudaGetChannelDesc := CUDARTGetProcAddress('cudaGetChannelDesc');
-  cudaGetDevice := CUDARTGetProcAddress('cudaGetDevice');
-  cudaGetDeviceCount := CUDARTGetProcAddress('cudaGetDeviceCount');
-  cudaGetDeviceProperties := CUDARTGetProcAddress('cudaGetDeviceProperties');
-  cudaGetErrorString := CUDARTGetProcAddress('cudaGetErrorString');
-  cudaGetLastError := CUDARTGetProcAddress('cudaGetLastError');
-  cudaGetSymbolAddress := CUDARTGetProcAddress('cudaGetSymbolAddress');
-  cudaGetSymbolSize := CUDARTGetProcAddress('cudaGetSymbolSize');
-  cudaGetTextureAlignmentOffset := CUDARTGetProcAddress('cudaGetTextureAlignmentOffset');
-  cudaGetTextureReference := CUDARTGetProcAddress('cudaGetTextureReference');
-  cudaGLMapBufferObject := CUDARTGetProcAddress('cudaGLMapBufferObject');
-  cudaGLMapBufferObjectAsync := CUDARTGetProcAddress('cudaGLMapBufferObjectAsync');
-  cudaGLRegisterBufferObject := CUDARTGetProcAddress('cudaGLRegisterBufferObject');
-  cudaGLSetBufferObjectMapFlags := CUDARTGetProcAddress('cudaGLSetBufferObjectMapFlags');
-  cudaGLSetGLDevice := CUDARTGetProcAddress('cudaGLSetGLDevice');
-  cudaGLUnmapBufferObject := CUDARTGetProcAddress('cudaGLUnmapBufferObject');
-  cudaGLUnmapBufferObjectAsync := CUDARTGetProcAddress('cudaGLUnmapBufferObjectAsync');
-  cudaGLUnregisterBufferObject := CUDARTGetProcAddress('cudaGLUnregisterBufferObject');
-  cudaGraphicsGLRegisterBuffer := CUDARTGetProcAddress('cudaGraphicsGLRegisterBuffer');
-  cudaGraphicsGLRegisterImage := CUDARTGetProcAddress('cudaGraphicsGLRegisterImage');
-  cudaGraphicsMapResources := CUDARTGetProcAddress('cudaGraphicsMapResources');
-  cudaGraphicsResourceGetMappedPointer := CUDARTGetProcAddress('cudaGraphicsResourceGetMappedPointer');
-  cudaGraphicsResourceSetMapFlags := CUDARTGetProcAddress('cudaGraphicsResourceSetMapFlags');
-  cudaGraphicsSubResourceGetMappedArray := CUDARTGetProcAddress('cudaGraphicsSubResourceGetMappedArray');
-  cudaGraphicsUnmapResources := CUDARTGetProcAddress('cudaGraphicsUnmapResources');
-  cudaGraphicsUnregisterResource := CUDARTGetProcAddress('cudaGraphicsUnregisterResource');
-  cudaHostAlloc := CUDARTGetProcAddress('cudaHostAlloc');
-  cudaHostGetDevicePointer := CUDARTGetProcAddress('cudaHostGetDevicePointer');
-  cudaHostGetFlags := CUDARTGetProcAddress('cudaHostGetFlags');
-  cudaLaunch := CUDARTGetProcAddress('cudaLaunch');
-  cudaMalloc := CUDARTGetProcAddress('cudaMalloc');
-  cudaMalloc3D := CUDARTGetProcAddress('cudaMalloc3D');
-  cudaMalloc3DArray := CUDARTGetProcAddress('cudaMalloc3DArray');
-  cudaMallocArray := CUDARTGetProcAddress('cudaMallocArray');
-  cudaMallocHost := CUDARTGetProcAddress('cudaMallocHost');
-  cudaMallocPitch := CUDARTGetProcAddress('cudaMallocPitch');
-  cudaMemcpy := CUDARTGetProcAddress('cudaMemcpy');
-  cudaMemcpy2D := CUDARTGetProcAddress('cudaMemcpy2D');
-  cudaMemcpy2DArrayToArray := CUDARTGetProcAddress('cudaMemcpy2DArrayToArray');
-  cudaMemcpy2DAsync := CUDARTGetProcAddress('cudaMemcpy2DAsync');
-  cudaMemcpy2DFromArray := CUDARTGetProcAddress('cudaMemcpy2DFromArray');
-  cudaMemcpy2DFromArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DFromArrayAsync');
-  cudaMemcpy2DToArray := CUDARTGetProcAddress('cudaMemcpy2DToArray');
-  cudaMemcpy2DToArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DToArrayAsync');
-  cudaMemcpy3D := CUDARTGetProcAddress('cudaMemcpy3D');
-  cudaMemcpy3DAsync := CUDARTGetProcAddress('cudaMemcpy3DAsync');
-  cudaMemcpyArrayToArray := CUDARTGetProcAddress('cudaMemcpyArrayToArray');
-  cudaMemcpyAsync := CUDARTGetProcAddress('cudaMemcpyAsync');
-  cudaMemcpyFromArray := CUDARTGetProcAddress('cudaMemcpyFromArray');
-  cudaMemcpyFromArrayAsync := CUDARTGetProcAddress('cudaMemcpyFromArrayAsync');
-  cudaMemcpyFromSymbol := CUDARTGetProcAddress('cudaMemcpyFromSymbol');
-  cudaMemcpyFromSymbolAsync := CUDARTGetProcAddress('cudaMemcpyFromSymbolAsync');
-  cudaMemcpyToArray := CUDARTGetProcAddress('cudaMemcpyToArray');
-  cudaMemcpyToArrayAsync := CUDARTGetProcAddress('cudaMemcpyToArrayAsync');
-  cudaMemcpyToSymbol := CUDARTGetProcAddress('cudaMemcpyToSymbol');
-  cudaMemcpyToSymbolAsync := CUDARTGetProcAddress('cudaMemcpyToSymbolAsync');
-  cudaMemGetInfo := CUDARTGetProcAddress('cudaMemGetInfo');
-  cudaMemset := CUDARTGetProcAddress('cudaMemset');
-  cudaMemset2D := CUDARTGetProcAddress('cudaMemset2D');
-  cudaMemset3D := CUDARTGetProcAddress('cudaMemset3D');
-  cudaRuntimeGetVersion := CUDARTGetProcAddress('cudaRuntimeGetVersion');
-  cudaSetDevice := CUDARTGetProcAddress('cudaSetDevice');
-  cudaSetDeviceFlags := CUDARTGetProcAddress('cudaSetDeviceFlags');
-  cudaSetDoubleForDevice := CUDARTGetProcAddress('cudaSetDoubleForDevice');
-  cudaSetDoubleForHost := CUDARTGetProcAddress('cudaSetDoubleForHost');
-  cudaSetupArgument := CUDARTGetProcAddress('cudaSetupArgument');
-  cudaSetValidDevices := CUDARTGetProcAddress('cudaSetValidDevices');
-  cudaStreamCreate := CUDARTGetProcAddress('cudaStreamCreate');
-  cudaStreamDestroy := CUDARTGetProcAddress('cudaStreamDestroy');
-  cudaStreamQuery := CUDARTGetProcAddress('cudaStreamQuery');
-  cudaStreamSynchronize := CUDARTGetProcAddress('cudaStreamSynchronize');
-  cudaThreadExit := CUDARTGetProcAddress('cudaThreadExit');
-  cudaThreadSynchronize := CUDARTGetProcAddress('cudaThreadSynchronize');
-  cudaThreadSetLimit := CUDARTGetProcAddress('cudaThreadSetLimit');
-  cudaThreadGetLimit := CUDARTGetProcAddress('cudaThreadGetLimit');
-  cudaUnbindTexture := CUDARTGetProcAddress('cudaUnbindTexture');
-  cudaWGLGetDevice := CUDARTGetProcAddress('cudaWGLGetDevice');
-
-  // The version is only needed for the log line. Skip the query when the
-  // export was not found so that a nil procedural variable is never called,
-  // and start from 0 so the logged value is defined either way.
-  V := 0;
-  if Assigned(cudaRuntimeGetVersion) then
-    cudaRuntimeGetVersion(V);
-  GLSLogger.LogInfoFmt('%s version %d is loaded', [LibName, V]);
-  Result := True;
-end;
-
-function IsCUDARTInitialized: Boolean;
-begin
-  // Initialised means a library handle is currently held.
-  Result := not (CUDARTHandle = INVALID_MODULEHANDLE);
-end;
-
-//-----------------------------------------------
-initialization // empty: no library is loaded at unit start-up
-//-----------------------------------------------
-
-//-----------------------------------------------
-finalization
-//-----------------------------------------------
-
-CloseCUDART; // frees the CUDA runtime library handle if one is still held
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDARuntime;
+
+(* GLScene CUDA Runtime *)
+
+(*
+ * Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO USER:
+ *
+ * This source code is subject to NVIDIA ownership rights under U.S. and
+ * international Copyright laws.  Users and possessors of this source code
+ * are hereby granted a nonexclusive, royalty-free license to use this code
+ * in individual and commercial software.
+ *
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+ * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+ * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
+ * OR PERFORMANCE OF THIS SOURCE CODE.
+ *
+ * U.S. Government End Users.   This source code is a "commercial item" as
+ * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
+ * "commercial computer  software"  and "commercial computer software
+ * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
+ * and is provided to the U.S. Government only as a commercial end item.
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+ * source code with only those rights set forth herein.
+ *
+ * Any use of this source code in individual and commercial software must
+ * include, in the user documentation and internal comments to the code,
+ * the above Disclaimer and U.S. Government End Users Notice.
+ *)
+
+interface
+
+{$I GLScene.inc}
+
+uses
+  Winapi.Windows,
+
+  Import.CUDAApi,
+  GLS.Logger;
+
+const // base names (no file extension) of candidate CUDA runtime libraries, one list per target
+{$IFDEF WIN32}
+  CUDARTDLLNAMES: array [0 .. 9] of string = (
+    'cudart32_42_9', 'cudart32_41_28',
+    'cudart32_40_10', 'cudart32_32_16', 'cudart32_31_4',
+    'cudart32_30_14', 'cudart32_30_9', 'cudart32_30_8', 'cudart32', 'cudart');
+{$ENDIF}
+
+{$IFDEF WIN64}
+  CUDARTDLLNAMES: array [0 .. 7] of string = (
+    'cudart64_42_9', 'cudart64_41_28',
+    'cudart64_40_10', 'cudart64_32_16', 'cudart64_31_4',
+    'cudart64_30_14', 'cudart64_30_9', 'cudart64_30_8'); // NOTE(review): no unversioned fallback name here, unlike the WIN32 list - confirm
+{$ENDIF}
+
+const
+  (* Single precision constants.
+     NOTE(review): the first five are typed Single constants initialised from
+     integer hex literals that look like IEEE-754 bit patterns; presumably the
+     compiler converts the integer value numerically instead of reinterpreting
+     the bits - confirm before relying on them. *)
+  CUDART_INF_F: Single = $7F800000;
+  CUDART_NAN_F: Single = $7FFFFFFF;
+  CUDART_MIN_DENORM_F: Single = $00000001;
+  CUDART_MAX_NORMAL_F: Single = $7F7FFFFF;
+  CUDART_NEG_ZERO_F: Single = $80000000;
+  CUDART_ZERO_F = 0.0;
+  CUDART_ONE_F = 1.0;
+  CUDART_SQRT_HALF_F = 0.707106781;
+  CUDART_SQRT_TWO_F = 1.414213562;
+  CUDART_THIRD_F = 0.333333333;
+  CUDART_PIO4_F = 0.785398163;
+  CUDART_PIO2_F = 1.570796327;
+  CUDART_3PIO4_F = 2.356194490;
+  CUDART_2_OVER_PI_F = 0.636619772;
+  CUDART_PI_F = 3.141592654;
+  CUDART_L2E_F = 1.442695041;
+  CUDART_L2T_F = 3.321928094;
+  CUDART_LG2_F = 0.301029996;
+  CUDART_LGE_F = 0.434294482;
+  CUDART_LN2_F = 0.693147181;
+  CUDART_LNT_F = 2.302585093;
+  CUDART_LNPI_F = 1.144729886;
+  CUDART_TWO_TO_M126_F = 1.175494351E-38;
+  CUDART_TWO_TO_126_F = 8.507059173E37;
+  CUDART_NORM_HUGE_F = 3.402823466E38;
+  CUDART_TWO_TO_23_F = 8388608.0;
+  CUDART_TWO_TO_24_F = 16777216.0;
+  CUDART_TWO_TO_31_F = 2147483648.0;
+  CUDART_TWO_TO_32_F = 4294967296.0;
+  CUDART_REMQUO_BITS_F = 3;
+  CUDART_REMQUO_MASK_F = CUDART_REMQUO_BITS_F;
+  CUDART_TRIG_PLOSS_F = 48039.0;
+
+  (* Double precision constants.
+     Both branches of the conditional below declare identical values.
+     NOTE(review): the typed Double constants are also initialised from integer
+     hex literals, so the same bit-pattern concern applies - confirm. *)
+{$IFNDEF CUDA_NO_SM_13_DOUBLE_INTRINSICS}
+  CUDART_INF: Double = $7FF0000000000000;
+  CUDART_NAN: Double = $FFF8000000000000;
+  CUDART_NEG_ZERO: Double = $8000000000000000;
+  CUDART_MIN_DENORM: Double = $0000000000000001;
+{$ELSE} // CUDA_NO_SM_13_DOUBLE_INTRINSICS is defined
+  CUDART_INF: Double = $7FF0000000000000;
+  CUDART_NAN: Double = $FFF8000000000000;
+  CUDART_NEG_ZERO: Double = $8000000000000000;
+  CUDART_MIN_DENORM: Double = $0000000000000001;
+{$ENDIF}
+  CUDART_ZERO = 0.0;
+  CUDART_ONE = 1.0;
+  CUDART_SQRT_TWO = 1.4142135623730951E+0;
+  CUDART_SQRT_HALF = 7.0710678118654757E-1;
+  CUDART_THIRD = 3.3333333333333333E-1;
+  CUDART_TWOTHIRD = 6.6666666666666667E-1;
+  CUDART_PIO4 = 7.8539816339744828E-1;
+  CUDART_PIO4_HI = 7.8539816339744828E-1;
+  CUDART_PIO4_LO = 3.0616169978683830E-17;
+  CUDART_PIO2 = 1.5707963267948966E+0;
+  CUDART_PIO2_HI = 1.5707963267948966E+0;
+  CUDART_PIO2_LO = 6.1232339957367660E-17;
+  CUDART_3PIO4 = 2.3561944901923448E+0;
+  CUDART_2_OVER_PI = 6.3661977236758138E-1;
+  CUDART_PI = 3.1415926535897931E+0;
+  CUDART_PI_HI = 3.1415926535897931E+0;
+  CUDART_PI_LO = 1.2246467991473532E-16;
+  CUDART_SQRT_2PI_HI = 2.5066282746310007E+0;
+  CUDART_SQRT_2PI_LO = -1.8328579980459167E-16;
+  CUDART_SQRT_PIO2_HI = 1.2533141373155003E+0;
+  CUDART_SQRT_PIO2_LO = -9.1642899902295834E-17;
+  CUDART_L2E = 1.4426950408889634E+0;
+  CUDART_L2E_HI = 1.4426950408889634E+0;
+  CUDART_L2E_LO = 2.0355273740931033E-17;
+  CUDART_L2T = 3.3219280948873622E+0;
+  CUDART_LG2 = 3.0102999566398120E-1;
+  CUDART_LG2_HI = 3.0102999566398120E-1;
+  CUDART_LG2_LO = -2.8037281277851704E-18;
+  CUDART_LGE = 4.3429448190325182E-1;
+  CUDART_LGE_HI = 4.3429448190325182E-1;
+  CUDART_LGE_LO = 1.09831965021676510E-17;
+  CUDART_LN2 = 6.9314718055994529E-1;
+  CUDART_LN2_HI = 6.9314718055994529E-1;
+  CUDART_LN2_LO = 2.3190468138462996E-17;
+  CUDART_LNT = 2.3025850929940459E+0;
+  CUDART_LNT_HI = 2.3025850929940459E+0;
+  CUDART_LNT_LO = -2.1707562233822494E-16;
+  CUDART_LNPI = 1.1447298858494002E+0;
+  CUDART_LN2_X_1024 = 7.0978271289338397E+2;
+  CUDART_LN2_X_1025 = 7.1047586007394398E+2;
+  CUDART_LN2_X_1075 = 7.4513321910194122E+2;
+  CUDART_LG2_X_1024 = 3.0825471555991675E+2;
+  CUDART_LG2_X_1075 = 3.2360724533877976E+2;
+  CUDART_TWO_TO_23 = 8388608.0;
+  CUDART_TWO_TO_52 = 4503599627370496.0;
+  CUDART_TWO_TO_54 = 18014398509481984.0;
+  CUDART_TWO_TO_M54 = 5.5511151231257827E-17;
+  CUDART_TWO_TO_M1022 = 2.22507385850720140E-308;
+  CUDART_TRIG_PLOSS = 2147483648.0;
+
+type
+  TcudaError = (cudaSuccess, cudaErrorMissingConfiguration,
+    cudaErrorMemoryAllocation, cudaErrorInitializationError,
+    cudaErrorLaunchFailure, cudaErrorPriorLaunchFailure, cudaErrorLaunchTimeout,
+    cudaErrorLaunchOutOfResources, cudaErrorInvalidDeviceFunction,
+    cudaErrorInvalidConfiguration, cudaErrorInvalidDevice,
+    cudaErrorInvalidValue, cudaErrorInvalidPitchValue, cudaErrorInvalidSymbol,
+    cudaErrorMapBufferObjectFailed, cudaErrorUnmapBufferObjectFailed,
+    cudaErrorInvalidHostPointer, cudaErrorInvalidDevicePointer,
+    cudaErrorInvalidTexture, cudaErrorInvalidTextureBinding,
+    cudaErrorInvalidChannelDescriptor, cudaErrorInvalidMemcpyDirection,
+    cudaErrorAddressOfConstant, cudaErrorTextureFetchFailed,
+    cudaErrorTextureNotBound, cudaErrorSynchronizationError,
+    cudaErrorInvalidFilterSetting, cudaErrorInvalidNormSetting,
+    cudaErrorMixedDeviceExecution, cudaErrorCudartUnloading, cudaErrorUnknown,
+    cudaErrorNotYetImplemented, cudaErrorMemoryValueTooLarge,
+    cudaErrorInvalidResourceHandle, cudaErrorNotReady, cudaErrorStartupFailure,
+    cudaErrorApiFailureBase); // NOTE(review): ordinals are sequential here; verify they match the numeric codes of the C enum (the last two presumably carry explicit values there)
+
+  { Component kind stored in TCudaChannelFormatDesc.f }
+  TCudaChannelFormatKind = (cudaChannelFormatKindSigned,
+    cudaChannelFormatKindUnsigned, cudaChannelFormatKindFloat);
+
+  TCudaGLMapFlags = (cudaGLMapFlagsNone,
+    /// < Default; Assume resource can be read/written
+    cudaGLMapFlagsReadOnly,
+    /// < CUDA kernels will not write to this resource
+    cudaGLMapFlagsWriteDiscard);
+  /// < CUDA kernels will only write to and will not read from this resource
+
+  { Channel format descriptor: four Integer fields x, y, z, w plus the component kind f }
+  PcudaChannelFormatDesc = ^TCudaChannelFormatDesc;
+
+  TCudaChannelFormatDesc = record
+    x: Integer;
+    y: Integer;
+    z: Integer;
+    w: Integer;
+    f: TCudaChannelFormatKind;
+  end;
+
+  { Empty record standing in for the runtime's array type; referenced through PcudaArray }
+  TcudaArray = record
+  end; // NOTE(review): the original remark asked whether this should be a forward declaration
+
+  { Copy direction selector passed as the kind argument of the cudaMemcpy entry points }
+  TcudaMemcpyKind = (cudaMemcpyHostToHost { = 0 } , cudaMemcpyHostToDevice,
+    cudaMemcpyDeviceToHost, cudaMemcpyDeviceToDevice);
+
+  { Pointer together with its pitch and x/y sizes }
+  TcudaPitchedPtr = record
+    ptr: Pointer;
+    pitch: NativeUInt;
+    xsize: NativeUInt;
+    ysize: NativeUInt;
+  end;
+
+  { Width, height and depth triple }
+  TcudaExtent = record
+    width: NativeUInt;
+    height: NativeUInt;
+    depth: NativeUInt;
+  end;
+
+  { x, y, z position triple }
+  TcudaPos = record
+    x: NativeUInt;
+    y: NativeUInt;
+    z: NativeUInt;
+  end;
+
+  { Parameter record taken by cudaMemcpy3D and cudaMemcpy3DAsync }
+  TcudaMemcpy3DParms = record
+    srcArray: Pointer;
+    srcPos: TcudaPos;
+    srcPtr: TcudaPitchedPtr;
+    dstArray: Pointer;
+    dstPos: TcudaPos;
+    dstPtr: TcudaPitchedPtr;
+    extent: TcudaExtent;
+    kind: TcudaMemcpyKind;
+  end;
+
+  { Device properties record passed to cudaGetDeviceProperties and cudaChooseDevice }
+  PCudaDeviceProp = ^TCudaDeviceProp;
+
+  TCudaDeviceProp = record
+    name: array [0 .. 256 - 1] of AnsiChar;
+    totalGlobalMem: NativeUInt;
+    sharedMemPerBlock: NativeUInt;
+    regsPerBlock: Integer;
+    warpSize: Integer;
+    memPitch: NativeUInt;
+    maxThreadsPerBlock: Integer;
+    maxThreadsDim: array [0 .. 3 - 1] of Integer;
+    maxGridSize: array [0 .. 3 - 1] of Integer;
+    clockRate: Integer;
+    totalConstMem: NativeUInt;
+    major: Integer;
+    minor: Integer;
+    textureAlignment: NativeUInt;
+    deviceOverlap: Integer;
+    multiProcessorCount: Integer;
+    // Specifies whether there is a run time limit on kernels
+    kernelExecTimeoutEnabled: Integer;
+    // Device is integrated as opposed to discrete
+    // NOTE(review): the field name looks like "integrated" with "int" stripped - kept as is for callers
+    egrated: Integer;
+    // Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    canMapHostMemory: Integer;
+    // Compute mode (See ::cudaComputeMode)
+    computeMode: Integer;
+    // Maximum 1D texture size
+    maxTexture1D: Integer;
+    // Maximum 2D texture dimensions
+    maxTexture2D: array[0..1] of Integer;
+    // Maximum 3D texture dimensions
+    maxTexture3D: array[0..2] of Integer;
+    // Maximum 2D texture array dimensions
+    maxTexture2DArray: array[0..2] of Integer;
+    // Alignment requirements for surfaces
+    surfaceAlignment: NativeUInt;
+    // Device can possibly execute multiple kernels concurrently
+    concurrentKernels: Integer;
+    // Device has ECC support enabled
+    ECCEnabled: Integer;
+    // PCI bus ID of the device
+    pciBusID: Integer;
+    // PCI device ID of the device
+    pciDeviceID: Integer;
+    // 1 if device is a Tesla device using TCC driver, 0 otherwise
+    tccDriver: Integer;
+    __cudaReserved: array [0 .. 20] of Integer;
+  end;
+
+  TcudaTextureAddressMode = (cudaAddressModeWrap, cudaAddressModeClamp, cudaAddressModeMirror);
+
+  TcudaTextureFilterMode = (cudaFilterModePoint, cudaFilterModeLinear);
+
+  TcudaTextureReadMode = (cudaReadModeElementType, cudaReadModeNormalizedFloat);
+
+  PTextureReference = ^TTextureReference;
+
+  TTextureReference = record
+    normalized: Integer;
+    filterMode: TcudaTextureFilterMode;
+    addressMode: array [0 .. 2] of TcudaTextureAddressMode;
+    channelDesc: TCudaChannelFormatDesc;
+    __cudaReserved: array [0 .. 15] of Integer;
+  end;
+
+  PcudaArray = ^TcudaArray;
+
+  { ****************************************************************************** }
+  { *                                                                              }
+  { *  SHORTHAND TYPE DEFINITIONS USED BY THE RUNTIME API                          }
+  { *                                                                              }
+  { ****************************************************************************** }
+
+  { Result type of the runtime entry points }
+  cudaError_t = TcudaError;
+  { Stream handle; NOTE(review): declared as 32-bit Integer, presumably pointer-sized in the C API - confirm for WIN64 }
+  cudaStream_t = Integer;
+  { Event handle; NOTE(review): same size question as cudaStream_t }
+  cudaEvent_t = Integer;
+
+(*******************************************************************************)
+
+var
+(* One procedural variable per CUDA runtime entry point: stdcall on MSWINDOWS,
+   cdecl otherwise. Texture reference management comes first. *)
+cudaBindTexture: function(var offset: NativeUInt; const texref: PTextureReference;
+    var devPtr: Pointer; var desc: TCudaChannelFormatDesc; size: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaBindTexture2D: function(var offset: NativeUInt; const texref: PTextureReference; const devPtr: Pointer;
+    var desc: TCudaChannelFormatDesc; width, height, pitch: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaBindTextureToArray: function(const texref: PTextureReference; const cudaArray: PcudaArray): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaUnbindTexture: function(const texref: PTextureReference): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetTextureAlignmentOffset: function(offset: NativeUInt; const texref: PTextureReference): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF} // NOTE(review): offset is passed by value here; the C API presumably takes a pointer to the offset - confirm
+cudaGetTextureReference: function(const texref: PTextureReference; const symbol: PAnsiChar): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetChannelDesc: function(var desc: TCudaChannelFormatDesc; const array_: Pointer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaCreateChannelDesc: function(x, y, z, w: Integer; f: TCudaChannelFormatKind): TCudaChannelFormatDesc;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(* ******************************************************************************
+  *                                                                              *
+  *  Memory management: allocation, release and synchronous copies               *
+  *                                                                              *
+  ****************************************************************************** *)
+
+cudaMalloc3D: function(var pitchedDevPtr: TcudaPitchedPtr; extent: TcudaExtent): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMalloc3DArray: function(var arrayPtr: PcudaArray; const desc: TCudaChannelFormatDesc; extent: TcudaExtent; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemset3D: function(pitchedDevPtr: TcudaPitchedPtr; value: Integer; extent: TcudaExtent): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy3D: function(const p: TcudaMemcpy3DParms): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy3DAsync: function(const p: TcudaMemcpy3DParms; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMalloc: function(var devPtr; size: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMallocHost: function(var ptr: Pointer; size: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+  cudaMallocPitch: function(var devPtr; var pitch: NativeUInt; width: NativeUInt; height: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMallocArray: function(var aarray: Pointer; var desc: TCudaChannelFormatDesc; width: NativeUInt; height: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaFree: function(devPtr: Pointer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaFreeHost: function(ptr: Pointer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaFreeArray: function(const aarray: Pointer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaHostAlloc: function(var pHost: Pointer; bytes: NativeUInt; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaHostGetDevicePointer: function(var pDevice: Pointer; pHost: Pointer; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaHostGetFlags: function(var pFlags: Cardinal; pHost: Pointer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemGetInfo: function(var free: NativeUInt; var total: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy: function(dst: Pointer; src: Pointer; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyToArray: function(var dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; var src; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyFromArray: function(var dst; const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt; hOffsetDst: NativeUInt; const src: PcudaArray; wOffsetSrc: NativeUInt;
+    hOffsetSrc: NativeUInt; count: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2D: function(var dst; dpitch: NativeUInt; var src; spitch: NativeUInt;
+    width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DToArray: function(dst: PcudaArray; wOffset: NativeUInt;
+  hOffset: NativeUInt; var src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DFromArray: function(var dst; dpitch: NativeUInt; src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt; height: NativeUInt;
+    kind: TcudaMemcpyKind): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt;
+    hOffsetDst: NativeUInt; src: PcudaArray; wOffsetSrc: NativeUInt; hOffsetSrc: NativeUInt;
+    width: NativeUInt; height: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyToSymbol: function(symbol: PAnsiChar; var src; count: NativeUInt; const offset: NativeUInt = 0;
+    const kind: TcudaMemcpyKind = cudaMemcpyHostToDevice): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyFromSymbol: function(var dst; symbol: PAnsiChar; count: NativeUInt; const offset: NativeUInt = 0;
+    const kind: TcudaMemcpyKind = cudaMemcpyDeviceToHost): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+
+{ *************************************************************************** }
+{ *                                                                           }
+{ *  Asynchronous copies: each takes an additional stream argument            }
+{ *                                                                           }
+{ *************************************************************************** }
+
+cudaMemcpyAsync: function(var dst; const src; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; const src; count: NativeUInt; kind: TcudaMemcpyKind;
+    stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyFromArrayAsync: function(var dst; const src: PcudaArray;
+    wOffset: NativeUInt; hOffset: NativeUInt; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DAsync: function(var dst; dpitch: NativeUInt; const src;
+    spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt;
+    hOffset: NativeUInt; const src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt;
+    kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpy2DFromArrayAsync: function(var dst; dpitch: NativeUInt;
+    const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt;
+    height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyToSymbolAsync: function(const symbol: PAnsiChar; const src;
+    count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemcpyFromSymbolAsync: function(var dst; const symbol: PAnsiChar;
+  count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+
+(******************************************************************************
+ *                                                                            *
+ *  Memory set                                                                *
+ *                                                                            *
+ *****************************************************************************)
+
+cudaMemset: function(var devPtr; value: Integer; count: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaMemset2D: function(var devPtr; pitch: NativeUInt; value: Integer;
+    width: NativeUInt; height: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(*****************************************************************************
+ *                                                                           *
+ *  Symbol lookup by name                                                    *
+ *                                                                           *
+ *****************************************************************************)
+
+cudaGetSymbolAddress: function(var devPtr: Pointer; const symbol: PAnsiChar): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetSymbolSize: function(var size: NativeUInt; const symbol: PAnsiChar): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+
+{ *************************************************************************** }
+{ *                                                                           }
+{ *  Device enumeration and selection                                         }
+{ *                                                                           }
+{ *************************************************************************** }
+
+cudaGetDeviceCount: function(var count: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetDeviceProperties: function(var prop: TCudaDeviceProp; device: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaChooseDevice: function(var device: Integer; const prop: PCudaDeviceProp): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaSetDevice: function(device: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetDevice: function(var device: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaSetDeviceFlags: function(flags: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaSetValidDevices: function(device_arr: PInteger; len: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(******************************************************************************
+ *
+ *  Kernel launch configuration and function attributes
+ *
+ *****************************************************************************)
+
+cudaConfigureCall: function(gridDim, blockDim: TDim3; sharedMem: NativeUInt; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaSetupArgument: function(const arg: Pointer; size: NativeUInt; offset: NativeUInt): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaFuncSetCacheConfig: function(const func: PAnsiChar; cacheConfig: TcudaFuncCache): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaLaunch: function(const entry: PAnsiChar): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaFuncGetAttributes: function(var attr: TcudaFuncAttributes; const func: PAnsiChar): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+  { ****************************************************************************** }
+  { *                                                                              }
+  { *  Error query                                                                 }
+  { *                                                                              }
+  { ****************************************************************************** }
+
+cudaGetLastError: function: cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+  { ****************************************************************************** }
+  { *                                                                              }
+  { *  OpenGL interoperability, graphics resources, error text and versions        }
+  { *                                                                              }
+  { ****************************************************************************** }
+cudaGLSetGLDevice: function(device: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLRegisterBufferObject: function(bufObj: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsGLRegisterImage: function(const resource: PCUgraphicsResource; image: Cardinal; target: Cardinal; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsGLRegisterBuffer: function(const resource: PCUgraphicsResource; buffer: Cardinal; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLMapBufferObject: function(devPtr: Pointer; bufObj: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLUnmapBufferObject: function(bufObj: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLUnregisterBufferObject: function(bufObj: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLSetBufferObjectMapFlags: function(bufObj: Cardinal; flags: TCudaGLMapFlags): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLMapBufferObjectAsync: function(var devPtr: Pointer; bufObj: Cardinal; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGLUnmapBufferObjectAsync: function(bufObj: Cardinal; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsUnregisterResource: function(resource: PCUgraphicsResource): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsResourceSetMapFlags: function(resource: PCUgraphicsResource; flags: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsMapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsUnmapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGraphicsResourceGetMappedPointer: function(var pDevPtr: TCUdeviceptr;
+    var pSize: Cardinal; resource: PCUgraphicsResource): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF} // NOTE(review): pSize is a 32-bit Cardinal; the C API presumably writes a size_t - confirm for WIN64
+cudaGraphicsSubResourceGetMappedArray: function(var pArray: PCUarray;
+    resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaGetErrorString: function(error: cudaError_t): PAnsiChar;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaDriverGetVersion: function(out driverVersion: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaRuntimeGetVersion: function(out runtimeVersion: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+cudaSetDoubleForDevice: function(var d: Double): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaSetDoubleForHost: function(var d: Double): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+cudaStreamCreate: function(var pStream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaStreamDestroy: function(stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaStreamSynchronize: function(stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaStreamQuery: function(stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+cudaEventCreate: function(var event: cudaEvent_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventCreateWithFlags: function(var event: cudaEvent_t; flags: Integer): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventRecord: function(event: cudaEvent_t; stream: cudaStream_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventQuery: function(event: cudaEvent_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventSynchronize: function(event: cudaEvent_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventDestroy: function(event: cudaEvent_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaEventElapsedTime: function(var ms: Single; start: cudaEvent_t; ending: cudaEvent_t): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaWGLGetDevice: function(var device: Integer; hGpu: HGPUNV): cudaError_t;
+{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+cudaThreadExit: function(): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaThreadSynchronize: function(): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaThreadSetLimit: function(limit: TcudaLimit; value: NativeUInt): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+cudaThreadGetLimit: function(var value: NativeUInt; limit: TcudaLimit): cudaError_t;{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
+function cudaGetLastErrorString: string;
+function InitCUDART: Boolean;
+procedure CloseCUDART;
+function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
+function IsCUDARTInitialized: Boolean;
+
+//--------------------------------------------
+implementation
+//--------------------------------------------
+
+// Returns the message for the most recent CUDA runtime error as a Delphi string.
+// Queries cudaGetLastError and converts the PAnsiChar returned by
+// cudaGetErrorString for that code.
+function cudaGetLastErrorString: string;
+var
+  LastError: cudaError_t;
+  Msg: PAnsiChar;
+begin
+  LastError := cudaGetLastError();
+  Msg := cudaGetErrorString(LastError);
+  Result := string(Msg);
+end;
+
+const
+  // Value CUDARTHandle holds while no library is loaded.
+  INVALID_MODULEHANDLE = 0;
+
+var
+  // Handle of the CUDA runtime library in use. Assigned by
+  // InitCUDARTFromLibrary and reset to INVALID_MODULEHANDLE by CloseCUDART.
+{$IFDEF MSWINDOWS}
+  CUDARTHandle: HINST = INVALID_MODULEHANDLE;
+{$ELSE}
+  CUDARTHandle: TLibHandle = INVALID_MODULEHANDLE;
+{$ENDIF}
+
+// Looks up the exported symbol ProcName in the library referenced by
+// CUDARTHandle and returns whatever GetProcAddress yields for it.
+function CUDARTGetProcAddress(ProcName: PAnsiChar): Pointer;
+var
+  EntryPoint: Pointer;
+begin
+  EntryPoint := GetProcAddress(CUDARTHandle, ProcName);
+  Result := EntryPoint;
+end;
+
+// Loads the CUDA runtime by trying every name in CUDARTDLLNAMES in order
+// (with '.dll' appended) until InitCUDARTFromLibrary accepts one.
+// Returns True when a library is loaded - including when one was already
+// loaded before the call - and False when no candidate could be loaded.
+function InitCUDART: Boolean;
+var
+  I: Integer;
+begin
+  // Already initialized: report success. Previously this case fell through
+  // to the final "Result := False" and was reported as a failure.
+  if CUDARTHandle <> INVALID_MODULEHANDLE then
+    Exit(True);
+
+  for I := 0 to High(CUDARTDLLNAMES) do
+    if InitCUDARTFromLibrary(CUDARTDLLNAMES[I] + '.dll') then
+      Exit(True);
+
+  Result := False;
+end;
+
+// Releases the CUDA runtime library if one is loaded and resets
+// CUDARTHandle to INVALID_MODULEHANDLE. Does nothing when no library is loaded.
+procedure CloseCUDART;
+begin
+  if CUDARTHandle = INVALID_MODULEHANDLE then
+    Exit;
+
+  FreeLibrary(CUDARTHandle);
+  CUDARTHandle := INVALID_MODULEHANDLE;
+end;
+
+// Loads the CUDA runtime library LibName and binds the cuda* function
+// pointers of this unit to its exports.
+//
+// Any previously loaded library is released first. Returns False when the
+// library cannot be loaded or does not export cudaRuntimeGetVersion (in which
+// case it is released again); returns True otherwise and logs the runtime
+// version. Apart from cudaRuntimeGetVersion, individual entry points are not
+// checked and stay nil when the library does not export them.
+function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
+var
+  V: Integer;
+begin
+  CloseCUDART;
+
+  // Always acquire the handle through LoadLibraryW. A handle obtained from
+  // GetModuleHandleW is not reference-counted, so releasing it later with
+  // FreeLibrary (see CloseCUDART) would unbalance the module's reference
+  // count. LoadLibraryW returns the existing module when it is already loaded.
+  CUDARTHandle := LoadLibraryW(PWideChar(LibName));
+  if CUDARTHandle = INVALID_MODULEHANDLE then
+    Exit(False);
+
+  cudaFreeHost := CUDARTGetProcAddress('cudaFreeHost');
+  cudaFuncGetAttributes := CUDARTGetProcAddress('cudaFuncGetAttributes');
+  cudaGetChannelDesc := CUDARTGetProcAddress('cudaGetChannelDesc');
+  cudaGetDevice := CUDARTGetProcAddress('cudaGetDevice');
+  cudaGetDeviceCount := CUDARTGetProcAddress('cudaGetDeviceCount');
+  cudaGetDeviceProperties := CUDARTGetProcAddress('cudaGetDeviceProperties');
+  cudaGetErrorString := CUDARTGetProcAddress('cudaGetErrorString');
+  cudaGetLastError := CUDARTGetProcAddress('cudaGetLastError');
+  cudaGetSymbolAddress := CUDARTGetProcAddress('cudaGetSymbolAddress');
+  cudaGetSymbolSize := CUDARTGetProcAddress('cudaGetSymbolSize');
+  cudaGetTextureAlignmentOffset := CUDARTGetProcAddress('cudaGetTextureAlignmentOffset');
+  cudaGetTextureReference := CUDARTGetProcAddress('cudaGetTextureReference');
+  cudaGLMapBufferObject := CUDARTGetProcAddress('cudaGLMapBufferObject');
+  cudaGLMapBufferObjectAsync := CUDARTGetProcAddress('cudaGLMapBufferObjectAsync');
+  cudaGLRegisterBufferObject := CUDARTGetProcAddress('cudaGLRegisterBufferObject');
+  cudaGLSetBufferObjectMapFlags := CUDARTGetProcAddress('cudaGLSetBufferObjectMapFlags');
+  cudaGLSetGLDevice := CUDARTGetProcAddress('cudaGLSetGLDevice');
+  cudaGLUnmapBufferObject := CUDARTGetProcAddress('cudaGLUnmapBufferObject');
+  cudaGLUnmapBufferObjectAsync := CUDARTGetProcAddress('cudaGLUnmapBufferObjectAsync');
+  cudaGLUnregisterBufferObject := CUDARTGetProcAddress('cudaGLUnregisterBufferObject');
+  cudaGraphicsGLRegisterBuffer := CUDARTGetProcAddress('cudaGraphicsGLRegisterBuffer');
+  cudaGraphicsGLRegisterImage := CUDARTGetProcAddress('cudaGraphicsGLRegisterImage');
+  cudaGraphicsMapResources := CUDARTGetProcAddress('cudaGraphicsMapResources');
+  cudaGraphicsResourceGetMappedPointer := CUDARTGetProcAddress('cudaGraphicsResourceGetMappedPointer');
+  cudaGraphicsResourceSetMapFlags := CUDARTGetProcAddress('cudaGraphicsResourceSetMapFlags');
+  cudaGraphicsSubResourceGetMappedArray := CUDARTGetProcAddress('cudaGraphicsSubResourceGetMappedArray');
+  cudaGraphicsUnmapResources := CUDARTGetProcAddress('cudaGraphicsUnmapResources');
+  cudaGraphicsUnregisterResource := CUDARTGetProcAddress('cudaGraphicsUnregisterResource');
+  cudaHostAlloc := CUDARTGetProcAddress('cudaHostAlloc');
+  cudaHostGetDevicePointer := CUDARTGetProcAddress('cudaHostGetDevicePointer');
+  cudaHostGetFlags := CUDARTGetProcAddress('cudaHostGetFlags');
+  cudaLaunch := CUDARTGetProcAddress('cudaLaunch');
+  cudaMalloc := CUDARTGetProcAddress('cudaMalloc');
+  cudaMalloc3D := CUDARTGetProcAddress('cudaMalloc3D');
+  cudaMalloc3DArray := CUDARTGetProcAddress('cudaMalloc3DArray');
+  cudaMallocArray := CUDARTGetProcAddress('cudaMallocArray');
+  cudaMallocHost := CUDARTGetProcAddress('cudaMallocHost');
+  cudaMallocPitch := CUDARTGetProcAddress('cudaMallocPitch');
+  cudaMemcpy := CUDARTGetProcAddress('cudaMemcpy');
+  cudaMemcpy2D := CUDARTGetProcAddress('cudaMemcpy2D');
+  cudaMemcpy2DArrayToArray := CUDARTGetProcAddress('cudaMemcpy2DArrayToArray');
+  cudaMemcpy2DAsync := CUDARTGetProcAddress('cudaMemcpy2DAsync');
+  cudaMemcpy2DFromArray := CUDARTGetProcAddress('cudaMemcpy2DFromArray');
+  cudaMemcpy2DFromArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DFromArrayAsync');
+  cudaMemcpy2DToArray := CUDARTGetProcAddress('cudaMemcpy2DToArray');
+  cudaMemcpy2DToArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DToArrayAsync');
+  cudaMemcpy3D := CUDARTGetProcAddress('cudaMemcpy3D');
+  cudaMemcpy3DAsync := CUDARTGetProcAddress('cudaMemcpy3DAsync');
+  cudaMemcpyArrayToArray := CUDARTGetProcAddress('cudaMemcpyArrayToArray');
+  cudaMemcpyAsync := CUDARTGetProcAddress('cudaMemcpyAsync');
+  cudaMemcpyFromArray := CUDARTGetProcAddress('cudaMemcpyFromArray');
+  cudaMemcpyFromArrayAsync := CUDARTGetProcAddress('cudaMemcpyFromArrayAsync');
+  cudaMemcpyFromSymbol := CUDARTGetProcAddress('cudaMemcpyFromSymbol');
+  cudaMemcpyFromSymbolAsync := CUDARTGetProcAddress('cudaMemcpyFromSymbolAsync');
+  cudaMemcpyToArray := CUDARTGetProcAddress('cudaMemcpyToArray');
+  cudaMemcpyToArrayAsync := CUDARTGetProcAddress('cudaMemcpyToArrayAsync');
+  cudaMemcpyToSymbol := CUDARTGetProcAddress('cudaMemcpyToSymbol');
+  cudaMemcpyToSymbolAsync := CUDARTGetProcAddress('cudaMemcpyToSymbolAsync');
+  cudaMemGetInfo := CUDARTGetProcAddress('cudaMemGetInfo');
+  cudaMemset := CUDARTGetProcAddress('cudaMemset');
+  cudaMemset2D := CUDARTGetProcAddress('cudaMemset2D');
+  cudaMemset3D := CUDARTGetProcAddress('cudaMemset3D');
+  cudaRuntimeGetVersion := CUDARTGetProcAddress('cudaRuntimeGetVersion');
+  cudaSetDevice := CUDARTGetProcAddress('cudaSetDevice');
+  cudaSetDeviceFlags := CUDARTGetProcAddress('cudaSetDeviceFlags');
+  cudaSetDoubleForDevice := CUDARTGetProcAddress('cudaSetDoubleForDevice');
+  cudaSetDoubleForHost := CUDARTGetProcAddress('cudaSetDoubleForHost');
+  cudaSetupArgument := CUDARTGetProcAddress('cudaSetupArgument');
+  cudaSetValidDevices := CUDARTGetProcAddress('cudaSetValidDevices');
+  cudaStreamCreate := CUDARTGetProcAddress('cudaStreamCreate');
+  cudaStreamDestroy := CUDARTGetProcAddress('cudaStreamDestroy');
+  cudaStreamQuery := CUDARTGetProcAddress('cudaStreamQuery');
+  cudaStreamSynchronize := CUDARTGetProcAddress('cudaStreamSynchronize');
+  cudaThreadExit := CUDARTGetProcAddress('cudaThreadExit');
+  cudaThreadSynchronize := CUDARTGetProcAddress('cudaThreadSynchronize');
+  cudaThreadSetLimit := CUDARTGetProcAddress('cudaThreadSetLimit');
+  cudaThreadGetLimit := CUDARTGetProcAddress('cudaThreadGetLimit');
+  cudaUnbindTexture := CUDARTGetProcAddress('cudaUnbindTexture');
+  cudaWGLGetDevice := CUDARTGetProcAddress('cudaWGLGetDevice');
+
+  // Entry points that are declared in the interface section but were never
+  // bound here, leaving them nil and crashing on first use.
+  // NOTE(review): other declarations earlier in the interface section may be
+  // unbound as well - verify the full list against the var block.
+  cudaDriverGetVersion := CUDARTGetProcAddress('cudaDriverGetVersion');
+  cudaEventCreate := CUDARTGetProcAddress('cudaEventCreate');
+  cudaEventCreateWithFlags := CUDARTGetProcAddress('cudaEventCreateWithFlags');
+  cudaEventDestroy := CUDARTGetProcAddress('cudaEventDestroy');
+  cudaEventElapsedTime := CUDARTGetProcAddress('cudaEventElapsedTime');
+  cudaEventQuery := CUDARTGetProcAddress('cudaEventQuery');
+  cudaEventRecord := CUDARTGetProcAddress('cudaEventRecord');
+  cudaEventSynchronize := CUDARTGetProcAddress('cudaEventSynchronize');
+  cudaFuncSetCacheConfig := CUDARTGetProcAddress('cudaFuncSetCacheConfig');
+
+  // Calling through a nil pointer would raise an access violation; a library
+  // without this export is rejected instead.
+  if not Assigned(cudaRuntimeGetVersion) then
+  begin
+    CloseCUDART;
+    Exit(False);
+  end;
+
+  V := 0; // keeps the log output defined even if the call reports an error
+  cudaRuntimeGetVersion(V);
+  GLSLogger.LogInfoFmt('%s version %d is loaded', [LibName, V]);
+  Result := True;
+end;
+
+// Reports whether a CUDA runtime library handle is currently held.
+function IsCUDARTInitialized: Boolean;
+begin
+  Result := not (CUDARTHandle = INVALID_MODULEHANDLE);
+end;
+
+//-----------------------------------------------
+initialization
+//-----------------------------------------------
+
+//-----------------------------------------------
+finalization
+//-----------------------------------------------
+
+CloseCUDART;
+
+end.

+ 157 - 189
Source/GLS.CUDAUtility.pas → Source/GPU.CUDAUtility.pas

@@ -1,189 +1,157 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAUtility;
-
-(*
-    GLScene CUDA Utility
-    Wraper of cutil.
-*)
-
-interface
-
-{$IFDEF MSWINDOWS}
-uses
-  Winapi.Windows;
-{$ENDIF}
-
-
-const
-{$IFDEF WIN64}
-  CUTILDLL = 'cutil64.dll';
-{$ELSE}
-  CUTILDLL = 'cutil32.dll';
-{$ENDIF}
-
-var
-  cutFindFilePath: function(const filename: PAnsiChar; const executablePath: PAnsiChar): PAnsiChar;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPGMf: function(const filename: PAnsiChar; var data: System.PSingle; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePGMf: function(const filename: PAnsiChar; data: System.PSingle; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPGMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPPMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPPM4ub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPGMi: function(const filename: PAnsiChar; var data: PInteger; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutLoadPGMs: function(const filename: PAnsiChar; var data: PWord; var w: Integer; var h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePGMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePPMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePPM4ub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePGMi: function(const filename: PAnsiChar; data: PInteger; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutSavePGMs: function(const filename: PAnsiChar; data: PWord; w: Integer; h: Integer): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutComparef: function(const reference: PSingle; const data: PSingle; const len: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutComparei: function(const reference: PInteger; const data: PInteger; const len: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCompareuit: function(const reference: PInteger; const data: PInteger; const len: Cardinal; const epsilon: Single;
-    const threshold: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCompareub: function(const reference: PByte; const data: PByte; const len: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCompareubt: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single;
-    const threshold: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCompareube: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutComparefe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutComparefet: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single;
-    const threshold: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCompareL2fe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutCreateTimer: function(var name: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutStartTimer: function(const name: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutStopTimer: function(const name: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutResetTimer: function(const name: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutDeleteTimer: function(const name: Cardinal): Boolean;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutGetTimerValue: function(const name: Cardinal): Single;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutGetAverageTimerValue: function(const name: Cardinal): Single;
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-  cutFree: procedure(ptr: Pointer);
-{$IFDEF MSWINDOWS}stdcall; {$ELSE}cdecl; {$ENDIF}
-
-function InitCUTIL: Boolean;
-procedure CloseCUTIL;
-function InitCUTILFromLibrary(const LibName: WideString): Boolean;
-function IsCUTILInitialized: Boolean;
-
-// ------------------------------------------------------
-implementation
-// ------------------------------------------------------
-
-const
-  INVALID_MODULEHANDLE = 0;
-
-{$IFDEF MSWINDOWS}
-// ************** Windows specific ********************
-var
-  CUTILHandle: HINST = INVALID_MODULEHANDLE;
-{$ELSE}
-// ************** UNIX specific ********************
-var
-  CUTILHandle: TLibHandle = INVALID_MODULEHANDLE;
-{$ENDIF}
-
-function CUTILGetProcAddress(ProcName: PAnsiChar): Pointer;
-begin
-  result := GetProcAddress(Cardinal(CUTILHandle), ProcName);
-end;
-
-function InitCUTIL: Boolean;
-begin
-  if CUTILHandle = INVALID_MODULEHANDLE then
-    result := InitCUTILFromLibrary(CUTILDLL)
-  else
-    result := True;
-end;
-
-procedure CloseCUTIL;
-begin
-  if CUTILHandle <> INVALID_MODULEHANDLE then
-  begin
-    FreeLibrary(Cardinal(CUTILHandle));
-    CUTILHandle := INVALID_MODULEHANDLE;
-  end;
-end;
-
-function InitCUTILFromLibrary(const LibName: WideString): Boolean;
-begin
-  result := False;
-  CloseCUTIL;
-  CUTILHandle := LoadLibraryW(PWideChar(LibName));
-  if CUTILHandle = INVALID_MODULEHANDLE then
-    Exit;
-  cutFindFilePath := CUTILGetProcAddress('cutFindFilePath');
-  cutLoadPGMf := CUTILGetProcAddress('cutLoadPGMf');
-  cutSavePGMf := CUTILGetProcAddress('cutSavePGMf');
-  cutLoadPGMub := CUTILGetProcAddress('cutLoadPGMub');
-  cutLoadPPMub := CUTILGetProcAddress('cutLoadPPMub');
-  cutLoadPPM4ub := CUTILGetProcAddress('cutLoadPPM4ub');
-  cutLoadPGMi := CUTILGetProcAddress('cutLoadPGMi');
-  cutLoadPGMs := CUTILGetProcAddress('cutLoadPGMs');
-  cutSavePGMub := CUTILGetProcAddress('cutSavePGMub');
-  cutSavePPMub := CUTILGetProcAddress('cutSavePPMub');
-  cutSavePPM4ub := CUTILGetProcAddress('cutSavePPM4ub');
-  cutSavePGMi := CUTILGetProcAddress('cutSavePGMi');
-  cutSavePGMs := CUTILGetProcAddress('cutSavePGMs');
-  cutComparef := CUTILGetProcAddress('cutComparef');
-  cutComparei := CUTILGetProcAddress('cutComparei');
-  cutCompareuit := CUTILGetProcAddress('cutCompareuit');
-  cutCompareub := CUTILGetProcAddress('cutCompareub');
-  cutCompareubt := CUTILGetProcAddress('cutCompareubt');
-  cutCompareube := CUTILGetProcAddress('cutCompareube');
-  cutComparefe := CUTILGetProcAddress('cutComparefe');
-  cutComparefet := CUTILGetProcAddress('cutComparefet');
-  cutCompareL2fe := CUTILGetProcAddress('cutCompareL2fe');
-  cutCreateTimer := CUTILGetProcAddress('cutCreateTimer');
-  cutStartTimer := CUTILGetProcAddress('cutStartTimer');
-  cutStopTimer := CUTILGetProcAddress('cutStopTimer');
-  cutResetTimer := CUTILGetProcAddress('cutResetTimer');
-  cutDeleteTimer := CUTILGetProcAddress('cutDeleteTimer');
-  cutGetTimerValue := CUTILGetProcAddress('cutGetTimerValue');
-  cutGetAverageTimerValue := CUTILGetProcAddress('cutGetAverageTimerValue');
-  cutFree := CUTILGetProcAddress('cutFree');
-  result := True;
-end;
-
-function IsCUTILInitialized: Boolean;
-begin
-  result := (CUTILHandle <> INVALID_MODULEHANDLE);
-end;
-
-//-----------------------------------------------
-initialization
-//-----------------------------------------------
-
-finalization
-
-  CloseCUTIL;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit GPU.CUDAUtility;
+
+(*
+    GLScene CUDA Utility
+    Wrapper of cutil.
+*)
+
+interface
+
+uses
+  Winapi.Windows;
+
+
+const
+{$IFDEF WIN64}
+  CUTILDLL = 'cutil64.dll';
+{$ELSE}
+  CUTILDLL = 'cutil32.dll';
+{$ENDIF}
+
+var
+  // Function pointers for the cutil library. Each one is assigned by
+  // InitCUTILFromLibrary from the export of the same name via GetProcAddress;
+  // none of the assignments is checked there, so a pointer may be nil if the
+  // library does not export that name. All use the stdcall convention.
+  cutFindFilePath: function(const filename: PAnsiChar; const executablePath: PAnsiChar): PAnsiChar;stdcall;
+  cutLoadPGMf: function(const filename: PAnsiChar; var data: System.PSingle; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutSavePGMf: function(const filename: PAnsiChar; data: System.PSingle; w: Integer; h: Integer): Boolean;stdcall;
+  cutLoadPGMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPPMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPPM4ub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPGMi: function(const filename: PAnsiChar; var data: PInteger; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPGMs: function(const filename: PAnsiChar; var data: PWord; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutSavePGMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePPMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePPM4ub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePGMi: function(const filename: PAnsiChar; data: PInteger; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePGMs: function(const filename: PAnsiChar; data: PWord; w: Integer; h: Integer): Boolean;stdcall;
+  cutComparef: function(const reference: PSingle; const data: PSingle; const len: Cardinal): Boolean;stdcall;
+  cutComparei: function(const reference: PInteger; const data: PInteger; const len: Cardinal): Boolean;stdcall;
+  cutCompareuit: function(const reference: PInteger; const data: PInteger; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareub: function(const reference: PByte; const data: PByte; const len: Cardinal): Boolean;stdcall;
+  cutCompareubt: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareube: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  cutComparefe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  cutComparefet: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareL2fe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  cutCreateTimer: function(var name: Cardinal): Boolean;stdcall;
+  cutStartTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutStopTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutResetTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutDeleteTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutGetTimerValue: function(const name: Cardinal): Single;stdcall;
+  cutGetAverageTimerValue: function(const name: Cardinal): Single;stdcall;
+  cutFree: procedure(ptr: Pointer);stdcall;
+
+function InitCUTIL: Boolean;
+procedure CloseCUTIL;
+function InitCUTILFromLibrary(const LibName: WideString): Boolean;
+function IsCUTILInitialized: Boolean;
+
+// ------------------------------------------------------
+implementation
+// ------------------------------------------------------
+
+const
+  // Value CUTILHandle holds while no library is loaded.
+  INVALID_MODULEHANDLE = 0;
+
+{$IFDEF MSWINDOWS}
+// ************** Windows specific ********************
+var
+  // Module handle of the loaded cutil library; assigned by
+  // InitCUTILFromLibrary and reset by CloseCUTIL.
+  CUTILHandle: HINST = INVALID_MODULEHANDLE;
+{$ELSE}
+// ************** UNIX specific ********************
+// NOTE(review): the interface section uses Winapi.Windows unconditionally, so
+// this branch looks unreachable in practice - confirm before relying on it.
+var
+  CUTILHandle: TLibHandle = INVALID_MODULEHANDLE;
+{$ENDIF}
+
+// Looks up the exported symbol ProcName in the library referenced by
+// CUTILHandle and returns the result of GetProcAddress for it.
+function CUTILGetProcAddress(ProcName: PAnsiChar): Pointer;
+begin
+  // Pass the handle at full width: a Cardinal cast would drop the upper
+  // 32 bits of the module handle on WIN64 (this unit targets cutil64.dll).
+  Result := GetProcAddress(CUTILHandle, ProcName);
+end;
+
+// Makes sure the cutil library is loaded, loading CUTILDLL when no library
+// is held yet. Returns True if one was already loaded or the load succeeded.
+function InitCUTIL: Boolean;
+begin
+  if CUTILHandle <> INVALID_MODULEHANDLE then
+    Exit(True);
+
+  Result := InitCUTILFromLibrary(CUTILDLL);
+end;
+
+// Releases the cutil library if one is loaded and resets CUTILHandle to
+// INVALID_MODULEHANDLE. Does nothing when no library is loaded.
+procedure CloseCUTIL;
+begin
+  if CUTILHandle <> INVALID_MODULEHANDLE then
+  begin
+    // Pass the handle at full width: a Cardinal cast would drop the upper
+    // 32 bits on WIN64, so FreeLibrary would receive a different handle value.
+    FreeLibrary(CUTILHandle);
+    CUTILHandle := INVALID_MODULEHANDLE;
+  end;
+end;
+
+// Loads the cutil library LibName and binds every cut* function pointer of
+// this unit to the export of the same name. A previously loaded library is
+// released first. Returns False when the library cannot be loaded and True
+// otherwise; the individual entry points are not checked.
+function InitCUTILFromLibrary(const LibName: WideString): Boolean;
+begin
+  CloseCUTIL;
+  CUTILHandle := LoadLibraryW(PWideChar(LibName));
+  if CUTILHandle = INVALID_MODULEHANDLE then
+    Exit(False);
+
+  // cutFindFilePath, cutLoad* and cutSave*
+  cutFindFilePath := CUTILGetProcAddress('cutFindFilePath');
+  cutLoadPGMf := CUTILGetProcAddress('cutLoadPGMf');
+  cutSavePGMf := CUTILGetProcAddress('cutSavePGMf');
+  cutLoadPGMub := CUTILGetProcAddress('cutLoadPGMub');
+  cutLoadPPMub := CUTILGetProcAddress('cutLoadPPMub');
+  cutLoadPPM4ub := CUTILGetProcAddress('cutLoadPPM4ub');
+  cutLoadPGMi := CUTILGetProcAddress('cutLoadPGMi');
+  cutLoadPGMs := CUTILGetProcAddress('cutLoadPGMs');
+  cutSavePGMub := CUTILGetProcAddress('cutSavePGMub');
+  cutSavePPMub := CUTILGetProcAddress('cutSavePPMub');
+  cutSavePPM4ub := CUTILGetProcAddress('cutSavePPM4ub');
+  cutSavePGMi := CUTILGetProcAddress('cutSavePGMi');
+  cutSavePGMs := CUTILGetProcAddress('cutSavePGMs');
+
+  // cutCompare*
+  cutComparef := CUTILGetProcAddress('cutComparef');
+  cutComparei := CUTILGetProcAddress('cutComparei');
+  cutCompareuit := CUTILGetProcAddress('cutCompareuit');
+  cutCompareub := CUTILGetProcAddress('cutCompareub');
+  cutCompareubt := CUTILGetProcAddress('cutCompareubt');
+  cutCompareube := CUTILGetProcAddress('cutCompareube');
+  cutComparefe := CUTILGetProcAddress('cutComparefe');
+  cutComparefet := CUTILGetProcAddress('cutComparefet');
+  cutCompareL2fe := CUTILGetProcAddress('cutCompareL2fe');
+
+  // cut*Timer*
+  cutCreateTimer := CUTILGetProcAddress('cutCreateTimer');
+  cutStartTimer := CUTILGetProcAddress('cutStartTimer');
+  cutStopTimer := CUTILGetProcAddress('cutStopTimer');
+  cutResetTimer := CUTILGetProcAddress('cutResetTimer');
+  cutDeleteTimer := CUTILGetProcAddress('cutDeleteTimer');
+  cutGetTimerValue := CUTILGetProcAddress('cutGetTimerValue');
+  cutGetAverageTimerValue := CUTILGetProcAddress('cutGetAverageTimerValue');
+
+  // cutFree
+  cutFree := CUTILGetProcAddress('cutFree');
+
+  Result := True;
+end;
+
+// Reports whether a cutil library handle is currently held.
+function IsCUTILInitialized: Boolean;
+begin
+  Result := not (CUTILHandle = INVALID_MODULEHANDLE);
+end;
+
+//-----------------------------------------------
+initialization
+//-----------------------------------------------
+
+finalization
+
+  CloseCUTIL;
+
+end.

+ 2676 - 2954
Source/GLS.CUDAApi.pas → Source/Import.CUDAApi.pas

@@ -1,2954 +1,2676 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAApi;
-
-(*
- * Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
- *
- * NOTICE TO USER:
- *
- * This source code is subject to NVIDIA ownership rights under U.S. and
- * international Copyright laws.  Users and possessors of this source code
- * are hereby granted a nonexclusive, royalty-free license to use this code
- * in individual and commercial software.
- *
- * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
- * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
- * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
- * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
- * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
- * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
- * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
- * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
- * OR PERFORMANCE OF THIS SOURCE CODE.
- *
- * U.S. Government End Users.   This source code is a "commercial item" as
- * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
- * "commercial computer  software"  and "commercial computer software
- * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
- * and is provided to the U.S. Government only as a commercial end item.
- * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
- * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
- * source code with only those rights set forth herein.
- *
- * Any use of this source code in individual and commercial software must
- * include, in the user documentation and internal comments to the code,
- * the above Disclaimer and U.S. Government End Users Notice.
- *)
-
-interface
-
-{$I GLScene.inc}
-
-uses
-  Winapi.Windows
-  {$IFDEF USE_LOGGING},GLS.Logger;{$ELSE};{$ENDIF}
-
-const
-  CUDAAPIDLL = 'nvcuda.dll';
-
-type
-  // CUDA device pointer
-  TCUdeviceptr = Pointer;
-
-  // CUDA device
-  TCUdevice = Integer;
-
-  // CUDA context
-  PCUcontext = ^TCUcontext;
-  TCUcontext = record
-  end;
-
-  // CUDA module
-  PCUmodule = ^TCUmodule;
-  TCUmodule = record
-  end;
-
-  // CUDA function
-  PCUfunction = ^TCUfunction;
-  TCUfunction = record
-  end;
-
-  // CUDA array
-  PCUarray = ^TCUarray;
-  TCUarray = record
-  end;
-
-  // CUDA texture reference
-  PCUtexref = ^TCUtexref;
-  TCUtexref = record
-  end;
-
-  // CUDA event
-  PCUevent = ^TCUevent;
-  TCUevent = record
-  end;
-
-  // CUDA stream
-  PCUstream = ^TCUstream;
-  TCUstream = record
-  end;
-
-  // CUDA graphics interop resource
-  PPCUgraphicsResource = ^PCUgraphicsResource;
-  PCUgraphicsResource = ^TCUgraphicsResource;
-  TCUgraphicsResource = record
-  end;
-
-  // Context creation flags
-  TCUctx_flags = (
-    // Automatic scheduling
-    CU_CTX_SCHED_AUTO = 0,
-    // Set spin as default scheduling
-    CU_CTX_SCHED_SPIN = 1,
-    // Set yield as default scheduling
-    CU_CTX_SCHED_YIELD = 2,
-    // Value 3 = SPIN (1) or-ed with YIELD (2); presumably the mask of the scheduling bits
-    CU_CTX_SCHED_MASK = 3, 
-	// Use blocking synchronization
-	CU_CTX_BLOCKING_SYNC = 4,
-    // Support mapped pinned allocations
-    CU_CTX_MAP_HOST = 8,
-    // Value 15 = the four low bits set; presumably the mask of all flags declared above
-    CU_CTX_FLAGS_MASK = 15);
-
-  // Event creation flags
-  TCUevent_flags = (
-    // Default event flag
-    CU_EVENT_DEFAULT = 0,
-    // Event uses blocking synchronization
-    CU_EVENT_BLOCKING_SYNC = 1
-    );
-
-  // Array formats
-  TCUarray_format = (
-    // Unsigned 8-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT8 = $01,
-    // Unsigned 16-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT16 = $02,
-    // Unsigned 32-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT32 = $03,
-    // Signed 8-bit integers
-    CU_AD_FORMAT_SIGNED_INT8 = $08,
-    // Signed 16-bit integers   
-    CU_AD_FORMAT_SIGNED_INT16 = $09,
-    // Signed 32-bit integers
-    CU_AD_FORMAT_SIGNED_INT32 = $0A,
-    // 16-bit floating point
-    CU_AD_FORMAT_HALF = $10,
-    // 32-bit floating point
-    CU_AD_FORMAT_FLOAT = $20
-    );
-
-  // Texture reference addressing modes
-  TCUaddress_mode = (
-    // Wrapping address mode
-    CU_TR_ADDRESS_MODE_WRAP = 0,
-    // Clamp to edge address mode
-    CU_TR_ADDRESS_MODE_CLAMP = 1,
-    // Mirror address mode
-    CU_TR_ADDRESS_MODE_MIRROR = 2
-    );
-
-  // Texture reference filtering modes
-  TCUfilter_mode = (
-    // Point filter mode
-	CU_TR_FILTER_MODE_POINT = 0,
-    // Linear filter mode
-    CU_TR_FILTER_MODE_LINEAR = 1
-    );
-
-  // Device properties
-  TCUdevice_attribute = (
-    // Maximum number of threads per block
-    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
-    // Maximum block dimension X
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
-    // Maximum block dimension Y
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
-    // Maximum block dimension Z
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
-    // Maximum grid dimension X
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
-    // Maximum grid dimension Y
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
-    // Maximum grid dimension Z
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
-    // Maximum shared memory available per block in bytes
-    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
-    // Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
-    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
-    // Memory available on device for __constant__ variables in a CUDA C kernel in bytes
-    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
-    // Warp size in threads
-    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
-    // Maximum pitch in bytes allowed by memory copies
-    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
-    // Maximum number of 32-bit registers available per block
-    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
-    // Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
-    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
-    // Peak clock frequency in kilohertz
-    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
-    // Alignment requirement for textures
-    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
-    // Device can possibly copy memory and execute a kernel concurrently
-    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
-    // Number of multiprocessors on device    
-    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
-    // Specifies whether there is a run time limit on kernels
-    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
-    // Device is integrated with host memory
-    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
-    // Device can map host memory into CUDA address space
-    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
-    // Compute mode (See ::CUcomputemode for details)
-    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
-    );
-
-  (* *
-    * CUDA Limits
-  *)
-  TcudaLimit = (
-    // GPU thread stack size
-	cudaLimitStackSize = $00,
-    // GPU printf FIFO size
-    cudaLimitPrintfFifoSize = $01
-    );
-
-  // Legacy device properties
-  TCUdevprop = record
-    // Maximum number of threads per block
-    maxThreadsPerBlock: Integer;
-    // Maximum size of each dimension of a block
-	maxThreadsDim: array [0 .. 2] of Integer;
-    // Maximum size of each dimension of a grid
-    maxGridSize: array [0 .. 2] of Integer;
-    // Shared memory available per block in bytes
-    sharedMemPerBlock: Integer;
-    // Constant memory available on device in bytes
-    totalConstantMemory: Integer;
-    // Warp size in threads
-    SIMDWidth: Integer;
-    // Maximum pitch in bytes allowed by memory copies
-    memPitch: Integer;
-    // 32-bit registers available per block
-    regsPerBlock: Integer;
-    // Clock frequency in kilohertz
-    clockRate: Integer;
-    // Alignment requirement for textures
-    textureAlign: Integer;
-  end;
-
-  // Function properties
-  TCUfunction_attribute = (
-
-    (* The number of threads beyond which a launch of the function would fail.
-     * This number depends on both the function and the device on which the
-     * function is currently loaded. *)
-    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
-
-    (* The size in bytes of statically-allocated shared memory required by
-     * this function. This does not include dynamically-allocated shared
-     * memory requested by the user at runtime. *)
-    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
-
-    { * The size in bytes of user-allocated constant memory required by this
-      * function. }
-    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
-
-    { * The size in bytes of thread local memory used by this function. }
-    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
-
-    { * The number of registers used by each thread of this function. }
-    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
-
-    CU_FUNC_ATTRIBUTE_MAX);
-
-  // Memory types
-  TCUmemorytype = (
-    // Host memory
-	CU_MEMORYTYPE_HOST = $01,
-    // Device memory
-    CU_MEMORYTYPE_DEVICE = $02,
-    // Array memory
-    CU_MEMORYTYPE_ARRAY = $03
-    );
-
-  // Compute Modes
-  TCUcomputemode = (
-    // Default compute mode (Multiple contexts allowed per device)
-	CU_COMPUTEMODE_DEFAULT = 0,
-    // Compute-exclusive mode (Only one context can be present on this device at a time)
-    CU_COMPUTEMODE_EXCLUSIVE = 1,
-    // Compute-prohibited mode (No contexts can be created on this device at this time)
-    CU_COMPUTEMODE_PROHIBITED = 2
-    );
-
-  // Online compiler options
-  TCUjit_option = (
-    { * Max number of registers that a thread may use. }
-
-    CU_JIT_MAX_REGISTERS = 0,
-
-    { * IN: Specifies minimum number of threads per block to target compilation
-      * for.
-      * OUT: Returns the number of threads the compiler actually targeted.
-      * This restricts the resource utilization of the compiler (e.g. max
-      * registers) such that a block with the given number of threads should be
-      * able to launch based on register limitations. Note, this option does not
-      * currently take into account any other resource limitations, such as
-      * shared memory utilization. }
-    CU_JIT_THREADS_PER_BLOCK,
-
-    { * Returns a float value in the option of the wall clock time, in
-      * milliseconds, spent creating the cubin. }
-    CU_JIT_WALL_TIME,
-
-    { * Pointer to a buffer in which to print any log messages from PTXAS
-      * that are informational in nature. }
-    CU_JIT_INFO_LOG_BUFFER,
-
-    { * IN: Log buffer size in bytes. Log messages will be capped at this size
-      * (including null terminator).
-      * OUT: Amount of log buffer filled with messages. }
-    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
-
-    { * Pointer to a buffer in which to print any log messages from PTXAS that
-      * reflect errors. }
-    CU_JIT_ERROR_LOG_BUFFER,
-
-    { * IN: Log buffer size in bytes. Log messages will be capped at this size
-      * (including null terminator).
-      * OUT: Amount of log buffer filled with messages. }
-    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
-
-    { * Level of optimizations to apply to generated code (0 - 4), with 4
-      * being the default and highest level of optimizations. }
-    CU_JIT_OPTIMIZATION_LEVEL,
-
-    { * No option value required. Determines the target based on the current
-      * attached context (default). }
-    CU_JIT_TARGET_FROM_CUCONTEXT,
-
-    { * Target is chosen based on supplied CUjit_target_enum. }
-    CU_JIT_TARGET,
-
-    { * Specifies choice of fallback strategy if matching cubin is not found.
-      * Choice is based on supplied CUjit_fallback_enum. }
-    CU_JIT_FALLBACK_STRATEGY );
-
-  // Online compilation targets
-  TCUjit_target = (
-    // Compute device class 1.0
-    CU_TARGET_COMPUTE_10 = 0,
-	// Compute device class 1.1
-    CU_TARGET_COMPUTE_11,
-    // Compute device class 1.2
-    CU_TARGET_COMPUTE_12,
-    // Compute device class 1.3
-    CU_TARGET_COMPUTE_13
-    );
-
-  // Cubin matching fallback strategies
-  TCUjit_fallback = (
-    // Prefer to compile PTX
-    CU_PREFER_PTX = 0,
-    // Prefer to fall back to compatible binary code
-    CU_PREFER_BINARY);
-
-  // Flags to register a graphics resource
-  TCUgraphicsRegisterFlags = (CU_GRAPHICS_REGISTER_FLAGS_NONE = $00000000);
-
-  // Flags for mapping and unmapping interop resources
-  TCUgraphicsMapResourceFlags =
-    (CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = $00000000,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = $00000001,
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = $00000002);
-
-  // Array indices for cube faces
-  TCUarray_cubemap_face = (
-    // Positive X face of cubemap
-	CU_CUBEMAP_FACE_POSITIVE_X = $00000000,
-    // Negative X face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_X = $00000001,
-    // Positive Y face of cubemap
-    CU_CUBEMAP_FACE_POSITIVE_Y = $00000002,
-    // Negative Y face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_Y = $00000003,
-    // Positive Z face of cubemap
-    CU_CUBEMAP_FACE_POSITIVE_Z = $00000004,
-    // Negative Z face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_Z = $00000005
-    );
-
-  (*
-    * CUDA function attributes, one record per function.
-  *)
-
-  TcudaFuncAttributes = record
-    // Size of shared memory in bytes
-    sharedSizeBytes: NativeUInt;
-	// Size of constant memory in bytes
-    constSizeBytes: NativeUInt;
-    // Size of local memory in bytes
-    localSizeBytes: NativeUInt;
-    // Maximum number of threads per block
-    maxThreadsPerBlock: Integer;
-    // Number of registers used
-    numRegs: Integer;
-    
-    (* PTX virtual architecture version for which the function was
-      *  compiled. This value is the major PTX version * 10 + the minor PTX
-      *  version, so a PTX version 1.3 function would return the value 13.
-      *  For device emulation kernels, this is set to 9999. *)
-    ptxVersion: Integer;
-    (* Binary architecture version for which the function was compiled.
-      *  This value is the major binary version * 10 + the minor binary version,
-      *  so a binary version 1.3 function would return the value 13.
-      *  For device emulation kernels, this is set to 9999. *)
-    binaryVersion: Integer;
-    // Six reserved Integer slots; NOTE(review): presumably padding kept for layout compatibility - confirm against the CUDA header
-    __cudaReserved: array [0 .. 5] of Integer;
-  end;
-
-  (* *
-    * CUDA function cache configurations
-  *)
-
-  TcudaFuncCache = (
-    // Default function cache configuration, no preference
-	cudaFuncCachePreferNone = 0,
-    // Prefer larger shared memory and smaller L1 cache
-    cudaFuncCachePreferShared = 1,
-    // Prefer larger L1 cache and smaller shared memory
-    cudaFuncCachePreferL1 = 2
-    );
-
-  // ************************************
-  // **
-  // **    Error codes
-  // **
-  // ***********************************/
-
-  // Error codes
-
-  TCUresult = type Cardinal;
-
-const
-  // Result codes returned through TCUresult.
-  // CUDA_SUCCESS is declared as a true constant (typecast form) rather than a
-  // typed constant, so that - like every other code below - it can be used in
-  // constant expressions such as case labels, while still carrying the
-  // TCUresult type.
-  CUDA_SUCCESS = TCUresult(0);          /// < No errors
-  CUDA_ERROR_INVALID_VALUE = 1;         /// < Invalid value
-  CUDA_ERROR_OUT_OF_MEMORY = 2;         /// < Out of memory
-  CUDA_ERROR_NOT_INITIALIZED = 3;       /// < Driver not initialized
-  CUDA_ERROR_DEINITIALIZED = 4;         /// < Driver deinitialized
-
-  CUDA_ERROR_NO_DEVICE = 100;           /// < No CUDA-capable device available
-  CUDA_ERROR_INVALID_DEVICE = 101;      /// < Invalid device
-
-  CUDA_ERROR_INVALID_IMAGE = 200;       /// < Invalid kernel image
-  CUDA_ERROR_INVALID_CONTEXT = 201;     /// < Invalid context
-  CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202;  /// < Context already current
-  CUDA_ERROR_MAP_FAILED = 205;          /// < Map failed
-  CUDA_ERROR_UNMAP_FAILED = 206;        /// < Unmap failed
-  CUDA_ERROR_ARRAY_IS_MAPPED = 207;     /// < Array is mapped
-  CUDA_ERROR_ALREADY_MAPPED = 208;      /// < Already mapped
-  CUDA_ERROR_NO_BINARY_FOR_GPU = 209;   /// < No binary for GPU
-  CUDA_ERROR_ALREADY_ACQUIRED = 210;    /// < Already acquired
-  CUDA_ERROR_NOT_MAPPED = 211;          /// < Not mapped
-  CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212;    /// < Mapped resource not available for access as an array
-  CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213;  /// < Mapped resource not available for access as a pointer
-
-  CUDA_ERROR_INVALID_SOURCE = 300;       /// < Invalid source
-  CUDA_ERROR_FILE_NOT_FOUND = 301;       /// < File not found
-
-  CUDA_ERROR_INVALID_HANDLE = 400;       /// < Invalid handle
-
-  CUDA_ERROR_NOT_FOUND = 500;            /// < Not found
-
-  CUDA_ERROR_NOT_READY = 600;            /// < CUDA not ready
-
-  CUDA_ERROR_LAUNCH_FAILED = 700;           /// < Launch failed
-  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701; /// < Launch exceeded resources
-  CUDA_ERROR_LAUNCH_TIMEOUT = 702;          /// < Launch exceeded timeout
-  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703; /// < Launch with incompatible texturing
-
-  CUDA_ERROR_POINTER_IS_64BIT = 800;        /// < Attempted to retrieve 64-bit pointer via 32-bit API function
-  CUDA_ERROR_SIZE_IS_64BIT = 801;           /// < Attempted to retrieve 64-bit size via 32-bit API function
-
-  CUDA_ERROR_UNKNOWN = 999;                 /// < Unknown error
-
-const
-
-  { * If set, host memory is portable between CUDA contexts.
-    * Flag for ::cuMemHostAlloc() }
-  CU_MEMHOSTALLOC_PORTABLE = $01;
-
-  { * If set, host memory is mapped into CUDA address space and
-    * ::cuMemHostGetDevicePointer() may be called on the host pointer.
-    * Flag for ::cuMemHostAlloc() }
-  CU_MEMHOSTALLOC_DEVICEMAP = $02;
-
-  { * If set, host memory is allocated as write-combined - fast to write,
-    * faster to DMA, slow to read except via SSE4 streaming load instruction
-    * (MOVNTDQA).
-    * Flag for ::cuMemHostAlloc() }
-  CU_MEMHOSTALLOC_WRITECOMBINED = $04;
-
-  // 2D memory copy parameters
-type
-
-  PCUDA_MEMCPY2D = ^TCUDA_MEMCPY2D;
-
-  TCUDA_MEMCPY2D = record
-    srcXInBytes,       /// < Source X in bytes
-    srcY: Cardinal;    /// < Source Y
-
-    srcMemoryType: TCUmemorytype;  /// < Source memory type (host, device, array)
-    srcHost: Pointer;              /// < Source host pointer
-    srcDevice: TCUdeviceptr;       /// < Source device pointer
-    srcArray: PCUarray;            /// < Source array reference
-    srcPitch: Cardinal;            /// < Source pitch (ignored when src is array)
-
-    dstXInBytes,                   /// < Destination X in bytes
-    dstY: Cardinal;                /// < Destination Y
-    dstMemoryType: TCUmemorytype;  /// < Destination memory type (host, device, array)
-    dstHost: Pointer;              /// < Destination host pointer
-    dstDevice: TCUdeviceptr;       /// < Destination device pointer
-    dstArray: PCUarray;            /// < Destination array reference
-    dstPitch: Cardinal;            /// < Destination pitch (ignored when dst is array)
-
-    WidthInBytes: Cardinal;        /// < Width of 2D memory copy in bytes
-    Height: Cardinal;              /// < Height of 2D memory copy
-  end;
-
-  // 3D memory copy parameters
-  TCUDA_MEMCPY3D = record
-    srcXInBytes,                        /// < Source X in bytes
-    srcY,                               /// < Source Y
-    srcZ: Cardinal;                     /// < Source Z
-    srcLOD: Cardinal;                   /// < Source LOD
-    srcMemoryType: TCUmemorytype;       /// < Source memory type (host, device, array)
-    srcHost: Pointer;                   /// < Source host pointer
-    srcDevice: TCUdeviceptr;            /// < Source device pointer
-    srcArray: PCUarray;                 /// < Source array reference
-    reserved0: Pointer;                 /// < Must be NULL
-    srcPitch: Cardinal;                 /// < Source pitch (ignored when src is array)
-    srcHeight: Cardinal;                /// < Source height (ignored when src is array; may be 0 if Depth==1)
-
-    dstXInBytes,                        /// < Destination X in bytes
-    dstY,                               /// < Destination Y
-    dstZ: Cardinal;                     /// < Destination Z
-    dstLOD: Cardinal;                   /// < Destination LOD
-    dstMemoryType: TCUmemorytype;       /// < Destination memory type (host, device, array)
-    dstHost: Pointer;                   /// < Destination host pointer
-    dstDevice: TCUdeviceptr;            /// < Destination device pointer
-    dstArray: PCUarray;                 /// < Destination array reference
-    reserved1: Pointer;                 /// < Must be NULL
-    dstPitch: Cardinal;                 /// < Destination pitch (ignored when dst is array)
-    dstHeight: Cardinal;                /// < Destination height (ignored when dst is array; may be 0 if Depth==1)
-
-    WidthInBytes: Cardinal;             /// < Width of 3D memory copy in bytes
-    Height: Cardinal;                   /// < Height of 3D memory copy
-    Depth: Cardinal;                    /// < Depth of 3D memory copy
-  end;
-
-  // Array descriptor
-  PCUDA_ARRAY_DESCRIPTOR = ^TCUDA_ARRAY_DESCRIPTOR;
-
-  TCUDA_ARRAY_DESCRIPTOR = record
-    Width: Cardinal;                    /// < Width of array
-    Height: Cardinal;                   /// < Height of array
-    Format: TCUarray_format;            /// < Array format
-    NumChannels: Cardinal;              /// < Channels per array element
-  end;
-
-  // 3D array descriptor
-  TCUDA_ARRAY3D_DESCRIPTOR = record
-    Width: Cardinal;                     /// < Width of 3D array
-    Height: Cardinal;                    /// < Height of 3D array
-    Depth: Cardinal;                     /// < Depth of 3D array
-    Format: TCUarray_format;             /// < Array format
-    NumChannels: Cardinal;               /// < Channels per array element
-    Flags: Cardinal;                     /// < Flags
-  end;
-
-  // Flags to map or unmap a resource
-  TCUGLmap_flags = (CU_GL_MAP_RESOURCE_FLAGS_NONE,
-    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY, CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
-
-const
-  { * Override the texref format with a format inferred from the array.
-    * Flag for ::cuTexRefSetArray() }
-  CU_TRSA_OVERRIDE_FORMAT = $01;
-
-  { * Read the texture as integers rather than promoting the values to floats
-    * in the range [0,1].
-    * Flag for ::cuTexRefSetFlags() }
-  CU_TRSF_READ_AS_INTEGER = $01;
-
-  { * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
-    * Flag for ::cuTexRefSetFlags() }
-  CU_TRSF_NORMALIZED_COORDINATES = $02;
-
-  { * For texture references loaded into the module, use default texunit from
-    * texture reference. }
-  CU_PARAM_TR_DEFAULT = -1;
-
-type
-  TDim3 = array [0 .. 2] of LongWord;
-
-{$IFDEF MSWINDOWS}
-type
-  HGPUNV = Pointer;
-{$ENDIF}
-
-type
-  TcuInit = function(Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDriverGetVersion = function(out driverVersion: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceGet = function(var device: TCUdevice; ordinal: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceGetCount = function(var count: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceGetName = function(name: PAnsiChar; len: Integer; dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceComputeCapability = function(var major: Integer; var minor: Integer;
-    dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceTotalMem = function(bytes: PSize_t; dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceGetProperties = function(var prop: TCUdevprop; dev: TCUdevice)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuDeviceGetAttribute = function(pi: PSize_t; attrib: TCUdevice_attribute;
-    dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal; dev: TCUdevice)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxDestroy = function(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxAttach = function(var pctx: PCUcontext; Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxDetach = function(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxPushCurrent = function(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxPopCurrent = function(var pctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxGetDevice = function(var device: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuCtxSynchronize = function: TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleLoad = function(var module: PCUmodule; const fname: PAnsiChar)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleLoadData = function(var module: PCUmodule; const image: PAnsiChar)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleLoadDataEx = function(var module: PCUmodule; var image;
-    numOptions: Cardinal; var options: TCUjit_option; var optionValues)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleLoadFatBinary = function(var module: PCUmodule; var fatCubin)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleUnload = function(hmod: PCUmodule): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleGetFunction = function(out hfunc: PCUfunction; hmod: PCUmodule;
-    const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleGetGlobal = function(out dptr: TCUdeviceptr; var bytes: Cardinal;
-    hmod: PCUmodule; const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuModuleGetTexRef = function(out pTexRef: PCUtexref; hmod: PCUmodule;
-    const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemGetInfo = function(var free: Cardinal; var total: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemAlloc = function(var dptr: TCUdeviceptr; bytesize: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemAllocPitch = function(var dptr: TCUdeviceptr; var pPitch: Cardinal;
-    WidthInBytes: Cardinal; Height: Cardinal; ElementSizeBytes: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemFree = function(dptr: TCUdeviceptr): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemGetAddressRange = function(var pbase: TCUdeviceptr; var psize: Cardinal;
-    dptr: TCUdeviceptr): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemAllocHost = function(var pp; bytesize: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemFreeHost = function(p: Pointer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemHostAlloc = function(var pp: Pointer; bytesize: Cardinal; Flags: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemHostGetDevicePointer = function(var pdptr: TCUdeviceptr; p: Pointer;
-    Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemHostGetFlags = function(var pFlags: Cardinal; var p): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyHtoD = function(dstDevice: TCUdeviceptr; const srcHost: Pointer;
-    ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyDtoH = function(const dstHost: Pointer; srcDevice: TCUdeviceptr;
-    ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyDtoD = function(dstDevice: TCUdeviceptr; srcDevice: TCUdeviceptr;
-    ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyDtoDAsync = function(dstDevice: TCUdeviceptr;
-    srcDevice: TCUdeviceptr; ByteCount: Cardinal; hStream: PCUstream)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyDtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
-    srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyAtoD = function(dstDevice: TCUdeviceptr; hSrc: PCUarray;
-    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyHtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
-    pSrc: Pointer; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyAtoH = function(dstHost: Pointer; srcArray: PCUarray;
-    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyAtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
-    srcArray: PCUarray; SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpy2D = function(const pCopy: PCUDA_MEMCPY2D): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpy2DUnaligned = function(var pCopy: TCUDA_MEMCPY2D): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpy3D = function(var pCopy: TCUDA_MEMCPY3D): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyHtoDAsync = function(dstDevice: TCUdeviceptr; var srcHost;
-    ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyDtoHAsync = function(var dstHost; srcDevice: TCUdeviceptr;
-    ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpyHtoAAsync = function(dstArray: PCUarray; dstIndex: Cardinal;
-    var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Array-to-host copy taking a stream handle. srcArray is typed PCUarray
-  // (it was mistakenly PCUstream), matching the srcArray parameter of
-  // TcuMemcpyAtoH; both are plain pointers, so the call ABI is unchanged.
-  TcuMemcpyAtoHAsync = function(var dstHost; srcArray: PCUarray;
-    SrcIndex: Cardinal; ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpy2DAsync = function(var pCopy: TCUDA_MEMCPY2D; hStream: PCUstream)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemcpy3DAsync = function(var pCopy: TCUDA_MEMCPY3D; hStream: PCUstream)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD8 = function(dstDevice: TCUdeviceptr; ub: Byte; N: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD16 = function(dstDevice: TCUdeviceptr; uw: Word; N: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD32 = function(dstDevice: TCUdeviceptr; ui: Cardinal; N: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD2D8 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-    ub: Byte; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD2D16 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-    uw: Word; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuMemsetD2D32 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-    ui: Cardinal; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuFuncSetBlockShape = function(hfunc: PCUfunction; x: Integer; y: Integer;
-    z: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuFuncSetSharedSize = function(hfunc: PCUfunction; bytes: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuFuncGetAttribute = function(var pi: Integer; attrib: TCUfunction_attribute;
-    hfunc: PCUfunction): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for the cuArray* / cuArray3D* entry points. The create
-  // calls receive the new PCUarray handle and a descriptor by reference; the
-  // GetDescriptor calls fill a descriptor for an existing handle.
-  // All are stdcall on Windows and cdecl elsewhere.
-  TcuArrayCreate = function(var pHandle: PCUarray;
-    var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuArrayGetDescriptor = function(var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
-    hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuArrayDestroy = function(hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuArray3DCreate = function(var pHandle: PCUarray;
-    var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuArray3DGetDescriptor = function(var pArrayDescriptor
-    : TCUDA_ARRAY3D_DESCRIPTOR; hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for the cuTexRef* entry points, all keyed on a
-  // PCUtexref handle: create/destroy, the Set* calls (array, address,
-  // format, address mode, filter mode, flags) and the matching Get* calls,
-  // which return their value through a leading var parameter.
-  // All are stdcall on Windows and cdecl elsewhere.
-  TcuTexRefCreate = function(var pTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefDestroy = function(hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetArray = function(hTexRef: PCUtexref; hArray: PCUarray;
-    Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetAddress = function(var ByteOffset: Cardinal; hTexRef: PCUtexref;
-    dptr: TCUdeviceptr; bytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetAddress2D = function(hTexRef: PCUtexref;
-    var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr; Pitch: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetFormat = function(hTexRef: PCUtexref; fmt: TCUarray_format;
-    NumPackedComponents: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetAddressMode = function(hTexRef: PCUtexref; dim: Integer;
-    am: TCUaddress_mode): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetFilterMode = function(hTexRef: PCUtexref; fm: TCUfilter_mode)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefSetFlags = function(hTexRef: PCUtexref; Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetAddress = function(var pdptr: TCUdeviceptr; hTexRef: PCUtexref)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetArray = function(var phArray: PCUarray; hTexRef: PCUtexref)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetAddressMode = function(var pam: TCUaddress_mode;
-    hTexRef: PCUtexref; dim: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetFilterMode = function(var pfm: TCUfilter_mode; hTexRef: PCUtexref)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetFormat = function(var pFormat: TCUarray_format;
-    var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuTexRefGetFlags = function(var pFlags: Cardinal; hTexRef: PCUtexref)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for the cuParamSet* and cuLaunch* entry points, all
-  // taking a PCUfunction handle first. The cuParamSet* types write a
-  // Cardinal, a Single, an untyped buffer of numbytes, or a texture
-  // reference; the cuLaunch* types take no extra arguments, a grid
-  // width/height, or a grid width/height plus a stream handle.
-  // All are stdcall on Windows and cdecl elsewhere.
-  TcuParamSetSize = function(hfunc: PCUfunction; numbytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuParamSeti = function(hfunc: PCUfunction; offset: Integer; value: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuParamSetf = function(hfunc: PCUfunction; offset: Integer; value: Single)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuParamSetv = function(hfunc: PCUfunction; offset: Integer; var ptr;
-    numbytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuParamSetTexRef = function(hfunc: PCUfunction; texunit: Integer;
-    hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuLaunch = function(f: PCUfunction): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuLaunchGrid = function(f: PCUfunction; grid_width: Integer;
-    grid_height: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuLaunchGridAsync = function(f: PCUfunction; grid_width: Integer;
-    grid_height: Integer; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for the cuEvent* and cuStream* entry points. The
-  // create calls return the new handle through a var parameter; the other
-  // calls take an existing PCUevent / PCUstream handle.
-  // cuEventElapsedTime writes its result into a Single passed by reference.
-  // All are stdcall on Windows and cdecl elsewhere.
-  TcuEventCreate = function(var phEvent: PCUevent; Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuEventRecord = function(hEvent: PCUevent; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuEventQuery = function(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuEventSynchronize = function(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuEventDestroy = function(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuEventElapsedTime = function(var pMilliseconds: Single; hStart: PCUevent;
-    hEnd: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuStreamCreate = function(var phStream: PCUstream; Flags: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuStreamQuery = function(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuStreamSynchronize = function(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuStreamDestroy = function(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for cuGLCtxCreate, cuWGLGetDevice and the cuGraphics*
-  // entry points. The register calls return a PCUgraphicsResource through a
-  // var parameter; the map/unmap calls take a count, a pointer to resource
-  // handles and a stream handle.
-  // All are stdcall on Windows and cdecl elsewhere.
-  // NOTE(review): TcuWGLGetDevice is declared on every platform although its
-  // name suggests a Windows-only (WGL) entry point - confirm how the loader
-  // treats it on non-Windows targets.
-  TcuGLCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal;
-    device: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsGLRegisterBuffer = function(var pCudaResource: PCUgraphicsResource;
-    buffer: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsGLRegisterImage = function(var pCudaResource: PCUgraphicsResource;
-    image, target: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuWGLGetDevice = function(var pDevice: TCUdevice; hGpu: HGPUNV): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsUnregisterResource = function(resource: PCUgraphicsResource)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsSubResourceGetMappedArray = function(var pArray: PCUarray;
-    resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsResourceGetMappedPointer = function(var pDevPtr: TCUdeviceptr;
-    out psize: Cardinal; resource: PCUgraphicsResource): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsResourceSetMapFlags = function(resource: PCUgraphicsResource;
-    Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsMapResources = function(count: Cardinal;
-    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGraphicsUnmapResources = function(count: Cardinal;
-    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  // Procedural types for cuGLInit and the cuGL*BufferObject* entry points,
-  // which identify the buffer by a Cardinal rather than by a resource
-  // handle. All are stdcall on Windows and cdecl elsewhere.
-  // NOTE(review): TcuGLInit is the only type here declared as a procedure,
-  // so any status the driver returns from cuGLInit is discarded - confirm
-  // against the CUDA header that this is intended.
-  TcuGLInit = procedure();
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLRegisterBufferObject = function(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLMapBufferObject = function(var dptr: TCUdeviceptr; var size: Cardinal;
-    buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLUnmapBufferObject = function(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLUnregisterBufferObject = function(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLSetBufferObjectMapFlags = function(buffer: Cardinal; Flags: Cardinal)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLMapBufferObjectAsync = function(var dptr: TCUdeviceptr;
-    var size: Cardinal; buffer: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-  TcuGLUnmapBufferObjectAsync = function(buffer: Cardinal; hStream: PCUstream)
-    : TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-
-// Public entry points of the binding: one procedural variable per Tcu*
-// type, each named after the driver function it stands for.
-// NOTE(review): presumably assigned by InitCUDA / InitCUDAFromLibrary and
-// not callable before that - confirm in the loader code.
-var
-  cuInit: TcuInit;
-  cuDriverGetVersion: TcuDriverGetVersion;
-  cuDeviceGet: TcuDeviceGet;
-  cuDeviceGetCount: TcuDeviceGetCount;
-  cuDeviceGetName: TcuDeviceGetName;
-  cuDeviceComputeCapability: TcuDeviceComputeCapability;
-  cuDeviceTotalMem: TcuDeviceTotalMem;
-  cuDeviceGetProperties: TcuDeviceGetProperties;
-  cuDeviceGetAttribute: TcuDeviceGetAttribute;
-  cuCtxCreate: TcuCtxCreate;
-  cuCtxDestroy: TcuCtxDestroy;
-  cuCtxAttach: TcuCtxAttach;
-  cuCtxDetach: TcuCtxDetach;
-  cuCtxPushCurrent: TcuCtxPushCurrent;
-  cuCtxPopCurrent: TcuCtxPopCurrent;
-  cuCtxGetDevice: TcuCtxGetDevice;
-  cuCtxSynchronize: TcuCtxSynchronize;
-  cuModuleLoad: TcuModuleLoad;
-  cuModuleLoadData: TcuModuleLoadData;
-  cuModuleLoadDataEx: TcuModuleLoadDataEx;
-  cuModuleLoadFatBinary: TcuModuleLoadFatBinary;
-  cuModuleUnload: TcuModuleUnload;
-  cuModuleGetFunction: TcuModuleGetFunction;
-  cuModuleGetGlobal: TcuModuleGetGlobal;
-  cuModuleGetTexRef: TcuModuleGetTexRef;
-  cuMemGetInfo: TcuMemGetInfo;
-  cuMemAlloc: TcuMemAlloc;
-  cuMemAllocPitch: TcuMemAllocPitch;
-  cuMemFree: TcuMemFree;
-  cuMemGetAddressRange: TcuMemGetAddressRange;
-  cuMemAllocHost: TcuMemAllocHost;
-  cuMemFreeHost: TcuMemFreeHost;
-  cuMemHostAlloc: TcuMemHostAlloc;
-  cuMemHostGetDevicePointer: TcuMemHostGetDevicePointer;
-  cuMemHostGetFlags: TcuMemHostGetFlags;
-  cuMemcpyHtoD: TcuMemcpyHtoD;
-  cuMemcpyDtoH: TcuMemcpyDtoH;
-  cuMemcpyDtoD: TcuMemcpyDtoD;
-  cuMemcpyDtoDAsync: TcuMemcpyDtoDAsync;
-  cuMemcpyDtoA: TcuMemcpyDtoA;
-  cuMemcpyAtoD: TcuMemcpyAtoD;
-  cuMemcpyHtoA: TcuMemcpyHtoA;
-  cuMemcpyAtoH: TcuMemcpyAtoH;
-  cuMemcpyAtoA: TcuMemcpyAtoA;
-  cuMemcpy2D: TcuMemcpy2D;
-  cuMemcpy2DUnaligned: TcuMemcpy2DUnaligned;
-  cuMemcpy3D: TcuMemcpy3D;
-  cuMemcpyHtoDAsync: TcuMemcpyHtoDAsync;
-  cuMemcpyDtoHAsync: TcuMemcpyDtoHAsync;
-  cuMemcpyHtoAAsync: TcuMemcpyHtoAAsync;
-  cuMemcpyAtoHAsync: TcuMemcpyAtoHAsync;
-  cuMemcpy2DAsync: TcuMemcpy2DAsync;
-  cuMemcpy3DAsync: TcuMemcpy3DAsync;
-  cuMemsetD8: TcuMemsetD8;
-  cuMemsetD16: TcuMemsetD16;
-  cuMemsetD32: TcuMemsetD32;
-  cuMemsetD2D8: TcuMemsetD2D8;
-  cuMemsetD2D16: TcuMemsetD2D16;
-  cuMemsetD2D32: TcuMemsetD2D32;
-  cuFuncSetBlockShape: TcuFuncSetBlockShape;
-  cuFuncSetSharedSize: TcuFuncSetSharedSize;
-  cuFuncGetAttribute: TcuFuncGetAttribute;
-  cuArrayCreate: TcuArrayCreate;
-  cuArrayGetDescriptor: TcuArrayGetDescriptor;
-  cuArrayDestroy: TcuArrayDestroy;
-  cuArray3DCreate: TcuArray3DCreate;
-  cuArray3DGetDescriptor: TcuArray3DGetDescriptor;
-  cuTexRefCreate: TcuTexRefCreate;
-  cuTexRefDestroy: TcuTexRefDestroy;
-  cuTexRefSetArray: TcuTexRefSetArray;
-  cuTexRefSetAddress: TcuTexRefSetAddress;
-  cuTexRefSetAddress2D: TcuTexRefSetAddress2D;
-  cuTexRefSetFormat: TcuTexRefSetFormat;
-  cuTexRefSetAddressMode: TcuTexRefSetAddressMode;
-  cuTexRefSetFilterMode: TcuTexRefSetFilterMode;
-  cuTexRefSetFlags: TcuTexRefSetFlags;
-  cuTexRefGetAddress: TcuTexRefGetAddress;
-  cuTexRefGetArray: TcuTexRefGetArray;
-  cuTexRefGetAddressMode: TcuTexRefGetAddressMode;
-  cuTexRefGetFilterMode: TcuTexRefGetFilterMode;
-  cuTexRefGetFormat: TcuTexRefGetFormat;
-  cuTexRefGetFlags: TcuTexRefGetFlags;
-  cuParamSetSize: TcuParamSetSize;
-  cuParamSeti: TcuParamSeti;
-  cuParamSetf: TcuParamSetf;
-  cuParamSetv: TcuParamSetv;
-  cuParamSetTexRef: TcuParamSetTexRef;
-  cuLaunch: TcuLaunch;
-  cuLaunchGrid: TcuLaunchGrid;
-  cuLaunchGridAsync: TcuLaunchGridAsync;
-  cuEventCreate: TcuEventCreate;
-  cuEventRecord: TcuEventRecord;
-  cuEventQuery: TcuEventQuery;
-  cuEventSynchronize: TcuEventSynchronize;
-  cuEventDestroy: TcuEventDestroy;
-  cuEventElapsedTime: TcuEventElapsedTime;
-  cuStreamCreate: TcuStreamCreate;
-  cuStreamQuery: TcuStreamQuery;
-  cuStreamSynchronize: TcuStreamSynchronize;
-  cuStreamDestroy: TcuStreamDestroy;
-  cuGLInit: TcuGLInit;
-  cuGLCtxCreate: TcuGLCtxCreate;
-  cuGraphicsGLRegisterBuffer: TcuGraphicsGLRegisterBuffer;
-  cuGraphicsGLRegisterImage: TcuGraphicsGLRegisterImage;
-  cuWGLGetDevice: TcuWGLGetDevice;
-  cuGraphicsUnregisterResource: TcuGraphicsUnregisterResource;
-  cuGraphicsSubResourceGetMappedArray: TcuGraphicsSubResourceGetMappedArray;
-  cuGraphicsResourceGetMappedPointer: TcuGraphicsResourceGetMappedPointer;
-  cuGraphicsResourceSetMapFlags: TcuGraphicsResourceSetMapFlags;
-  cuGraphicsMapResources: TcuGraphicsMapResources;
-  cuGraphicsUnmapResources: TcuGraphicsUnmapResources;
-  cuGLRegisterBufferObject: TcuGLRegisterBufferObject;
-  cuGLMapBufferObject: TcuGLMapBufferObject;
-  cuGLUnmapBufferObject: TcuGLUnmapBufferObject;
-  cuGLUnregisterBufferObject: TcuGLUnregisterBufferObject;
-  cuGLSetBufferObjectMapFlags: TcuGLSetBufferObjectMapFlags;
-  cuGLMapBufferObjectAsync: TcuGLMapBufferObjectAsync;
-  cuGLUnmapBufferObjectAsync: TcuGLUnmapBufferObjectAsync;
-
-// Library lifecycle and diagnostics routines exported by this unit.
-// NOTE(review): the one-line summaries below are read off the names and
-// signatures only - confirm them against the implementations.
-// InitCUDA: presumably loads the default CUDA library; returns a Boolean.
-function InitCUDA: Boolean;
-// CloseCUDA: presumably releases what InitCUDA acquired.
-procedure CloseCUDA;
-// InitCUDAFromLibrary: as InitCUDA, but for the library named by LibName.
-function InitCUDAFromLibrary(const LibName: WideString): Boolean;
-// IsCUDAInitialized: reports the current load state as a Boolean.
-function IsCUDAInitialized: Boolean;
-// Get_CUDA_API_Error_String: returns a string for a TCUresult value; the
-// debug shells use it to build their log messages.
-function Get_CUDA_API_Error_String(AError: TCUresult): string;
-
-//==============================================================
-implementation
-//==============================================================
-
-resourcestring
-  // Format for failed-call log entries: the first %s receives the entry
-  // point name, the second %s the error text.
-  cudasFuncRetErr = '%s return error: %s';
-
-const
-  // NOTE(review): presumably the "no library loaded" value that CUDAHandle
-  // is compared against - confirm at the use sites.
-  INVALID_MODULEHANDLE = 0;
-
-  // CUDAHandle is declared once per platform: as HINST under MSWINDOWS and
-  // as TLibHandle under UNIX. Presumably it holds the handle of the loaded
-  // CUDA library - confirm in the loader.
-  // NOTE(review): if neither MSWINDOWS nor UNIX is defined, CUDAHandle is
-  // not declared at all.
-  // ************** Windows specific ********************
-{$IFDEF MSWINDOWS}
-
-var
-  CUDAHandle: HINST;
-{$ENDIF}
-  // ************** UNIX specific ********************
-{$IFDEF UNIX}
-
-var
-  CUDAHandle: TLibHandle;
-{$ENDIF}
-
-// Entry point names as strings, one constant per driver function. The
-// debug shells pass them to the logger to identify the failing call.
-// NOTE(review): presumably these are also the export names resolved from
-// the library at load time - confirm in the loader.
-// cuDeviceGet_Name carries an underscore, unlike its siblings: the plain
-// spelling would collide with the cuDeviceGetName procedural variable.
-const
-  cuInitName = 'cuInit';
-  cuDriverGetVersionName = 'cuDriverGetVersion';
-  cuDeviceGet_Name = 'cuDeviceGet';
-  cuDeviceGetCountName = 'cuDeviceGetCount';
-  cuDeviceGetNameName = 'cuDeviceGetName';
-  cuDeviceComputeCapabilityName = 'cuDeviceComputeCapability';
-  cuDeviceTotalMemName = 'cuDeviceTotalMem';
-  cuDeviceGetPropertiesName = 'cuDeviceGetProperties';
-  cuDeviceGetAttributeName = 'cuDeviceGetAttribute';
-  cuCtxCreateName = 'cuCtxCreate';
-  cuCtxDestroyName = 'cuCtxDestroy';
-  cuCtxAttachName = 'cuCtxAttach';
-  cuCtxDetachName = 'cuCtxDetach';
-  cuCtxPushCurrentName = 'cuCtxPushCurrent';
-  cuCtxPopCurrentName = 'cuCtxPopCurrent';
-  cuCtxGetDeviceName = 'cuCtxGetDevice';
-  cuCtxSynchronizeName = 'cuCtxSynchronize';
-  cuModuleLoadName = 'cuModuleLoad';
-  cuModuleLoadDataName = 'cuModuleLoadData';
-  cuModuleLoadDataExName = 'cuModuleLoadDataEx';
-  cuModuleLoadFatBinaryName = 'cuModuleLoadFatBinary';
-  cuModuleUnloadName = 'cuModuleUnload';
-  cuModuleGetFunctionName = 'cuModuleGetFunction';
-  cuModuleGetGlobalName = 'cuModuleGetGlobal';
-  cuModuleGetTexRefName = 'cuModuleGetTexRef';
-  cuMemGetInfoName = 'cuMemGetInfo';
-  cuMemAllocName = 'cuMemAlloc';
-  cuMemAllocPitchName = 'cuMemAllocPitch';
-  cuMemFreeName = 'cuMemFree';
-  cuMemGetAddressRangeName = 'cuMemGetAddressRange';
-  cuMemAllocHostName = 'cuMemAllocHost';
-  cuMemFreeHostName = 'cuMemFreeHost';
-  cuMemHostAllocName = 'cuMemHostAlloc';
-  cuMemHostGetDevicePointerName = 'cuMemHostGetDevicePointer';
-  cuMemHostGetFlagsName = 'cuMemHostGetFlags';
-  cuMemcpyHtoDName = 'cuMemcpyHtoD';
-  cuMemcpyDtoHName = 'cuMemcpyDtoH';
-  cuMemcpyDtoDName = 'cuMemcpyDtoD';
-  cuMemcpyDtoDAsyncName = 'cuMemcpyDtoDAsync';
-  cuMemcpyDtoAName = 'cuMemcpyDtoA';
-  cuMemcpyAtoDName = 'cuMemcpyAtoD';
-  cuMemcpyHtoAName = 'cuMemcpyHtoA';
-  cuMemcpyAtoHName = 'cuMemcpyAtoH';
-  cuMemcpyAtoAName = 'cuMemcpyAtoA';
-  cuMemcpy2DName = 'cuMemcpy2D';
-  cuMemcpy2DUnalignedName = 'cuMemcpy2DUnaligned';
-  cuMemcpy3DName = 'cuMemcpy3D';
-  cuMemcpyHtoDAsyncName = 'cuMemcpyHtoDAsync';
-  cuMemcpyDtoHAsyncName = 'cuMemcpyDtoHAsync';
-  cuMemcpyHtoAAsyncName = 'cuMemcpyHtoAAsync';
-  cuMemcpyAtoHAsyncName = 'cuMemcpyAtoHAsync';
-  cuMemcpy2DAsyncName = 'cuMemcpy2DAsync';
-  cuMemcpy3DAsyncName = 'cuMemcpy3DAsync';
-  cuMemsetD8Name = 'cuMemsetD8';
-  cuMemsetD16Name = 'cuMemsetD16';
-  cuMemsetD32Name = 'cuMemsetD32';
-  cuMemsetD2D8Name = 'cuMemsetD2D8';
-  cuMemsetD2D16Name = 'cuMemsetD2D16';
-  cuMemsetD2D32Name = 'cuMemsetD2D32';
-  cuFuncSetBlockShapeName = 'cuFuncSetBlockShape';
-  cuFuncSetSharedSizeName = 'cuFuncSetSharedSize';
-  cuFuncGetAttributeName = 'cuFuncGetAttribute';
-  cuArrayCreateName = 'cuArrayCreate';
-  cuArrayGetDescriptorName = 'cuArrayGetDescriptor';
-  cuArrayDestroyName = 'cuArrayDestroy';
-  cuArray3DCreateName = 'cuArray3DCreate';
-  cuArray3DGetDescriptorName = 'cuArray3DGetDescriptor';
-  cuTexRefCreateName = 'cuTexRefCreate';
-  cuTexRefDestroyName = 'cuTexRefDestroy';
-  cuTexRefSetArrayName = 'cuTexRefSetArray';
-  cuTexRefSetAddressName = 'cuTexRefSetAddress';
-  cuTexRefSetAddress2DName = 'cuTexRefSetAddress2D';
-  cuTexRefSetFormatName = 'cuTexRefSetFormat';
-  cuTexRefSetAddressModeName = 'cuTexRefSetAddressMode';
-  cuTexRefSetFilterModeName = 'cuTexRefSetFilterMode';
-  cuTexRefSetFlagsName = 'cuTexRefSetFlags';
-  cuTexRefGetAddressName = 'cuTexRefGetAddress';
-  cuTexRefGetArrayName = 'cuTexRefGetArray';
-  cuTexRefGetAddressModeName = 'cuTexRefGetAddressMode';
-  cuTexRefGetFilterModeName = 'cuTexRefGetFilterMode';
-  cuTexRefGetFormatName = 'cuTexRefGetFormat';
-  cuTexRefGetFlagsName = 'cuTexRefGetFlags';
-  cuParamSetSizeName = 'cuParamSetSize';
-  cuParamSetiName = 'cuParamSeti';
-  cuParamSetfName = 'cuParamSetf';
-  cuParamSetvName = 'cuParamSetv';
-  cuParamSetTexRefName = 'cuParamSetTexRef';
-  cuLaunchName = 'cuLaunch';
-  cuLaunchGridName = 'cuLaunchGrid';
-  cuLaunchGridAsyncName = 'cuLaunchGridAsync';
-  cuEventCreateName = 'cuEventCreate';
-  cuEventRecordName = 'cuEventRecord';
-  cuEventQueryName = 'cuEventQuery';
-  cuEventSynchronizeName = 'cuEventSynchronize';
-  cuEventDestroyName = 'cuEventDestroy';
-  cuEventElapsedTimeName = 'cuEventElapsedTime';
-  cuStreamCreateName = 'cuStreamCreate';
-  cuStreamQueryName = 'cuStreamQuery';
-  cuStreamSynchronizeName = 'cuStreamSynchronize';
-  cuStreamDestroyName = 'cuStreamDestroy';
-  cuGLCtxCreateName = 'cuGLCtxCreate';
-  cuGraphicsGLRegisterBufferName = 'cuGraphicsGLRegisterBuffer';
-  cuGraphicsGLRegisterImageName = 'cuGraphicsGLRegisterImage';
-  cuWGLGetDeviceName = 'cuWGLGetDevice';
-  cuGraphicsUnregisterResourceName = 'cuGraphicsUnregisterResource';
-  cuGraphicsSubResourceGetMappedArrayName =
-    'cuGraphicsSubResourceGetMappedArray';
-  cuGraphicsResourceGetMappedPointerName = 'cuGraphicsResourceGetMappedPointer';
-  cuGraphicsResourceSetMapFlagsName = 'cuGraphicsResourceSetMapFlags';
-  cuGraphicsMapResourcesName = 'cuGraphicsMapResources';
-  cuGraphicsUnmapResourcesName = 'cuGraphicsUnmapResources';
-  cuGLInitName = 'cuGLInit';
-  cuGLRegisterBufferObjectName = 'cuGLRegisterBufferObject';
-  cuGLMapBufferObjectName = 'cuGLMapBufferObject';
-  cuGLUnmapBufferObjectName = 'cuGLUnmapBufferObject';
-  cuGLUnregisterBufferObjectName = 'cuGLUnregisterBufferObject';
-  cuGLSetBufferObjectMapFlagsName = 'cuGLSetBufferObjectMapFlags';
-  cuGLMapBufferObjectAsyncName = 'cuGLMapBufferObjectAsync';
-  cuGLUnmapBufferObjectAsyncName = 'cuGLUnmapBufferObjectAsync';
-
-// Compiled only when USE_CUDA_DEBUG_MODE is defined: the underscore-suffixed
-// variables hold the procedural values that the *Shell wrappers forward to.
-// There is no cuGLInit_ counterpart in this list.
-// NOTE(review): presumably the loader binds these to the real driver
-// functions and points the public variables at the shells - confirm.
-{$IFDEF USE_CUDA_DEBUG_MODE}
-
-var
-  cuInit_: TcuInit;
-  cuDriverGetVersion_: TcuDriverGetVersion;
-  cuDeviceGet_: TcuDeviceGet;
-  cuDeviceGetCount_: TcuDeviceGetCount;
-  cuDeviceGetName_: TcuDeviceGetName;
-  cuDeviceComputeCapability_: TcuDeviceComputeCapability;
-  cuDeviceTotalMem_: TcuDeviceTotalMem;
-  cuDeviceGetProperties_: TcuDeviceGetProperties;
-  cuDeviceGetAttribute_: TcuDeviceGetAttribute;
-  cuCtxCreate_: TcuCtxCreate;
-  cuCtxDestroy_: TcuCtxDestroy;
-  cuCtxAttach_: TcuCtxAttach;
-  cuCtxDetach_: TcuCtxDetach;
-  cuCtxPushCurrent_: TcuCtxPushCurrent;
-  cuCtxPopCurrent_: TcuCtxPopCurrent;
-  cuCtxGetDevice_: TcuCtxGetDevice;
-  cuCtxSynchronize_: TcuCtxSynchronize;
-  cuModuleLoad_: TcuModuleLoad;
-  cuModuleLoadData_: TcuModuleLoadData;
-  cuModuleLoadDataEx_: TcuModuleLoadDataEx;
-  cuModuleLoadFatBinary_: TcuModuleLoadFatBinary;
-  cuModuleUnload_: TcuModuleUnload;
-  cuModuleGetFunction_: TcuModuleGetFunction;
-  cuModuleGetGlobal_: TcuModuleGetGlobal;
-  cuModuleGetTexRef_: TcuModuleGetTexRef;
-  cuMemGetInfo_: TcuMemGetInfo;
-  cuMemAlloc_: TcuMemAlloc;
-  cuMemAllocPitch_: TcuMemAllocPitch;
-  cuMemFree_: TcuMemFree;
-  cuMemGetAddressRange_: TcuMemGetAddressRange;
-  cuMemAllocHost_: TcuMemAllocHost;
-  cuMemFreeHost_: TcuMemFreeHost;
-  cuMemHostAlloc_: TcuMemHostAlloc;
-  cuMemHostGetDevicePointer_: TcuMemHostGetDevicePointer;
-  cuMemHostGetFlags_: TcuMemHostGetFlags;
-  cuMemcpyHtoD_: TcuMemcpyHtoD;
-  cuMemcpyDtoH_: TcuMemcpyDtoH;
-  cuMemcpyDtoD_: TcuMemcpyDtoD;
-  cuMemcpyDtoDAsync_: TcuMemcpyDtoDAsync;
-  cuMemcpyDtoA_: TcuMemcpyDtoA;
-  cuMemcpyAtoD_: TcuMemcpyAtoD;
-  cuMemcpyHtoA_: TcuMemcpyHtoA;
-  cuMemcpyAtoH_: TcuMemcpyAtoH;
-  cuMemcpyAtoA_: TcuMemcpyAtoA;
-  cuMemcpy2D_: TcuMemcpy2D;
-  cuMemcpy2DUnaligned_: TcuMemcpy2DUnaligned;
-  cuMemcpy3D_: TcuMemcpy3D;
-  cuMemcpyHtoDAsync_: TcuMemcpyHtoDAsync;
-  cuMemcpyDtoHAsync_: TcuMemcpyDtoHAsync;
-  cuMemcpyHtoAAsync_: TcuMemcpyHtoAAsync;
-  cuMemcpyAtoHAsync_: TcuMemcpyAtoHAsync;
-  cuMemcpy2DAsync_: TcuMemcpy2DAsync;
-  cuMemcpy3DAsync_: TcuMemcpy3DAsync;
-  cuMemsetD8_: TcuMemsetD8;
-  cuMemsetD16_: TcuMemsetD16;
-  cuMemsetD32_: TcuMemsetD32;
-  cuMemsetD2D8_: TcuMemsetD2D8;
-  cuMemsetD2D16_: TcuMemsetD2D16;
-  cuMemsetD2D32_: TcuMemsetD2D32;
-  cuFuncSetBlockShape_: TcuFuncSetBlockShape;
-  cuFuncSetSharedSize_: TcuFuncSetSharedSize;
-  cuFuncGetAttribute_: TcuFuncGetAttribute;
-  cuArrayCreate_: TcuArrayCreate;
-  cuArrayGetDescriptor_: TcuArrayGetDescriptor;
-  cuArrayDestroy_: TcuArrayDestroy;
-  cuArray3DCreate_: TcuArray3DCreate;
-  cuArray3DGetDescriptor_: TcuArray3DGetDescriptor;
-  cuTexRefCreate_: TcuTexRefCreate;
-  cuTexRefDestroy_: TcuTexRefDestroy;
-  cuTexRefSetArray_: TcuTexRefSetArray;
-  cuTexRefSetAddress_: TcuTexRefSetAddress;
-  cuTexRefSetAddress2D_: TcuTexRefSetAddress2D;
-  cuTexRefSetFormat_: TcuTexRefSetFormat;
-  cuTexRefSetAddressMode_: TcuTexRefSetAddressMode;
-  cuTexRefSetFilterMode_: TcuTexRefSetFilterMode;
-  cuTexRefSetFlags_: TcuTexRefSetFlags;
-  cuTexRefGetAddress_: TcuTexRefGetAddress;
-  cuTexRefGetArray_: TcuTexRefGetArray;
-  cuTexRefGetAddressMode_: TcuTexRefGetAddressMode;
-  cuTexRefGetFilterMode_: TcuTexRefGetFilterMode;
-  cuTexRefGetFormat_: TcuTexRefGetFormat;
-  cuTexRefGetFlags_: TcuTexRefGetFlags;
-  cuParamSetSize_: TcuParamSetSize;
-  cuParamSeti_: TcuParamSeti;
-  cuParamSetf_: TcuParamSetf;
-  cuParamSetv_: TcuParamSetv;
-  cuParamSetTexRef_: TcuParamSetTexRef;
-  cuLaunch_: TcuLaunch;
-  cuLaunchGrid_: TcuLaunchGrid;
-  cuLaunchGridAsync_: TcuLaunchGridAsync;
-  cuEventCreate_: TcuEventCreate;
-  cuEventRecord_: TcuEventRecord;
-  cuEventQuery_: TcuEventQuery;
-  cuEventSynchronize_: TcuEventSynchronize;
-  cuEventDestroy_: TcuEventDestroy;
-  cuEventElapsedTime_: TcuEventElapsedTime;
-  cuStreamCreate_: TcuStreamCreate;
-  cuStreamQuery_: TcuStreamQuery;
-  cuStreamSynchronize_: TcuStreamSynchronize;
-  cuStreamDestroy_: TcuStreamDestroy;
-  cuGLCtxCreate_: TcuGLCtxCreate;
-  cuGraphicsGLRegisterBuffer_: TcuGraphicsGLRegisterBuffer;
-  cuGraphicsGLRegisterImage_: TcuGraphicsGLRegisterImage;
-  cuWGLGetDevice_: TcuWGLGetDevice;
-  cuGraphicsUnregisterResource_: TcuGraphicsUnregisterResource;
-  cuGraphicsSubResourceGetMappedArray_: TcuGraphicsSubResourceGetMappedArray;
-  cuGraphicsResourceGetMappedPointer_: TcuGraphicsResourceGetMappedPointer;
-  cuGraphicsResourceSetMapFlags_: TcuGraphicsResourceSetMapFlags;
-  cuGraphicsMapResources_: TcuGraphicsMapResources;
-  cuGraphicsUnmapResources_: TcuGraphicsUnmapResources;
-  cuGLRegisterBufferObject_: TcuGLRegisterBufferObject;
-  cuGLMapBufferObject_: TcuGLMapBufferObject;
-  cuGLUnmapBufferObject_: TcuGLUnmapBufferObject;
-  cuGLUnregisterBufferObject_: TcuGLUnregisterBufferObject;
-  cuGLSetBufferObjectMapFlags_: TcuGLSetBufferObjectMapFlags;
-  cuGLMapBufferObjectAsync_: TcuGLMapBufferObjectAsync;
-  cuGLUnmapBufferObjectAsync_: TcuGLUnmapBufferObjectAsync;
-
-// Debug shell for cuInit: forwards to cuInit_, returns its status unchanged
-// and logs a cudasFuncRetErr entry when that status is not CUDA_SUCCESS.
-function cuInitShell(Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuInit_(Flags);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuInitName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDriverGetVersion: forwards to cuDriverGetVersion_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDriverGetVersionShell(out driverVersion: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDriverGetVersion_(driverVersion);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDriverGetVersionName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceGet: forwards to cuDeviceGet_, returns its status
-// unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceGetShell(var device: TCUdevice;
-  ordinal: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceGet_(device, ordinal);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceGet_Name, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceGetCount: forwards to cuDeviceGetCount_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceGetCountShell(var count: Integer): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceGetCount_(count);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceGetCountName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceGetName: forwards to cuDeviceGetName_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceGetNameShell(name: PAnsiChar; len: Integer;
-  dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceGetName_(name, len, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceGetNameName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceComputeCapability: forwards to
-// cuDeviceComputeCapability_, returns its status unchanged and logs any
-// status other than CUDA_SUCCESS.
-function cuDeviceComputeCapabilityShell(var major: Integer;
-  var minor: Integer; dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceComputeCapability_(major, minor, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceComputeCapabilityName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceTotalMem: forwards to cuDeviceTotalMem_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceTotalMemShell(bytes: PSize_t;
-  dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceTotalMem_(bytes, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceTotalMemName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceGetProperties: forwards to cuDeviceGetProperties_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceGetPropertiesShell(var prop: TCUdevprop;
-  dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceGetProperties_(prop, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceGetPropertiesName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuDeviceGetAttribute: forwards to cuDeviceGetAttribute_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuDeviceGetAttributeShell(pi: PSize_t;
-  attrib: TCUdevice_attribute; dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuDeviceGetAttribute_(pi, attrib, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuDeviceGetAttributeName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxCreate: forwards to cuCtxCreate_, returns its status
-// unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal;
-  dev: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxCreate_(pctx, Flags, dev);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxCreateName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxDestroy: forwards to cuCtxDestroy_, returns its
-// status unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxDestroyShell(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxDestroy_(ctx);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxDestroyName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxAttach: forwards to cuCtxAttach_, returns its status
-// unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxAttachShell(var pctx: PCUcontext;
-  Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxAttach_(pctx, Flags);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxAttachName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxDetach: forwards to cuCtxDetach_, returns its status
-// unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxDetachShell(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxDetach_(ctx);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxDetachName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxPushCurrent: forwards to cuCtxPushCurrent_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxPushCurrentShell(ctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxPushCurrent_(ctx);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxPushCurrentName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxPopCurrent: forwards to cuCtxPopCurrent_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxPopCurrentShell(var pctx: PCUcontext): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxPopCurrent_(pctx);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxPopCurrentName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxGetDevice: forwards to cuCtxGetDevice_, returns its
-// status unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxGetDeviceShell(var device: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxGetDevice_(device);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxGetDeviceName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuCtxSynchronize: calls cuCtxSynchronize_ (no arguments),
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuCtxSynchronizeShell: TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuCtxSynchronize_;
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuCtxSynchronizeName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleLoad: forwards to cuModuleLoad_, returns its
-// status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleLoadShell(var module: PCUmodule;
-  const fname: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleLoad_(module, fname);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleLoadName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleLoadData: forwards to cuModuleLoadData_, returns
-// its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleLoadDataShell(var module: PCUmodule;
-  const image: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleLoadData_(module, image);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleLoadDataName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleLoadDataEx: forwards all five arguments to
-// cuModuleLoadDataEx_, returns its status unchanged and logs any status
-// other than CUDA_SUCCESS.
-function cuModuleLoadDataExShell(var module: PCUmodule; var image;
-  numOptions: Cardinal; var options: TCUjit_option;
-  var optionValues): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleLoadDataEx_(module, image, numOptions,
-    options, optionValues);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleLoadDataExName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleLoadFatBinary: forwards to cuModuleLoadFatBinary_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleLoadFatBinaryShell(var module: PCUmodule;
-  var fatCubin): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleLoadFatBinary_(module, fatCubin);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleLoadFatBinaryName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleUnload: forwards to cuModuleUnload_, returns its
-// status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleUnloadShell(hmod: PCUmodule): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleUnload_(hmod);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleUnloadName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleGetFunction: forwards to cuModuleGetFunction_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleGetFunctionShell(out hfunc: PCUfunction;
-  hmod: PCUmodule; const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleGetFunction_(hfunc, hmod, name);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleGetFunctionName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Debug shell for cuModuleGetGlobal: forwards to cuModuleGetGlobal_,
-// returns its status unchanged and logs any status other than CUDA_SUCCESS.
-function cuModuleGetGlobalShell(out dptr: TCUdeviceptr;
-  var bytes: Cardinal; hmod: PCUmodule; const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleGetGlobal_(dptr, bytes, hmod, name);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr,
-    [cuModuleGetGlobalName, Get_CUDA_API_Error_String(Result)]);
-end;
-
-function cuModuleGetTexRefShell(out pTexRef: PCUtexref; hmod: PCUmodule;
-  const name: PAnsiChar): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuModuleGetTexRef_(pTexRef, hmod, name);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuModuleGetTexRefName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemGetInfoShell(var free: Cardinal; var total: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemGetInfo_(free, total);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemGetInfoName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemAllocShell(var dptr: TCUdeviceptr; bytesize: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemAlloc_(dptr, bytesize);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemAllocName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemAllocPitchShell(var dptr: TCUdeviceptr; var pPitch: Cardinal;
-  WidthInBytes: Cardinal; Height: Cardinal; ElementSizeBytes: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemAllocPitch_(dptr, pPitch, WidthInBytes, Height,
-    ElementSizeBytes);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemAllocPitchName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemFreeShell(dptr: TCUdeviceptr): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemFree_(dptr);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemFreeName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemGetAddressRangeShell(var pbase: TCUdeviceptr; var psize: Cardinal;
-  dptr: TCUdeviceptr): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemGetAddressRange_(pbase, psize, dptr);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemGetAddressRangeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemAllocHostShell(var pp; bytesize: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemAllocHost_(pp, bytesize);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemAllocHostName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemFreeHostShell(p: Pointer): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemFreeHost_(p);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemFreeHostName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemHostAllocShell(var pp: Pointer; bytesize: Cardinal; Flags: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemHostAlloc_(pp, bytesize, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemHostAllocName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemHostGetDevicePointerShell(var pdptr: TCUdeviceptr; p: Pointer;
-  Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemHostGetDevicePointer_(pdptr, p, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemHostGetDevicePointerName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemHostGetFlagsShell(var pFlags: Cardinal; var p): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemHostGetFlags_(pFlags, p);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemHostGetFlagsName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyHtoDShell(dstDevice: TCUdeviceptr; const srcHost: Pointer;
-  ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyHtoD_(dstDevice, srcHost, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyHtoDName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyDtoHShell(const dstHost: Pointer; srcDevice: TCUdeviceptr;
-  ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyDtoH_(dstHost, srcDevice, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyDtoHName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyDtoDShell(dstDevice: TCUdeviceptr; srcDevice: TCUdeviceptr;
-  ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyDtoD_(dstDevice, srcDevice, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyDtoDName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyDtoDAsyncShell(dstDevice: TCUdeviceptr;
-  srcDevice: TCUdeviceptr; ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall; {$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyDtoDAsync_(dstDevice, srcDevice, ByteCount, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpyDtoDAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyDtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
-  srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyDtoA_(dstArray, dstIndex, srcDevice, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyDtoAName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyAtoDShell(dstDevice: TCUdeviceptr; hSrc: PCUarray;
-  SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyAtoD_(dstDevice, hSrc, SrcIndex, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyAtoDName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyHtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
-  pSrc: Pointer; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyHtoA_(dstArray, dstIndex, pSrc, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyHtoAName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyAtoHShell(dstHost: Pointer; srcArray: PCUarray;
-  SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyAtoH_(dstHost, srcArray, SrcIndex, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyAtoHName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyAtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
-  srcArray: PCUarray; SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyAtoA_(dstArray, dstIndex, srcArray, SrcIndex, ByteCount);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpyAtoAName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpy2DShell(const pCopy: PCUDA_MEMCPY2D): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpy2D_(pCopy);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpy2DName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpy2DUnalignedShell(var pCopy: TCUDA_MEMCPY2D): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpy2DUnaligned_(pCopy);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpy2DUnalignedName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpy3DShell(var pCopy: TCUDA_MEMCPY3D): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpy3D_(pCopy);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemcpy3DName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyHtoDAsyncShell(dstDevice: TCUdeviceptr; var srcHost;
-  ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyHtoDAsync_(dstDevice, srcHost, ByteCount, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpyHtoDAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyDtoHAsyncShell(var dstHost; srcDevice: TCUdeviceptr;
-  ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyDtoHAsync_(dstHost, srcDevice, ByteCount, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpyDtoHAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyHtoAAsyncShell(dstArray: PCUarray; dstIndex: Cardinal;
-  var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyHtoAAsync_(dstArray, dstIndex, pSrc, ByteCount, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpyHtoAAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpyAtoHAsyncShell(var dstHost; srcArray: PCUstream;
-  SrcIndex: Cardinal; ByteCount: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpyAtoHAsync_(dstHost, srcArray, SrcIndex, ByteCount, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpyAtoHAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpy2DAsyncShell(var pCopy: TCUDA_MEMCPY2D; hStream: PCUstream)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpy2DAsync_(pCopy, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpy2DAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemcpy3DAsyncShell(var pCopy: TCUDA_MEMCPY3D; hStream: PCUstream)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemcpy3DAsync_(pCopy, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuMemcpy3DAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD8Shell(dstDevice: TCUdeviceptr; ub: Byte; N: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD8_(dstDevice, ub, N);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD8Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD16Shell(dstDevice: TCUdeviceptr; uw: Word; N: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD16_(dstDevice, uw, N);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD16Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD32Shell(dstDevice: TCUdeviceptr; ui: Cardinal; N: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD32_(dstDevice, ui, N);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD32Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD2D8Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-  ub: Byte; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD2D8_(dstDevice, dstPitch, ub, Width, Height);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD2D8Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD2D16Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-  uw: Word; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD2D16_(dstDevice, dstPitch, uw, Width, Height);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD2D16Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuMemsetD2D32Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
-  ui: Cardinal; Width: Cardinal; Height: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuMemsetD2D32_(dstDevice, dstPitch, ui, Width, Height);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuMemsetD2D32Name, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuFuncSetBlockShapeShell(hfunc: PCUfunction; x: Integer; y: Integer;
-  z: Integer): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuFuncSetBlockShape_(hfunc, x, y, z);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuFuncSetBlockShapeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuFuncSetSharedSizeShell(hfunc: PCUfunction; bytes: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuFuncSetSharedSize_(hfunc, bytes);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuFuncSetSharedSizeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuFuncGetAttributeShell(var pi: Integer; attrib: TCUfunction_attribute;
-  hfunc: PCUfunction): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuFuncGetAttribute_(pi, attrib, hfunc);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuFuncGetAttributeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuArrayCreateShell(var pHandle: PCUarray;
-  var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuArrayCreate_(pHandle, pAllocateArray);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuArrayCreateName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuArrayGetDescriptorShell(var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
-  hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuArrayGetDescriptor_(pArrayDescriptor, hArray);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuArrayGetDescriptorName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuArrayDestroyShell(hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuArrayDestroy_(hArray);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuArrayDestroyName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuArray3DCreateShell(var pHandle: PCUarray;
-  var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuArray3DCreate_(pHandle, pAllocateArray);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuArray3DCreateName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuArray3DGetDescriptorShell(var pArrayDescriptor
-  : TCUDA_ARRAY3D_DESCRIPTOR; hArray: PCUarray): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuArray3DGetDescriptor_(pArrayDescriptor, hArray);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuArray3DGetDescriptorName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefCreateShell(var pTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;
-{$ELSE}cdecl;
-{$ENDIF}
-begin
-  Result := cuTexRefCreate_(pTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuTexRefCreateName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefDestroyShell(hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefDestroy_(hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefDestroyName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetArrayShell(hTexRef: PCUtexref; hArray: PCUarray;
-  Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetArray_(hTexRef, hArray, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetArrayName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetAddressShell(var ByteOffset: Cardinal; hTexRef: PCUtexref;
-  dptr: TCUdeviceptr; bytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetAddress_(ByteOffset, hTexRef, dptr, bytes);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetAddressName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetAddress2DShell(hTexRef: PCUtexref;
-  var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr; Pitch: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetAddress2D_(hTexRef, desc, dptr, Pitch);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetAddress2DName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetFormatShell(hTexRef: PCUtexref; fmt: TCUarray_format;
-  NumPackedComponents: Integer): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetFormat_(hTexRef, fmt, NumPackedComponents);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetFormatName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetAddressModeShell(hTexRef: PCUtexref; dim: Integer;
-  am: TCUaddress_mode): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetAddressMode_(hTexRef, dim, am);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetAddressModeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetFilterModeShell(hTexRef: PCUtexref; fm: TCUfilter_mode)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetFilterMode_(hTexRef, fm);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetFilterModeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefSetFlagsShell(hTexRef: PCUtexref; Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefSetFlags_(hTexRef, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefSetFlagsName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetAddressShell(var pdptr: TCUdeviceptr; hTexRef: PCUtexref)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetAddress_(pdptr, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetAddressName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetArrayShell(var phArray: PCUarray; hTexRef: PCUtexref)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetArray_(phArray, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetArrayName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetAddressModeShell(var pam: TCUaddress_mode;
-  hTexRef: PCUtexref; dim: Integer): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetAddressMode_(pam, hTexRef, dim);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetAddressModeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetFilterModeShell(var pfm: TCUfilter_mode; hTexRef: PCUtexref)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetFilterMode_(pfm, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetFilterModeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetFormatShell(var pFormat: TCUarray_format;
-  var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetFormat_(pFormat, pNumChannels, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetFormatName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuTexRefGetFlagsShell(var pFlags: Cardinal; hTexRef: PCUtexref)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuTexRefGetFlags_(pFlags, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuTexRefGetFlagsName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuParamSetSizeShell(hfunc: PCUfunction; numbytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuParamSetSize_(hfunc, numbytes);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuParamSetSizeName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuParamSetiShell(hfunc: PCUfunction; offset: Integer; value: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuParamSeti_(hfunc, offset, value);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuParamSetiName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuParamSetfShell(hfunc: PCUfunction; offset: Integer; value: Single)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuParamSetf_(hfunc, offset, value);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuParamSetfName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuParamSetvShell(hfunc: PCUfunction; offset: Integer; var ptr;
-  numbytes: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuParamSetv_(hfunc, offset, ptr, numbytes);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuParamSetvName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuParamSetTexRefShell(hfunc: PCUfunction; texunit: Integer;
-  hTexRef: PCUtexref): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuParamSetTexRef_(hfunc, texunit, hTexRef);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuParamSetTexRefName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuLaunchShell(f: PCUfunction): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuLaunch_(f);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuLaunchName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuLaunchGridShell(f: PCUfunction; grid_width: Integer;
-  grid_height: Integer): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuLaunchGrid_(f, grid_width, grid_height);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuLaunchGridName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuLaunchGridAsyncShell(f: PCUfunction; grid_width: Integer;
-  grid_height: Integer; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuLaunchGridAsync_(f, grid_width, grid_height, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuLaunchGridAsyncName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventCreateShell(var phEvent: PCUevent; Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventCreate_(phEvent, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuEventCreateName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventRecordShell(hEvent: PCUevent; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventRecord_(hEvent, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuEventRecordName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventQueryShell(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventQuery_(hEvent);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuEventQueryName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventSynchronizeShell(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventSynchronize_(hEvent);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuEventSynchronizeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventDestroyShell(hEvent: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventDestroy_(hEvent);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuEventDestroyName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuEventElapsedTimeShell(var pMilliseconds: Single; hStart: PCUevent;
-  hEnd: PCUevent): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuEventElapsedTime_(pMilliseconds, hStart, hEnd);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuEventElapsedTimeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuStreamCreateShell(var phStream: PCUstream; Flags: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuStreamCreate_(phStream, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuStreamCreateName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuStreamQueryShell(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuStreamQuery_(hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuStreamQueryName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuStreamSynchronizeShell(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuStreamSynchronize_(hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuStreamSynchronizeName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuStreamDestroyShell(hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuStreamDestroy_(hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuStreamDestroyName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGLCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal;
-  device: TCUdevice): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLCtxCreate_(pctx, Flags, device);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuGLCtxCreateName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsGLRegisterBufferShell(var pCudaResource: PCUgraphicsResource;
-  buffer: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsGLRegisterBuffer_(pCudaResource, buffer, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsGLRegisterBufferName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsGLRegisterImageShell(var pCudaResource: PCUgraphicsResource;
-  image, target: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsGLRegisterImage_(pCudaResource, image, target, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsGLRegisterImageName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuWGLGetDeviceShell(var pDevice: TCUdevice; hGpu: HGPUNV): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuWGLGetDevice_(pDevice, hGpu);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuWGLGetDeviceName, Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsUnregisterResourceShell(resource: PCUgraphicsResource)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsUnregisterResource_(resource);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsUnregisterResourceName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsSubResourceGetMappedArrayShell(var pArray: PCUarray;
-  resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsSubResourceGetMappedArray_(pArray, resource, arrayIndex,
-    mipLevel);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuGraphicsSubResourceGetMappedArrayName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsResourceGetMappedPointerShell(var pDevPtr: TCUdeviceptr;
-  out psize: Cardinal; resource: PCUgraphicsResource): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsResourceGetMappedPointer_(pDevPtr, psize, resource);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr,
-      [cuGraphicsResourceGetMappedPointerName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsResourceSetMapFlagsShell(resource: PCUgraphicsResource;
-  Flags: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsResourceSetMapFlags_(resource, Flags);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsResourceSetMapFlagsName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsMapResourcesShell(count: Cardinal;
-  resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsMapResources_(count, resources, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsMapResourcesName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGraphicsUnmapResourcesShell(count: Cardinal;
-  resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGraphicsUnmapResources_(count, resources, hStream);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGraphicsUnmapResourcesName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-function cuGLRegisterBufferObjectShell(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLRegisterBufferObject_(buffer);
-  if Result <> CUDA_SUCCESS then
-    GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLRegisterBufferObjectName,
-      Get_CUDA_API_Error_String(Result)])
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as cuGLMapBufferObject
-// when USE_CUDA_DEBUG_MODE is defined. Forwards the call to
-// cuGLMapBufferObject_ and reports any result other than CUDA_SUCCESS
-// through GLSLogger.
-function cuGLMapBufferObjectShell(var dptr: TCUdeviceptr; var size: Cardinal;
-  buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLMapBufferObject_(dptr, size, buffer);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLMapBufferObjectName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as cuGLUnmapBufferObject
-// when USE_CUDA_DEBUG_MODE is defined. Forwards the call to
-// cuGLUnmapBufferObject_ and reports any result other than CUDA_SUCCESS
-// through GLSLogger.
-function cuGLUnmapBufferObjectShell(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLUnmapBufferObject_(buffer);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLUnmapBufferObjectName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as
-// cuGLUnregisterBufferObject when USE_CUDA_DEBUG_MODE is defined. Forwards
-// the call to cuGLUnregisterBufferObject_ and reports any result other than
-// CUDA_SUCCESS through GLSLogger.
-function cuGLUnregisterBufferObjectShell(buffer: Cardinal): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLUnregisterBufferObject_(buffer);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLUnregisterBufferObjectName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as
-// cuGLSetBufferObjectMapFlags when USE_CUDA_DEBUG_MODE is defined. Forwards
-// the call to cuGLSetBufferObjectMapFlags_ and reports any result other than
-// CUDA_SUCCESS through GLSLogger.
-function cuGLSetBufferObjectMapFlagsShell(buffer: Cardinal; Flags: Cardinal)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLSetBufferObjectMapFlags_(buffer, Flags);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLSetBufferObjectMapFlagsName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as
-// cuGLMapBufferObjectAsync when USE_CUDA_DEBUG_MODE is defined. Forwards the
-// call to cuGLMapBufferObjectAsync_ and reports any result other than
-// CUDA_SUCCESS through GLSLogger.
-function cuGLMapBufferObjectAsyncShell(var dptr: TCUdeviceptr;
-  var size: Cardinal; buffer: Cardinal; hStream: PCUstream): TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLMapBufferObjectAsync_(dptr, size, buffer, hStream);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLMapBufferObjectAsyncName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-// Logging wrapper that InitCUDAFromLibrary installs as
-// cuGLUnmapBufferObjectAsync when USE_CUDA_DEBUG_MODE is defined. Forwards
-// the call to cuGLUnmapBufferObjectAsync_ and reports any result other than
-// CUDA_SUCCESS through GLSLogger.
-function cuGLUnmapBufferObjectAsyncShell(buffer: Cardinal; hStream: PCUstream)
-  : TCUresult;
-{$IFDEF MSWINDOWS} stdcall;{$ELSE}cdecl;{$ENDIF}
-begin
-  Result := cuGLUnmapBufferObjectAsync_(buffer, hStream);
-  if Result = CUDA_SUCCESS then
-    Exit;
-  GLSLogger.LogErrorFmt(cudasFuncRetErr, [cuGLUnmapBufferObjectAsyncName,
-    Get_CUDA_API_Error_String(Result)]);
-end;
-
-{$ENDIF GLS_CUDA_DEBUG_MODE}
-
-// Looks up an entry point in the library referenced by CUDAHandle.
-// The name with a '_v2' suffix is tried first; when that lookup yields nil
-// the unsuffixed ProcName is tried instead.
-// Returns the address found, or nil when both lookups fail.
-function GetProcAddressCUDA(ProcName: PAnsiChar): Pointer;
-var
-  Alt: AnsiString;
-begin
-  Alt := AnsiString(ProcName) + '_v2';
-  // Module handles are pointer-sized. The previous Cardinal cast discarded
-  // the upper 32 bits of the handle in 64-bit builds.
-  Result := GetProcAddress(HMODULE(CUDAHandle), PAnsiChar(Alt));
-  if Result = nil then
-    Result := GetProcAddress(HMODULE(CUDAHandle), ProcName);
-end;
-
-// Makes sure the CUDA driver library is loaded.
-// Returns True at once when CUDAHandle already refers to a module; otherwise
-// returns the outcome of InitCUDAFromLibrary(CUDAAPIDLL).
-function InitCUDA: Boolean;
-begin
-  Result := True;
-  if CUDAHandle = INVALID_MODULEHANDLE then
-    Result := InitCUDAFromLibrary(CUDAAPIDLL);
-end;
-
-// Releases the library referenced by CUDAHandle, if any, and resets the
-// handle to INVALID_MODULEHANDLE. Does nothing when no library is loaded.
-procedure CloseCUDA;
-begin
-  if CUDAHandle = INVALID_MODULEHANDLE then
-    Exit;
-  // Module handles are pointer-sized. The previous Cardinal cast discarded
-  // the upper 32 bits of the handle in 64-bit builds.
-  FreeLibrary(HMODULE(CUDAHandle));
-  CUDAHandle := INVALID_MODULEHANDLE;
-end;
-
-// Loads the CUDA driver library LibName and binds every API entry point.
-//
-// Any library loaded earlier is released first through CloseCUDA.
-// Without USE_CUDA_DEBUG_MODE the public procedural variables receive the
-// addresses returned by GetProcAddressCUDA directly. With it, the addresses
-// go to the underscore-suffixed variables and the public ones are pointed
-// at the logging *Shell wrappers.
-//
-// Returns True when the library was loaded and exports cuDriverGetVersion;
-// otherwise False, with CUDAHandle left at INVALID_MODULEHANDLE.
-function InitCUDAFromLibrary(const LibName: WideString): Boolean;
-var
-  V: Integer;
-begin
-  Result := False;
-  CloseCUDA;
-  // Always take our own reference with LoadLibraryW. A handle obtained from
-  // GetModuleHandleW is not reference counted, so handing it to FreeLibrary
-  // in CloseCUDA would drop a reference owned by someone else.
-  CUDAHandle := LoadLibraryW(PWideChar(LibName));
-
-  if CUDAHandle = INVALID_MODULEHANDLE then
-    Exit;
-
-  // cuDriverGetVersion is called unconditionally below; refuse a library
-  // that does not export it instead of calling through a nil pointer.
-  if GetProcAddressCUDA(cuDriverGetVersionName) = nil then
-  begin
-    CloseCUDA;
-    Exit;
-  end;
-{$IFNDEF USE_CUDA_DEBUG_MODE}
-  cuInit := GetProcAddressCUDA(cuInitName);
-  cuDriverGetVersion := GetProcAddressCUDA(cuDriverGetVersionName);
-  cuDeviceGet := GetProcAddressCUDA(cuDeviceGet_Name);
-  cuDeviceGetCount := GetProcAddressCUDA(cuDeviceGetCountName);
-  cuDeviceGetName := GetProcAddressCUDA(cuDeviceGetNameName);
-  cuDeviceComputeCapability := GetProcAddressCUDA
-    (cuDeviceComputeCapabilityName);
-  cuDeviceTotalMem := GetProcAddressCUDA(cuDeviceTotalMemName);
-  cuDeviceGetProperties := GetProcAddressCUDA(cuDeviceGetPropertiesName);
-  cuDeviceGetAttribute := GetProcAddressCUDA(cuDeviceGetAttributeName);
-  cuCtxCreate := GetProcAddressCUDA(cuCtxCreateName);
-  cuCtxDestroy := GetProcAddressCUDA(cuCtxDestroyName);
-  cuCtxAttach := GetProcAddressCUDA(cuCtxAttachName);
-  cuCtxDetach := GetProcAddressCUDA(cuCtxDetachName);
-  cuCtxPushCurrent := GetProcAddressCUDA(cuCtxPushCurrentName);
-  cuCtxPopCurrent := GetProcAddressCUDA(cuCtxPopCurrentName);
-  cuCtxGetDevice := GetProcAddressCUDA(cuCtxGetDeviceName);
-  cuCtxSynchronize := GetProcAddressCUDA(cuCtxSynchronizeName);
-  cuModuleLoad := GetProcAddressCUDA(cuModuleLoadName);
-  cuModuleLoadData := GetProcAddressCUDA(cuModuleLoadDataName);
-  cuModuleLoadDataEx := GetProcAddressCUDA(cuModuleLoadDataExName);
-  cuModuleLoadFatBinary := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
-  cuModuleUnload := GetProcAddressCUDA(cuModuleUnloadName);
-  cuModuleGetFunction := GetProcAddressCUDA(cuModuleGetFunctionName);
-  cuModuleGetGlobal := GetProcAddressCUDA(cuModuleGetGlobalName);
-  cuModuleGetTexRef := GetProcAddressCUDA(cuModuleGetTexRefName);
-  cuMemGetInfo := GetProcAddressCUDA(cuMemGetInfoName);
-  cuMemAlloc := GetProcAddressCUDA(cuMemAllocName);
-  cuMemAllocPitch := GetProcAddressCUDA(cuMemAllocPitchName);
-  cuMemFree := GetProcAddressCUDA(cuMemFreeName);
-  cuMemGetAddressRange := GetProcAddressCUDA(cuMemGetAddressRangeName);
-  cuMemAllocHost := GetProcAddressCUDA(cuMemAllocHostName);
-  cuMemFreeHost := GetProcAddressCUDA(cuMemFreeHostName);
-  cuMemHostAlloc := GetProcAddressCUDA(cuMemHostAllocName);
-  cuMemHostGetDevicePointer := GetProcAddressCUDA
-    (cuMemHostGetDevicePointerName);
-  cuMemHostGetFlags := GetProcAddressCUDA(cuMemHostGetFlagsName);
-  cuMemcpyHtoD := GetProcAddressCUDA(cuMemcpyHtoDName);
-  cuMemcpyDtoH := GetProcAddressCUDA(cuMemcpyDtoHName);
-  cuMemcpyDtoD := GetProcAddressCUDA(cuMemcpyDtoDName);
-  cuMemcpyDtoDAsync := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
-  cuMemcpyDtoA := GetProcAddressCUDA(cuMemcpyDtoAName);
-  cuMemcpyAtoD := GetProcAddressCUDA(cuMemcpyAtoDName);
-  cuMemcpyHtoA := GetProcAddressCUDA(cuMemcpyHtoAName);
-  cuMemcpyAtoH := GetProcAddressCUDA(cuMemcpyAtoHName);
-  cuMemcpyAtoA := GetProcAddressCUDA(cuMemcpyAtoAName);
-  cuMemcpy2D := GetProcAddressCUDA(cuMemcpy2DName);
-  cuMemcpy2DUnaligned := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
-  cuMemcpy3D := GetProcAddressCUDA(cuMemcpy3DName);
-  cuMemcpyHtoDAsync := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
-  cuMemcpyDtoHAsync := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
-  cuMemcpyHtoAAsync := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
-  cuMemcpyAtoHAsync := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
-  cuMemcpy2DAsync := GetProcAddressCUDA(cuMemcpy2DAsyncName);
-  cuMemcpy3DAsync := GetProcAddressCUDA(cuMemcpy3DAsyncName);
-  cuMemsetD8 := GetProcAddressCUDA(cuMemsetD8Name);
-  cuMemsetD16 := GetProcAddressCUDA(cuMemsetD16Name);
-  cuMemsetD32 := GetProcAddressCUDA(cuMemsetD32Name);
-  cuMemsetD2D8 := GetProcAddressCUDA(cuMemsetD2D8Name);
-  cuMemsetD2D16 := GetProcAddressCUDA(cuMemsetD2D16Name);
-  cuMemsetD2D32 := GetProcAddressCUDA(cuMemsetD2D32Name);
-  cuFuncSetBlockShape := GetProcAddressCUDA(cuFuncSetBlockShapeName);
-  cuFuncSetSharedSize := GetProcAddressCUDA(cuFuncSetSharedSizeName);
-  cuFuncGetAttribute := GetProcAddressCUDA(cuFuncGetAttributeName);
-  cuArrayCreate := GetProcAddressCUDA(cuArrayCreateName);
-  cuArrayGetDescriptor := GetProcAddressCUDA(cuArrayGetDescriptorName);
-  cuArrayDestroy := GetProcAddressCUDA(cuArrayDestroyName);
-  cuArray3DCreate := GetProcAddressCUDA(cuArray3DCreateName);
-  cuArray3DGetDescriptor := GetProcAddressCUDA(cuArray3DGetDescriptorName);
-  cuTexRefCreate := GetProcAddressCUDA(cuTexRefCreateName);
-  cuTexRefDestroy := GetProcAddressCUDA(cuTexRefDestroyName);
-  cuTexRefSetArray := GetProcAddressCUDA(cuTexRefSetArrayName);
-  cuTexRefSetAddress := GetProcAddressCUDA(cuTexRefSetAddressName);
-  cuTexRefSetAddress2D := GetProcAddressCUDA(cuTexRefSetAddress2DName);
-  cuTexRefSetFormat := GetProcAddressCUDA(cuTexRefSetFormatName);
-  cuTexRefSetAddressMode := GetProcAddressCUDA(cuTexRefSetAddressModeName);
-  cuTexRefSetFilterMode := GetProcAddressCUDA(cuTexRefSetFilterModeName);
-  cuTexRefSetFlags := GetProcAddressCUDA(cuTexRefSetFlagsName);
-  cuTexRefGetAddress := GetProcAddressCUDA(cuTexRefGetAddressName);
-  cuTexRefGetArray := GetProcAddressCUDA(cuTexRefGetArrayName);
-  cuTexRefGetAddressMode := GetProcAddressCUDA(cuTexRefGetAddressModeName);
-  cuTexRefGetFilterMode := GetProcAddressCUDA(cuTexRefGetFilterModeName);
-  cuTexRefGetFormat := GetProcAddressCUDA(cuTexRefGetFormatName);
-  cuTexRefGetFlags := GetProcAddressCUDA(cuTexRefGetFlagsName);
-  cuParamSetSize := GetProcAddressCUDA(cuParamSetSizeName);
-  cuParamSeti := GetProcAddressCUDA(cuParamSetiName);
-  cuParamSetf := GetProcAddressCUDA(cuParamSetfName);
-  cuParamSetv := GetProcAddressCUDA(cuParamSetvName);
-  cuParamSetTexRef := GetProcAddressCUDA(cuParamSetTexRefName);
-  cuLaunch := GetProcAddressCUDA(cuLaunchName);
-  cuLaunchGrid := GetProcAddressCUDA(cuLaunchGridName);
-  cuLaunchGridAsync := GetProcAddressCUDA(cuLaunchGridAsyncName);
-  cuEventCreate := GetProcAddressCUDA(cuEventCreateName);
-  cuEventRecord := GetProcAddressCUDA(cuEventRecordName);
-  cuEventQuery := GetProcAddressCUDA(cuEventQueryName);
-  cuEventSynchronize := GetProcAddressCUDA(cuEventSynchronizeName);
-  cuEventDestroy := GetProcAddressCUDA(cuEventDestroyName);
-  cuEventElapsedTime := GetProcAddressCUDA(cuEventElapsedTimeName);
-  cuStreamCreate := GetProcAddressCUDA(cuStreamCreateName);
-  cuStreamQuery := GetProcAddressCUDA(cuStreamQueryName);
-  cuStreamSynchronize := GetProcAddressCUDA(cuStreamSynchronizeName);
-  cuStreamDestroy := GetProcAddressCUDA(cuStreamDestroyName);
-  cuGLCtxCreate := GetProcAddressCUDA(cuGLCtxCreateName);
-  cuGraphicsGLRegisterBuffer := GetProcAddressCUDA
-    (cuGraphicsGLRegisterBufferName);
-  cuGraphicsGLRegisterImage := GetProcAddressCUDA
-    (cuGraphicsGLRegisterImageName);
-  cuWGLGetDevice := GetProcAddressCUDA(cuWGLGetDeviceName);
-  cuGraphicsUnregisterResource :=
-    GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
-  cuGraphicsSubResourceGetMappedArray :=
-    GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
-  cuGraphicsResourceGetMappedPointer :=
-    GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
-  cuGraphicsResourceSetMapFlags :=
-    GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
-  cuGraphicsMapResources := GetProcAddressCUDA(cuGraphicsMapResourcesName);
-  cuGraphicsUnmapResources := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
-  cuGLInit := GetProcAddressCUDA(cuGLInitName);
-  cuGLRegisterBufferObject := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
-  cuGLMapBufferObject := GetProcAddressCUDA(cuGLMapBufferObjectName);
-  cuGLUnmapBufferObject := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
-  cuGLUnregisterBufferObject := GetProcAddressCUDA
-    (cuGLUnregisterBufferObjectName);
-  cuGLSetBufferObjectMapFlags :=
-    GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
-  cuGLMapBufferObjectAsync := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
-  cuGLUnmapBufferObjectAsync := GetProcAddressCUDA
-    (cuGLUnmapBufferObjectAsyncName);
-{$ELSE}
-  cuInit_ := GetProcAddressCUDA(cuInitName);
-  cuInit := cuInitShell;
-  cuDriverGetVersion_ := GetProcAddressCUDA(cuDriverGetVersionName);
-  cuDriverGetVersion := cuDriverGetVersionShell;
-  cuDeviceGet_ := GetProcAddressCUDA(cuDeviceGet_Name);
-  cuDeviceGet := cuDeviceGetShell;
-  cuDeviceGetCount_ := GetProcAddressCUDA(cuDeviceGetCountName);
-  cuDeviceGetCount := cuDeviceGetCountShell;
-  cuDeviceGetName_ := GetProcAddressCUDA(cuDeviceGetNameName);
-  cuDeviceGetName := cuDeviceGetNameShell;
-  cuDeviceComputeCapability_ := GetProcAddressCUDA
-    (cuDeviceComputeCapabilityName);
-  cuDeviceComputeCapability := cuDeviceComputeCapabilityShell;
-  cuDeviceTotalMem_ := GetProcAddressCUDA(cuDeviceTotalMemName);
-  cuDeviceTotalMem := cuDeviceTotalMemShell;
-  cuDeviceGetProperties_ := GetProcAddressCUDA(cuDeviceGetPropertiesName);
-  cuDeviceGetProperties := cuDeviceGetPropertiesShell;
-  cuDeviceGetAttribute_ := GetProcAddressCUDA(cuDeviceGetAttributeName);
-  cuDeviceGetAttribute := cuDeviceGetAttributeShell;
-  cuCtxCreate_ := GetProcAddressCUDA(cuCtxCreateName);
-  cuCtxCreate := cuCtxCreateShell;
-  cuCtxDestroy_ := GetProcAddressCUDA(cuCtxDestroyName);
-  cuCtxDestroy := cuCtxDestroyShell;
-  cuCtxAttach_ := GetProcAddressCUDA(cuCtxAttachName);
-  cuCtxAttach := cuCtxAttachShell;
-  cuCtxDetach_ := GetProcAddressCUDA(cuCtxDetachName);
-  cuCtxDetach := cuCtxDetachShell;
-  cuCtxPushCurrent_ := GetProcAddressCUDA(cuCtxPushCurrentName);
-  cuCtxPushCurrent := cuCtxPushCurrentShell;
-  cuCtxPopCurrent_ := GetProcAddressCUDA(cuCtxPopCurrentName);
-  cuCtxPopCurrent := cuCtxPopCurrentShell;
-  cuCtxGetDevice_ := GetProcAddressCUDA(cuCtxGetDeviceName);
-  cuCtxGetDevice := cuCtxGetDeviceShell;
-  cuCtxSynchronize_ := GetProcAddressCUDA(cuCtxSynchronizeName);
-  cuCtxSynchronize := cuCtxSynchronizeShell;
-  cuModuleLoad_ := GetProcAddressCUDA(cuModuleLoadName);
-  cuModuleLoad := cuModuleLoadShell;
-  cuModuleLoadData_ := GetProcAddressCUDA(cuModuleLoadDataName);
-  cuModuleLoadData := cuModuleLoadDataShell;
-  cuModuleLoadDataEx_ := GetProcAddressCUDA(cuModuleLoadDataExName);
-  cuModuleLoadDataEx := cuModuleLoadDataExShell;
-  cuModuleLoadFatBinary_ := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
-  cuModuleLoadFatBinary := cuModuleLoadFatBinaryShell;
-  cuModuleUnload_ := GetProcAddressCUDA(cuModuleUnloadName);
-  cuModuleUnload := cuModuleUnloadShell;
-  cuModuleGetFunction_ := GetProcAddressCUDA(cuModuleGetFunctionName);
-  cuModuleGetFunction := cuModuleGetFunctionShell;
-  cuModuleGetGlobal_ := GetProcAddressCUDA(cuModuleGetGlobalName);
-  cuModuleGetGlobal := cuModuleGetGlobalShell;
-  cuModuleGetTexRef_ := GetProcAddressCUDA(cuModuleGetTexRefName);
-  cuModuleGetTexRef := cuModuleGetTexRefShell;
-  cuMemGetInfo_ := GetProcAddressCUDA(cuMemGetInfoName);
-  cuMemGetInfo := cuMemGetInfoShell;
-  cuMemAlloc_ := GetProcAddressCUDA(cuMemAllocName);
-  cuMemAlloc := cuMemAllocShell;
-  cuMemAllocPitch_ := GetProcAddressCUDA(cuMemAllocPitchName);
-  cuMemAllocPitch := cuMemAllocPitchShell;
-  cuMemFree_ := GetProcAddressCUDA(cuMemFreeName);
-  cuMemFree := cuMemFreeShell;
-  cuMemGetAddressRange_ := GetProcAddressCUDA(cuMemGetAddressRangeName);
-  cuMemGetAddressRange := cuMemGetAddressRangeShell;
-  cuMemAllocHost_ := GetProcAddressCUDA(cuMemAllocHostName);
-  cuMemAllocHost := cuMemAllocHostShell;
-  cuMemFreeHost_ := GetProcAddressCUDA(cuMemFreeHostName);
-  cuMemFreeHost := cuMemFreeHostShell;
-  cuMemHostAlloc_ := GetProcAddressCUDA(cuMemHostAllocName);
-  cuMemHostAlloc := cuMemHostAllocShell;
-  cuMemHostGetDevicePointer_ := GetProcAddressCUDA
-    (cuMemHostGetDevicePointerName);
-  cuMemHostGetDevicePointer := cuMemHostGetDevicePointerShell;
-  cuMemHostGetFlags_ := GetProcAddressCUDA(cuMemHostGetFlagsName);
-  cuMemHostGetFlags := cuMemHostGetFlagsShell;
-  cuMemcpyHtoD_ := GetProcAddressCUDA(cuMemcpyHtoDName);
-  cuMemcpyHtoD := cuMemcpyHtoDShell;
-  cuMemcpyDtoH_ := GetProcAddressCUDA(cuMemcpyDtoHName);
-  cuMemcpyDtoH := cuMemcpyDtoHShell;
-  cuMemcpyDtoD_ := GetProcAddressCUDA(cuMemcpyDtoDName);
-  cuMemcpyDtoD := cuMemcpyDtoDShell;
-  cuMemcpyDtoDAsync_ := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
-  cuMemcpyDtoDAsync := cuMemcpyDtoDAsyncShell;
-  cuMemcpyDtoA_ := GetProcAddressCUDA(cuMemcpyDtoAName);
-  cuMemcpyDtoA := cuMemcpyDtoAShell;
-  cuMemcpyAtoD_ := GetProcAddressCUDA(cuMemcpyAtoDName);
-  cuMemcpyAtoD := cuMemcpyAtoDShell;
-  cuMemcpyHtoA_ := GetProcAddressCUDA(cuMemcpyHtoAName);
-  cuMemcpyHtoA := cuMemcpyHtoAShell;
-  cuMemcpyAtoH_ := GetProcAddressCUDA(cuMemcpyAtoHName);
-  cuMemcpyAtoH := cuMemcpyAtoHShell;
-  cuMemcpyAtoA_ := GetProcAddressCUDA(cuMemcpyAtoAName);
-  cuMemcpyAtoA := cuMemcpyAtoAShell;
-  cuMemcpy2D_ := GetProcAddressCUDA(cuMemcpy2DName);
-  cuMemcpy2D := cuMemcpy2DShell;
-  cuMemcpy2DUnaligned_ := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
-  cuMemcpy2DUnaligned := cuMemcpy2DUnalignedShell;
-  cuMemcpy3D_ := GetProcAddressCUDA(cuMemcpy3DName);
-  cuMemcpy3D := cuMemcpy3DShell;
-  cuMemcpyHtoDAsync_ := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
-  cuMemcpyHtoDAsync := cuMemcpyHtoDAsyncShell;
-  cuMemcpyDtoHAsync_ := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
-  cuMemcpyDtoHAsync := cuMemcpyDtoHAsyncShell;
-  cuMemcpyHtoAAsync_ := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
-  cuMemcpyHtoAAsync := cuMemcpyHtoAAsyncShell;
-  cuMemcpyAtoHAsync_ := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
-  cuMemcpyAtoHAsync := cuMemcpyAtoHAsyncShell;
-  cuMemcpy2DAsync_ := GetProcAddressCUDA(cuMemcpy2DAsyncName);
-  cuMemcpy2DAsync := cuMemcpy2DAsyncShell;
-  cuMemcpy3DAsync_ := GetProcAddressCUDA(cuMemcpy3DAsyncName);
-  cuMemcpy3DAsync := cuMemcpy3DAsyncShell;
-  cuMemsetD8_ := GetProcAddressCUDA(cuMemsetD8Name);
-  cuMemsetD8 := cuMemsetD8Shell;
-  cuMemsetD16_ := GetProcAddressCUDA(cuMemsetD16Name);
-  cuMemsetD16 := cuMemsetD16Shell;
-  cuMemsetD32_ := GetProcAddressCUDA(cuMemsetD32Name);
-  cuMemsetD32 := cuMemsetD32Shell;
-  cuMemsetD2D8_ := GetProcAddressCUDA(cuMemsetD2D8Name);
-  cuMemsetD2D8 := cuMemsetD2D8Shell;
-  cuMemsetD2D16_ := GetProcAddressCUDA(cuMemsetD2D16Name);
-  cuMemsetD2D16 := cuMemsetD2D16Shell;
-  cuMemsetD2D32_ := GetProcAddressCUDA(cuMemsetD2D32Name);
-  cuMemsetD2D32 := cuMemsetD2D32Shell;
-  cuFuncSetBlockShape_ := GetProcAddressCUDA(cuFuncSetBlockShapeName);
-  cuFuncSetBlockShape := cuFuncSetBlockShapeShell;
-  cuFuncSetSharedSize_ := GetProcAddressCUDA(cuFuncSetSharedSizeName);
-  cuFuncSetSharedSize := cuFuncSetSharedSizeShell;
-  cuFuncGetAttribute_ := GetProcAddressCUDA(cuFuncGetAttributeName);
-  cuFuncGetAttribute := cuFuncGetAttributeShell;
-  cuArrayCreate_ := GetProcAddressCUDA(cuArrayCreateName);
-  cuArrayCreate := cuArrayCreateShell;
-  cuArrayGetDescriptor_ := GetProcAddressCUDA(cuArrayGetDescriptorName);
-  cuArrayGetDescriptor := cuArrayGetDescriptorShell;
-  cuArrayDestroy_ := GetProcAddressCUDA(cuArrayDestroyName);
-  cuArrayDestroy := cuArrayDestroyShell;
-  cuArray3DCreate_ := GetProcAddressCUDA(cuArray3DCreateName);
-  cuArray3DCreate := cuArray3DCreateShell;
-  cuArray3DGetDescriptor_ := GetProcAddressCUDA(cuArray3DGetDescriptorName);
-  cuArray3DGetDescriptor := cuArray3DGetDescriptorShell;
-  cuTexRefCreate_ := GetProcAddressCUDA(cuTexRefCreateName);
-  cuTexRefCreate := cuTexRefCreateShell;
-  cuTexRefDestroy_ := GetProcAddressCUDA(cuTexRefDestroyName);
-  cuTexRefDestroy := cuTexRefDestroyShell;
-  cuTexRefSetArray_ := GetProcAddressCUDA(cuTexRefSetArrayName);
-  cuTexRefSetArray := cuTexRefSetArrayShell;
-  cuTexRefSetAddress_ := GetProcAddressCUDA(cuTexRefSetAddressName);
-  cuTexRefSetAddress := cuTexRefSetAddressShell;
-  cuTexRefSetAddress2D_ := GetProcAddressCUDA(cuTexRefSetAddress2DName);
-  cuTexRefSetAddress2D := cuTexRefSetAddress2DShell;
-  cuTexRefSetFormat_ := GetProcAddressCUDA(cuTexRefSetFormatName);
-  cuTexRefSetFormat := cuTexRefSetFormatShell;
-  cuTexRefSetAddressMode_ := GetProcAddressCUDA(cuTexRefSetAddressModeName);
-  cuTexRefSetAddressMode := cuTexRefSetAddressModeShell;
-  cuTexRefSetFilterMode_ := GetProcAddressCUDA(cuTexRefSetFilterModeName);
-  cuTexRefSetFilterMode := cuTexRefSetFilterModeShell;
-  cuTexRefSetFlags_ := GetProcAddressCUDA(cuTexRefSetFlagsName);
-  cuTexRefSetFlags := cuTexRefSetFlagsShell;
-  cuTexRefGetAddress_ := GetProcAddressCUDA(cuTexRefGetAddressName);
-  cuTexRefGetAddress := cuTexRefGetAddressShell;
-  cuTexRefGetArray_ := GetProcAddressCUDA(cuTexRefGetArrayName);
-  cuTexRefGetArray := cuTexRefGetArrayShell;
-  cuTexRefGetAddressMode_ := GetProcAddressCUDA(cuTexRefGetAddressModeName);
-  cuTexRefGetAddressMode := cuTexRefGetAddressModeShell;
-  cuTexRefGetFilterMode_ := GetProcAddressCUDA(cuTexRefGetFilterModeName);
-  cuTexRefGetFilterMode := cuTexRefGetFilterModeShell;
-  cuTexRefGetFormat_ := GetProcAddressCUDA(cuTexRefGetFormatName);
-  cuTexRefGetFormat := cuTexRefGetFormatShell;
-  cuTexRefGetFlags_ := GetProcAddressCUDA(cuTexRefGetFlagsName);
-  cuTexRefGetFlags := cuTexRefGetFlagsShell;
-  cuParamSetSize_ := GetProcAddressCUDA(cuParamSetSizeName);
-  cuParamSetSize := cuParamSetSizeShell;
-  cuParamSeti_ := GetProcAddressCUDA(cuParamSetiName);
-  cuParamSeti := cuParamSetiShell;
-  cuParamSetf_ := GetProcAddressCUDA(cuParamSetfName);
-  cuParamSetf := cuParamSetfShell;
-  cuParamSetv_ := GetProcAddressCUDA(cuParamSetvName);
-  cuParamSetv := cuParamSetvShell;
-  cuParamSetTexRef_ := GetProcAddressCUDA(cuParamSetTexRefName);
-  cuParamSetTexRef := cuParamSetTexRefShell;
-  cuLaunch_ := GetProcAddressCUDA(cuLaunchName);
-  cuLaunch := cuLaunchShell;
-  cuLaunchGrid_ := GetProcAddressCUDA(cuLaunchGridName);
-  cuLaunchGrid := cuLaunchGridShell;
-  cuLaunchGridAsync_ := GetProcAddressCUDA(cuLaunchGridAsyncName);
-  cuLaunchGridAsync := cuLaunchGridAsyncShell;
-  cuEventCreate_ := GetProcAddressCUDA(cuEventCreateName);
-  cuEventCreate := cuEventCreateShell;
-  cuEventRecord_ := GetProcAddressCUDA(cuEventRecordName);
-  cuEventRecord := cuEventRecordShell;
-  cuEventQuery_ := GetProcAddressCUDA(cuEventQueryName);
-  cuEventQuery := cuEventQueryShell;
-  cuEventSynchronize_ := GetProcAddressCUDA(cuEventSynchronizeName);
-  cuEventSynchronize := cuEventSynchronizeShell;
-  cuEventDestroy_ := GetProcAddressCUDA(cuEventDestroyName);
-  cuEventDestroy := cuEventDestroyShell;
-  cuEventElapsedTime_ := GetProcAddressCUDA(cuEventElapsedTimeName);
-  cuEventElapsedTime := cuEventElapsedTimeShell;
-  cuStreamCreate_ := GetProcAddressCUDA(cuStreamCreateName);
-  cuStreamCreate := cuStreamCreateShell;
-  cuStreamQuery_ := GetProcAddressCUDA(cuStreamQueryName);
-  cuStreamQuery := cuStreamQueryShell;
-  cuStreamSynchronize_ := GetProcAddressCUDA(cuStreamSynchronizeName);
-  cuStreamSynchronize := cuStreamSynchronizeShell;
-  cuStreamDestroy_ := GetProcAddressCUDA(cuStreamDestroyName);
-  cuStreamDestroy := cuStreamDestroyShell;
-  cuGLCtxCreate_ := GetProcAddressCUDA(cuGLCtxCreateName);
-  cuGLCtxCreate := cuGLCtxCreateShell;
-  cuGraphicsGLRegisterBuffer_ :=
-    GetProcAddressCUDA(cuGraphicsGLRegisterBufferName);
-  cuGraphicsGLRegisterBuffer := cuGraphicsGLRegisterBufferShell;
-  cuGraphicsGLRegisterImage_ := GetProcAddressCUDA
-    (cuGraphicsGLRegisterImageName);
-  cuGraphicsGLRegisterImage := cuGraphicsGLRegisterImageShell;
-  cuWGLGetDevice_ := GetProcAddressCUDA(cuWGLGetDeviceName);
-  cuWGLGetDevice := cuWGLGetDeviceShell;
-  cuGraphicsUnregisterResource_ :=
-    GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
-  cuGraphicsUnregisterResource := cuGraphicsUnregisterResourceShell;
-  cuGraphicsSubResourceGetMappedArray_ :=
-    GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
-  cuGraphicsSubResourceGetMappedArray :=
-    cuGraphicsSubResourceGetMappedArrayShell;
-  cuGraphicsResourceGetMappedPointer_ :=
-    GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
-  cuGraphicsResourceGetMappedPointer := cuGraphicsResourceGetMappedPointerShell;
-  cuGraphicsResourceSetMapFlags_ :=
-    GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
-  cuGraphicsResourceSetMapFlags := cuGraphicsResourceSetMapFlagsShell;
-  cuGraphicsMapResources_ := GetProcAddressCUDA(cuGraphicsMapResourcesName);
-  cuGraphicsMapResources := cuGraphicsMapResourcesShell;
-  cuGraphicsUnmapResources_ := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
-  cuGraphicsUnmapResources := cuGraphicsUnmapResourcesShell;
-  // cuGLInit is bound directly here; no Shell wrapper is installed for it.
-  cuGLInit := GetProcAddressCUDA(cuGLInitName);
-  cuGLRegisterBufferObject_ := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
-  cuGLRegisterBufferObject := cuGLRegisterBufferObjectShell;
-  cuGLMapBufferObject_ := GetProcAddressCUDA(cuGLMapBufferObjectName);
-  cuGLMapBufferObject := cuGLMapBufferObjectShell;
-  cuGLUnmapBufferObject_ := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
-  cuGLUnmapBufferObject := cuGLUnmapBufferObjectShell;
-  cuGLUnregisterBufferObject_ :=
-    GetProcAddressCUDA(cuGLUnregisterBufferObjectName);
-  cuGLUnregisterBufferObject := cuGLUnregisterBufferObjectShell;
-  cuGLSetBufferObjectMapFlags_ :=
-    GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
-  cuGLSetBufferObjectMapFlags := cuGLSetBufferObjectMapFlagsShell;
-  cuGLMapBufferObjectAsync_ := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
-  cuGLMapBufferObjectAsync := cuGLMapBufferObjectAsyncShell;
-  cuGLUnmapBufferObjectAsync_ :=
-    GetProcAddressCUDA(cuGLUnmapBufferObjectAsyncName);
-  cuGLUnmapBufferObjectAsync := cuGLUnmapBufferObjectAsyncShell;
-{$ENDIF USE_CUDA_DEBUG_MODE}
-  // Start from a defined value so the log line below never prints garbage
-  // when the driver call fails without writing to V.
-  V := 0;
-  cuDriverGetVersion(V);
-  {$IFDEF USE_LOGGING}
-    LogInfoFmt('%s version %d is loaded', [CUDAAPIDLL, V]);
-  {$ENDIF}
-  Result := True;
-end;
-
-// Reports whether CUDAHandle currently differs from INVALID_MODULEHANDLE,
-// i.e. whether a library handle is being held.
-function IsCUDAInitialized: Boolean;
-begin
-  Result := not (CUDAHandle = INVALID_MODULEHANDLE);
-end;
-
-// Maps a CUDA driver API result code to a short English description.
-// The codes are tested in the order listed; a code that matches none of
-// them yields 'Unknown error'.
-function Get_CUDA_API_Error_String(AError: TCUresult): string;
-begin
-  if AError = CUDA_SUCCESS then
-    Exit('No errors');
-  if AError = CUDA_ERROR_INVALID_VALUE then
-    Exit('Invalid value');
-  if AError = CUDA_ERROR_OUT_OF_MEMORY then
-    Exit('Out of memory');
-  if AError = CUDA_ERROR_NOT_INITIALIZED then
-    Exit('Driver not initialized');
-  if AError = CUDA_ERROR_DEINITIALIZED then
-    Exit('Driver deinitialized');
-  if AError = CUDA_ERROR_NO_DEVICE then
-    Exit('No CUDA-capable device available');
-  if AError = CUDA_ERROR_INVALID_DEVICE then
-    Exit('Invalid device');
-  if AError = CUDA_ERROR_INVALID_IMAGE then
-    Exit('Invalid kernel image');
-  if AError = CUDA_ERROR_INVALID_CONTEXT then
-    Exit('Invalid context');
-  if AError = CUDA_ERROR_CONTEXT_ALREADY_CURRENT then
-    Exit('Context already current');
-  if AError = CUDA_ERROR_MAP_FAILED then
-    Exit('Map failed');
-  if AError = CUDA_ERROR_UNMAP_FAILED then
-    Exit('Unmap failed');
-  if AError = CUDA_ERROR_ARRAY_IS_MAPPED then
-    Exit('Array is mapped');
-  if AError = CUDA_ERROR_ALREADY_MAPPED then
-    Exit('Already mapped');
-  if AError = CUDA_ERROR_NO_BINARY_FOR_GPU then
-    Exit('No binary for GPU');
-  if AError = CUDA_ERROR_ALREADY_ACQUIRED then
-    Exit('Already acquired');
-  if AError = CUDA_ERROR_NOT_MAPPED then
-    Exit('Not mapped');
-  if AError = CUDA_ERROR_NOT_MAPPED_AS_ARRAY then
-    Exit('Not mapped as array');
-  if AError = CUDA_ERROR_NOT_MAPPED_AS_POINTER then
-    Exit('Not mapped as pointer');
-  if AError = CUDA_ERROR_INVALID_SOURCE then
-    Exit('Invalid source');
-  if AError = CUDA_ERROR_FILE_NOT_FOUND then
-    Exit('File not found');
-  if AError = CUDA_ERROR_INVALID_HANDLE then
-    Exit('Invalid handle');
-  if AError = CUDA_ERROR_NOT_FOUND then
-    Exit('Not found');
-  if AError = CUDA_ERROR_NOT_READY then
-    Exit('CUDA not ready');
-  if AError = CUDA_ERROR_LAUNCH_FAILED then
-    Exit('Launch failed');
-  if AError = CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES then
-    Exit('Launch exceeded resources');
-  if AError = CUDA_ERROR_LAUNCH_TIMEOUT then
-    Exit('Launch exceeded timeout');
-  if AError = CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING then
-    Exit('Launch with incompatible texturing');
-  if AError = CUDA_ERROR_POINTER_IS_64BIT then
-    Exit('Pointer is 64bit');
-  if AError = CUDA_ERROR_SIZE_IS_64BIT then
-    Exit('Size is 64bit');
-  // No listed code matched.
-  Result := 'Unknown error';
-end;
-
-end.
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit Import.CUDAApi;
+
+(*
+ * Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO USER:
+ *
+ * This source code is subject to NVIDIA ownership rights under U.S. and
+ * international Copyright laws.  Users and possessors of this source code
+ * are hereby granted a nonexclusive, royalty-free license to use this code
+ * in individual and commercial software.
+ *
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+ * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+ * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
+ * OR PERFORMANCE OF THIS SOURCE CODE.
+ *
+ * U.S. Government End Users.   This source code is a "commercial item" as
+ * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
+ * "commercial computer  software"  and "commercial computer software
+ * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
+ * and is provided to the U.S. Government only as a commercial end item.
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+ * source code with only those rights set forth herein.
+ *
+ * Any use of this source code in individual and commercial software must
+ * include, in the user documentation and internal comments to the code,
+ * the above Disclaimer and U.S. Government End Users Notice.
+ *)
+
+interface
+
+uses
+  Winapi.Windows;
+
+
+const
+  CUDAAPIDLL = 'nvcuda.dll';
+
+type
+  // CUDA device pointer and CUDA device
+  TCUdeviceptr = Pointer;
+  TCUdevice = Integer;
+
+  // The handle types below are declared as pointers to empty records.
+
+  PCUcontext = ^TCUcontext;                     // CUDA context
+  TCUcontext = record end;
+
+  PCUmodule = ^TCUmodule;                       // CUDA module
+  TCUmodule = record end;
+
+  PCUfunction = ^TCUfunction;                   // CUDA function
+  TCUfunction = record end;
+
+  PCUarray = ^TCUarray;                         // CUDA array
+  TCUarray = record end;
+
+  PCUtexref = ^TCUtexref;                       // CUDA texture reference
+  TCUtexref = record end;
+
+  PCUevent = ^TCUevent;                         // CUDA event
+  TCUevent = record end;
+
+  PCUstream = ^TCUstream;                       // CUDA stream
+  TCUstream = record end;
+
+  PPCUgraphicsResource = ^PCUgraphicsResource;  // CUDA graphics interop resource
+  PCUgraphicsResource = ^TCUgraphicsResource;
+  TCUgraphicsResource = record end;
+
+  // Flags accepted when a context is created
+  TCUctx_flags = (
+    CU_CTX_SCHED_AUTO    = 0,  // automatic scheduling
+    CU_CTX_SCHED_SPIN    = 1,  // set spin as default scheduling
+    CU_CTX_SCHED_YIELD   = 2,  // set yield as default scheduling
+    CU_CTX_SCHED_MASK    = 3,  // SPIN and YIELD bits combined
+    CU_CTX_BLOCKING_SYNC = 4,  // use blocking synchronization
+    CU_CTX_MAP_HOST      = 8,  // support mapped pinned allocations
+    CU_CTX_FLAGS_MASK    = 15  // all flag bits above combined
+  );
+
+  // Flags accepted when an event is created
+  TCUevent_flags = (
+    CU_EVENT_DEFAULT       = 0,  // default event flag
+    CU_EVENT_BLOCKING_SYNC = 1   // event uses blocking synchronization
+  );
+
+  // Element formats of a CUDA array
+  TCUarray_format = (
+    CU_AD_FORMAT_UNSIGNED_INT8  = $01,  // unsigned 8-bit integers
+    CU_AD_FORMAT_UNSIGNED_INT16 = $02,  // unsigned 16-bit integers
+    CU_AD_FORMAT_UNSIGNED_INT32 = $03,  // unsigned 32-bit integers
+    CU_AD_FORMAT_SIGNED_INT8    = $08,  // signed 8-bit integers
+    CU_AD_FORMAT_SIGNED_INT16   = $09,  // signed 16-bit integers
+    CU_AD_FORMAT_SIGNED_INT32   = $0A,  // signed 32-bit integers
+    CU_AD_FORMAT_HALF           = $10,  // 16-bit floating point
+    CU_AD_FORMAT_FLOAT          = $20   // 32-bit floating point
+  );
+
+  // Addressing modes of a texture reference
+  TCUaddress_mode = (
+    CU_TR_ADDRESS_MODE_WRAP   = 0,  // wrapping address mode
+    CU_TR_ADDRESS_MODE_CLAMP  = 1,  // clamp to edge address mode
+    CU_TR_ADDRESS_MODE_MIRROR = 2   // mirror address mode
+  );
+
+  // Filtering modes of a texture reference
+  TCUfilter_mode = (
+    CU_TR_FILTER_MODE_POINT  = 0,  // point filter mode
+    CU_TR_FILTER_MODE_LINEAR = 1   // linear filter mode
+  );
+
+  // Device attribute selectors (the attrib argument of TcuDeviceGetAttribute); ordinals start at 1
+  TCUdevice_attribute = (
+    // Maximum number of threads per block
+    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
+    // Maximum block dimension X
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
+    // Maximum block dimension Y
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
+    // Maximum block dimension Z
+    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
+    // Maximum grid dimension X
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
+    // Maximum grid dimension Y
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
+    // Maximum grid dimension Z
+    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
+    // Maximum shared memory available per block in bytes
+    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
+    // Deprecated alias with the same ordinal (8); use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
+    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
+    // Memory available on device for __constant__ variables in a CUDA C kernel in bytes
+    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
+    // Warp size in threads
+    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
+    // Maximum pitch in bytes allowed by memory copies
+    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
+    // Maximum number of 32-bit registers available per block
+    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
+    // Deprecated alias with the same ordinal (12); use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
+    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
+    // Peak clock frequency in kilohertz
+    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
+    // Alignment requirement for textures
+    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
+    // Device can possibly copy memory and execute a kernel concurrently
+    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
+    // Number of multiprocessors on device
+    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
+    // Specifies whether there is a run time limit on kernels
+    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
+    // Device is integrated with host memory
+    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
+    // Device can map host memory into CUDA address space
+    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
+    // Compute mode (see TCUcomputemode for the possible values)
+    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
+    );
+
+  (*
+    * CUDA limits: selectors for the two size limits listed below
+  *)
+  TcudaLimit = (
+    // GPU thread stack size
+	cudaLimitStackSize = $00,
+    // GPU printf FIFO size
+    cudaLimitPrintfFifoSize = $01
+    );
+
+  // Legacy device properties record; passed as the var prop argument of TcuDeviceGetProperties
+  TCUdevprop = record
+    // Maximum number of threads per block
+    maxThreadsPerBlock: Integer;
+    // Maximum size of each dimension of a block (three entries, indices 0..2)
+	maxThreadsDim: array [0 .. 2] of Integer;
+    // Maximum size of each dimension of a grid (three entries, indices 0..2)
+    maxGridSize: array [0 .. 2] of Integer;
+    // Shared memory available per block in bytes
+    sharedMemPerBlock: Integer;
+    // Constant memory available on device in bytes
+    totalConstantMemory: Integer;
+    // Warp size in threads
+    SIMDWidth: Integer;
+    // Maximum pitch in bytes allowed by memory copies
+    memPitch: Integer;
+    // 32-bit registers available per block
+    regsPerBlock: Integer;
+    // Clock frequency in kilohertz
+    clockRate: Integer;
+    // Alignment requirement for textures
+    textureAlign: Integer;
+  end;
+
+  // Function attribute selectors (the attrib argument of TcuFuncGetAttribute)
+  TCUfunction_attribute = (
+
+    (* The number of threads beyond which a launch of the function would fail.
+     * This number depends on both the function and the device on which the
+     * function is currently loaded. *)
+    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
+
+    (* The size in bytes of statically-allocated shared memory required by
+     * this function. This does not include dynamically-allocated shared
+     * memory requested by the user at runtime. *)
+    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
+
+    { * The size in bytes of user-allocated constant memory required by this
+      * function. }
+    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
+
+    { * The size in bytes of thread local memory used by this function. }
+    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
+
+    { * The number of registers used by each thread of this function. }
+    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
+
+    CU_FUNC_ATTRIBUTE_MAX); // No explicit value: takes the next ordinal (5), i.e. the count of attributes above
+
+  // Memory types; used for the srcMemoryType/dstMemoryType fields of the 2D and 3D copy records (ordinals start at 1)
+  TCUmemorytype = (
+    // Host memory
+	CU_MEMORYTYPE_HOST = $01,
+    // Device memory
+    CU_MEMORYTYPE_DEVICE = $02,
+    // Array memory
+    CU_MEMORYTYPE_ARRAY = $03
+    );
+
+  // Compute modes (the values referred to by CU_DEVICE_ATTRIBUTE_COMPUTE_MODE)
+  TCUcomputemode = (
+    // Default compute mode (Multiple contexts allowed per device)
+	CU_COMPUTEMODE_DEFAULT = 0,
+    // Compute-exclusive mode (Only one context can be present on this device at a time)
+    CU_COMPUTEMODE_EXCLUSIVE = 1,
+    // Compute-prohibited mode (No contexts can be created on this device at this time)
+    CU_COMPUTEMODE_PROHIBITED = 2
+    );
+
+  // Online (JIT) compiler options; only the first member has an explicit ordinal, the rest follow sequentially
+  TCUjit_option = (
+    { * Max number of registers that a thread may use. }
+
+    CU_JIT_MAX_REGISTERS = 0,
+
+    { * IN: Specifies minimum number of threads per block to target compilation
+      * for.
+      * OUT: Returns the number of threads the compiler actually targeted.
+      * This restricts the resource utilization of the compiler (e.g. max
+      * registers) such that a block with the given number of threads should be
+      * able to launch based on register limitations. Note, this option does not
+      * currently take into account any other resource limitations, such as
+      * shared memory utilization. }
+    CU_JIT_THREADS_PER_BLOCK,
+
+    { * Returns a float value in the option of the wall clock time, in
+      * milliseconds, spent creating the cubin }
+    CU_JIT_WALL_TIME,
+
+    { * Pointer to a buffer in which to print any log messages from PTXAS
+      * that are informational in nature }
+    CU_JIT_INFO_LOG_BUFFER,
+
+    { * IN: Log buffer size in bytes.  Log messages will be capped at this size
+      * (including null terminator).
+      * OUT: Amount of log buffer filled with messages }
+    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+
+    { * Pointer to a buffer in which to print any log messages from PTXAS that
+      * reflect errors }
+    CU_JIT_ERROR_LOG_BUFFER,
+
+    { * IN: Log buffer size in bytes.  Log messages will be capped at this size
+      * (including null terminator).
+      * OUT: Amount of log buffer filled with messages }
+    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+
+    { * Level of optimizations to apply to generated code (0 - 4), with 4
+      * being the default and highest level of optimizations. }
+    CU_JIT_OPTIMIZATION_LEVEL,
+
+    { * No option value required. Determines the target based on the current
+      * attached context (default) }
+    CU_JIT_TARGET_FROM_CUCONTEXT,
+
+    { * Target is chosen based on the supplied TCUjit_target value. }
+    CU_JIT_TARGET,
+
+    { * Specifies choice of fallback strategy if matching cubin is not found.
+      * Choice is based on the supplied TCUjit_fallback value. }
+    CU_JIT_FALLBACK_STRATEGY );
+
+  // Online compilation targets (value for the CU_JIT_TARGET option); ordinals run 0..3
+  TCUjit_target = (
+    // Compute device class 1.0
+    CU_TARGET_COMPUTE_10 = 0,
+    // Compute device class 1.1
+    CU_TARGET_COMPUTE_11,
+    // Compute device class 1.2
+    CU_TARGET_COMPUTE_12,
+    // Compute device class 1.3
+    CU_TARGET_COMPUTE_13
+    );
+
+  // Cubin matching fallback strategies (value for the CU_JIT_FALLBACK_STRATEGY option)
+  TCUjit_fallback = (
+    // Prefer to compile ptx
+    CU_PREFER_PTX = 0,
+    // Prefer to fall back to compatible binary code
+    CU_PREFER_BINARY);
+
+  // Flags to register a graphics resource (only the "none" value, 0, is declared)
+  TCUgraphicsRegisterFlags = (CU_GRAPHICS_REGISTER_FLAGS_NONE = $00000000);
+
+  // Flags for mapping and unmapping interop resources: none (0), read-only (1), write-discard (2)
+  TCUgraphicsMapResourceFlags =
+    (CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = $00000000,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = $00000001,
+    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = $00000002);
+
+  // Array indices for cube faces, ordered +X, -X, +Y, -Y, +Z, -Z (ordinals 0..5)
+  TCUarray_cubemap_face = (
+    // Positive X face of cubemap
+	CU_CUBEMAP_FACE_POSITIVE_X = $00000000,
+    // Negative X face of cubemap
+    CU_CUBEMAP_FACE_NEGATIVE_X = $00000001,
+    // Positive Y face of cubemap
+    CU_CUBEMAP_FACE_POSITIVE_Y = $00000002,
+    // Negative Y face of cubemap
+    CU_CUBEMAP_FACE_NEGATIVE_Y = $00000003,
+    // Positive Z face of cubemap
+    CU_CUBEMAP_FACE_POSITIVE_Z = $00000004,
+    // Negative Z face of cubemap
+    CU_CUBEMAP_FACE_NEGATIVE_Z = $00000005
+    );
+
+  (*
+    * CUDA function attributes: memory sizes, launch limits and version info of one function
+  *)
+
+  TcudaFuncAttributes = record
+    // Size of shared memory in bytes
+    sharedSizeBytes: NativeUInt;
+    // Size of constant memory in bytes
+    constSizeBytes: NativeUInt;
+    // Size of local memory in bytes
+    localSizeBytes: NativeUInt;
+    // Maximum number of threads per block
+    maxThreadsPerBlock: Integer;
+    // Number of registers used
+    numRegs: Integer;
+    
+    (* PTX virtual architecture version for which the function was
+      *  compiled. This value is the major PTX version * 10 + the minor PTX
+      *  version, so a PTX version 1.3 function would return the value 13.
+      *  For device emulation kernels, this is set to 9999. *)
+    ptxVersion: Integer;
+    (* Binary architecture version for which the function was compiled.
+      *  This value is the major binary version * 10 + the minor binary version,
+      *  so a binary version 1.3 function would return the value 13.
+      *  For device emulation kernels, this is set to 9999. *)
+    binaryVersion: Integer;
+    __cudaReserved: array [0 .. 5] of Integer; // Six reserved Integer slots; presumably padding for future fields
+  end;
+
+  (*
+    * CUDA function cache configurations: preference between shared memory and L1 cache
+  *)
+
+  TcudaFuncCache = (
+    // Default function cache configuration, no preference
+	cudaFuncCachePreferNone = 0,
+    // Prefer larger shared memory and smaller L1 cache
+    cudaFuncCachePreferShared = 1,
+    // Prefer larger L1 cache and smaller shared memory
+    cudaFuncCachePreferL1 = 2
+    );
+
+  // ************************************
+  // **
+  // **    Error codes
+  // **
+  // ************************************
+
+  // Result code type: a distinct type derived from Cardinal, used as the result of the driver function types
+
+  TCUresult = type Cardinal;
+
+const // Result codes, grouped by hundreds; meant to be compared with TCUresult values
+  CUDA_SUCCESS: TCUresult = 0;          /// < No errors (the only typed constant here; the codes below are untyped integers)
+  CUDA_ERROR_INVALID_VALUE = 1;         /// < Invalid value
+  CUDA_ERROR_OUT_OF_MEMORY = 2;         /// < Out of memory
+  CUDA_ERROR_NOT_INITIALIZED = 3;       /// < Driver not initialized
+  CUDA_ERROR_DEINITIALIZED = 4;         /// < Driver deinitialized
+
+  CUDA_ERROR_NO_DEVICE = 100;           /// < No CUDA-capable device available
+  CUDA_ERROR_INVALID_DEVICE = 101;      /// < Invalid device
+
+  CUDA_ERROR_INVALID_IMAGE = 200;       /// < Invalid kernel image
+  CUDA_ERROR_INVALID_CONTEXT = 201;     /// < Invalid context
+  CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202;  /// < Context already current
+  CUDA_ERROR_MAP_FAILED = 205;          /// < Map failed (203 and 204 are not declared here)
+  CUDA_ERROR_UNMAP_FAILED = 206;        /// < Unmap failed
+  CUDA_ERROR_ARRAY_IS_MAPPED = 207;     /// < Array is mapped
+  CUDA_ERROR_ALREADY_MAPPED = 208;      /// < Already mapped
+  CUDA_ERROR_NO_BINARY_FOR_GPU = 209;   /// < No binary for GPU
+  CUDA_ERROR_ALREADY_ACQUIRED = 210;    /// < Already acquired
+  CUDA_ERROR_NOT_MAPPED = 211;          /// < Not mapped
+  CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212;    /// < Mapped resource not available for access as an array
+  CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213;  /// < Mapped resource not available for access as a pointer
+
+  CUDA_ERROR_INVALID_SOURCE = 300;       /// < Invalid source
+  CUDA_ERROR_FILE_NOT_FOUND = 301;       /// < File not found
+
+  CUDA_ERROR_INVALID_HANDLE = 400;       /// < Invalid handle
+
+  CUDA_ERROR_NOT_FOUND = 500;            /// < Not found
+
+  CUDA_ERROR_NOT_READY = 600;            /// < CUDA not ready
+
+  CUDA_ERROR_LAUNCH_FAILED = 700;           /// < Launch failed
+  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701; /// < Launch exceeded resources
+  CUDA_ERROR_LAUNCH_TIMEOUT = 702;          /// < Launch exceeded timeout
+  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703; /// < Launch with incompatible texturing
+
+  CUDA_ERROR_POINTER_IS_64BIT = 800;        /// < Attempted to retrieve 64-bit pointer via 32-bit API function
+  CUDA_ERROR_SIZE_IS_64BIT = 801;           /// < Attempted to retrieve 64-bit size via 32-bit API function
+
+  CUDA_ERROR_UNKNOWN = 999;                 /// < Unknown error
+
+const // Bit flags for cuMemHostAlloc; the three values are distinct bits ($01, $02, $04)
+
+  { * If set, host memory is portable between CUDA contexts.
+    * Flag for cuMemHostAlloc }
+  CU_MEMHOSTALLOC_PORTABLE = $01;
+
+  { * If set, host memory is mapped into CUDA address space and
+    * cuMemHostGetDevicePointer may be called on the host pointer.
+    * Flag for cuMemHostAlloc }
+  CU_MEMHOSTALLOC_DEVICEMAP = $02;
+
+  { * If set, host memory is allocated as write-combined - fast to write,
+    * faster to DMA, slow to read except via SSE4 streaming load instruction
+    * (MOVNTDQA).
+    * Flag for cuMemHostAlloc }
+  CU_MEMHOSTALLOC_WRITECOMBINED = $04;
+
+  // 2D memory copy parameters: a source description, a destination description, then the extent to copy
+type
+
+  PCUDA_MEMCPY2D = ^TCUDA_MEMCPY2D; // Pointer form, taken by TcuMemcpy2D
+
+  TCUDA_MEMCPY2D = record // NOTE(review): size of the enum-typed fields depends on the enum-size directive in effect, not visible here - confirm layout
+    srcXInBytes,       /// < Source X in bytes
+    srcY: Cardinal;    /// < Source Y
+
+    srcMemoryType: TCUmemorytype;  /// < Source memory type (host, device, array)
+    srcHost: Pointer;              /// < Source host pointer
+    srcDevice: TCUdeviceptr;       /// < Source device pointer
+    srcArray: PCUarray;            /// < Source array reference
+    srcPitch: Cardinal;            /// < Source pitch (ignored when src is array)
+
+    dstXInBytes,                   /// < Destination X in bytes
+    dstY: Cardinal;                /// < Destination Y
+    dstMemoryType: TCUmemorytype;  /// < Destination memory type (host, device, array)
+    dstHost: Pointer;              /// < Destination host pointer
+    dstDevice: TCUdeviceptr;       /// < Destination device pointer
+    dstArray: PCUarray;            /// < Destination array reference
+    dstPitch: Cardinal;            /// < Destination pitch (ignored when dst is array)
+
+    WidthInBytes: Cardinal;        /// < Width of 2D memory copy in bytes
+    Height: Cardinal;              /// < Height of 2D memory copy
+  end;
+
+  // 3D memory copy parameters: same shape as the 2D record plus Z/LOD/height fields and two reserved pointers
+  TCUDA_MEMCPY3D = record // NOTE(review): size of the enum-typed fields depends on the enum-size directive in effect, not visible here - confirm layout
+    srcXInBytes,                        /// < Source X in bytes
+    srcY,                               /// < Source Y
+    srcZ: Cardinal;                     /// < Source Z
+    srcLOD: Cardinal;                   /// < Source LOD
+    srcMemoryType: TCUmemorytype;       /// < Source memory type (host, device, array)
+    srcHost: Pointer;                   /// < Source host pointer
+    srcDevice: TCUdeviceptr;            /// < Source device pointer
+    srcArray: PCUarray;                 /// < Source array reference
+    reserved0: Pointer;                 /// < Must be NULL (nil)
+    srcPitch: Cardinal;                 /// < Source pitch (ignored when src is array)
+    srcHeight: Cardinal;                /// < Source height (ignored when src is array; may be 0 if Depth==1)
+
+    dstXInBytes,                        /// < Destination X in bytes
+    dstY,                               /// < Destination Y
+    dstZ: Cardinal;                     /// < Destination Z
+    dstLOD: Cardinal;                   /// < Destination LOD
+    dstMemoryType: TCUmemorytype;       /// < Destination memory type (host, device, array)
+    dstHost: Pointer;                   /// < Destination host pointer
+    dstDevice: TCUdeviceptr;            /// < Destination device pointer
+    dstArray: PCUarray;                 /// < Destination array reference
+    reserved1: Pointer;                 /// < Must be NULL (nil)
+    dstPitch: Cardinal;                 /// < Destination pitch (ignored when dst is array)
+    dstHeight: Cardinal;                /// < Destination height (ignored when dst is array; may be 0 if Depth==1)
+
+    WidthInBytes: Cardinal;             /// < Width of 3D memory copy in bytes
+    Height: Cardinal;                   /// < Height of 3D memory copy
+    Depth: Cardinal;                    /// < Depth of 3D memory copy
+  end;
+
+  // Array descriptor: dimensions, element format and channel count (see TcuArrayCreate / TcuArrayGetDescriptor)
+  PCUDA_ARRAY_DESCRIPTOR = ^TCUDA_ARRAY_DESCRIPTOR;
+
+  TCUDA_ARRAY_DESCRIPTOR = record
+    Width: Cardinal;                    /// < Width of array
+    Height: Cardinal;                   /// < Height of array
+    Format: TCUarray_format;            /// < Array format
+    NumChannels: Cardinal;              /// < Channels per array element
+  end;
+
+  // 3D array descriptor: the 2D descriptor fields plus Depth and Flags (see TcuArray3DCreate / TcuArray3DGetDescriptor)
+  TCUDA_ARRAY3D_DESCRIPTOR = record
+    Width: Cardinal;                     /// < Width of 3D array
+    Height: Cardinal;                    /// < Height of 3D array
+    Depth: Cardinal;                     /// < Depth of 3D array
+    Format: TCUarray_format;             /// < Array format
+    NumChannels: Cardinal;               /// < Channels per array element
+    Flags: Cardinal;                     /// < Flags
+  end;
+
+  // Flags to map or unmap a resource; no explicit values, so the ordinals are 0, 1 and 2 in declaration order
+  TCUGLmap_flags = (CU_GL_MAP_RESOURCE_FLAGS_NONE,
+    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY, CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
+
+const // Texture-reference flag values and the default-texunit marker
+  { * Override the texref format with a format inferred from the array.
+    * Flag for cuTexRefSetArray }
+  CU_TRSA_OVERRIDE_FORMAT = $01;
+
+  { * Read the texture as integers rather than promoting the values to floats
+    * in the range [0,1].
+    * Flag for cuTexRefSetFlags }
+  CU_TRSF_READ_AS_INTEGER = $01;
+
+  { * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
+    * Flag for cuTexRefSetFlags }
+  CU_TRSF_NORMALIZED_COORDINATES = $02;
+
+  { * For texture references loaded into the module, use default texunit from
+    * texture reference. Presumably passed as texunit to cuParamSetTexRef. }
+  CU_PARAM_TR_DEFAULT = -1;
+
+type
+  TDim3 = array [0 .. 2] of LongWord; // Three unsigned 32-bit components, indices 0..2
+
+{$IFDEF MSWINDOWS}
+type
+  HGPUNV = Pointer; // NOTE(review): declared only under MSWINDOWS, yet TcuWGLGetDevice uses it unconditionally - confirm other targets
+{$ENDIF}
+
+type // stdcall procedural types, one per driver entry point; same-named variables follow in the var section
+  TcuInit = function(Flags: Cardinal): TCUresult;stdcall;
+  TcuDriverGetVersion = function(out driverVersion: Integer): TCUresult;stdcall;
+  TcuDeviceGet = function(var device: TCUdevice; ordinal: Integer): TCUresult;stdcall;
+  TcuDeviceGetCount = function(var count: Integer): TCUresult;stdcall;
+  TcuDeviceGetName = function(name: PAnsiChar; len: Integer; dev: TCUdevice): TCUresult;stdcall; // name: caller-supplied buffer, len presumably its size
+  TcuDeviceComputeCapability = function(var major: Integer; var minor: Integer; dev: TCUdevice): TCUresult;stdcall;
+  TcuDeviceTotalMem = function(bytes: PSize_t; dev: TCUdevice): TCUresult;stdcall;
+  TcuDeviceGetProperties = function(var prop: TCUdevprop; dev: TCUdevice): TCUresult;stdcall;
+  TcuDeviceGetAttribute = function(pi: PSize_t; attrib: TCUdevice_attribute; dev: TCUdevice): TCUresult;stdcall; // NOTE(review): pi is PSize_t here but TcuFuncGetAttribute uses var pi: Integer - confirm the width written
+  TcuCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal; dev: TCUdevice): TCUresult;stdcall; // Context entry points; Flags is a plain Cardinal (cf. TCUctx_flags)
+  TcuCtxDestroy = function(ctx: PCUcontext): TCUresult;stdcall;
+  TcuCtxAttach = function(var pctx: PCUcontext; Flags: Cardinal): TCUresult;stdcall;
+  TcuCtxDetach = function(ctx: PCUcontext): TCUresult;stdcall;
+  TcuCtxPushCurrent = function(ctx: PCUcontext): TCUresult;stdcall;
+  TcuCtxPopCurrent = function(var pctx: PCUcontext): TCUresult;stdcall;
+  TcuCtxGetDevice = function(var device: TCUdevice): TCUresult;stdcall;
+  TcuCtxSynchronize = function: TCUresult;stdcall;
+  TcuModuleLoad = function(var module: PCUmodule; const fname: PAnsiChar): TCUresult;stdcall; // Module entry points; names are ANSI strings
+  TcuModuleLoadData = function(var module: PCUmodule; const image: PAnsiChar): TCUresult;stdcall;
+  TcuModuleLoadDataEx = function(var module: PCUmodule; var image; // image and optionValues are untyped var parameters
+    numOptions: Cardinal; var options: TCUjit_option; var optionValues): TCUresult;stdcall; // options: presumably the first of numOptions entries
+  TcuModuleLoadFatBinary = function(var module: PCUmodule; var fatCubin): TCUresult;stdcall;
+  TcuModuleUnload = function(hmod: PCUmodule): TCUresult;stdcall;
+  TcuModuleGetFunction = function(out hfunc: PCUfunction; hmod: PCUmodule;
+    const name: PAnsiChar): TCUresult;stdcall;
+  TcuModuleGetGlobal = function(out dptr: TCUdeviceptr; var bytes: Cardinal;
+    hmod: PCUmodule; const name: PAnsiChar): TCUresult;stdcall;
+  TcuModuleGetTexRef = function(out pTexRef: PCUtexref; hmod: PCUmodule;
+    const name: PAnsiChar): TCUresult;stdcall;
+  TcuMemGetInfo = function(var free: Cardinal; var total: Cardinal): TCUresult;stdcall; // Memory management; sizes are Cardinal (32-bit) throughout
+  TcuMemAlloc = function(var dptr: TCUdeviceptr; bytesize: Cardinal): TCUresult;stdcall;
+  TcuMemAllocPitch = function(var dptr: TCUdeviceptr; var pPitch: Cardinal;
+    WidthInBytes: Cardinal; Height: Cardinal; ElementSizeBytes: Cardinal): TCUresult;stdcall;
+  TcuMemFree = function(dptr: TCUdeviceptr): TCUresult;stdcall;
+  TcuMemGetAddressRange = function(var pbase: TCUdeviceptr; var psize: Cardinal;
+    dptr: TCUdeviceptr): TCUresult;stdcall;
+  TcuMemAllocHost = function(var pp; bytesize: Cardinal): TCUresult;stdcall; // pp is an untyped var (TcuMemHostAlloc uses var pp: Pointer)
+  TcuMemFreeHost = function(p: Pointer): TCUresult;stdcall;
+  TcuMemHostAlloc = function(var pp: Pointer; bytesize: Cardinal; Flags: Cardinal): TCUresult;stdcall; // Flags: see the CU_MEMHOSTALLOC_* constants
+  TcuMemHostGetDevicePointer = function(var pdptr: TCUdeviceptr; p: Pointer; Flags: Cardinal): TCUresult;stdcall;
+  TcuMemHostGetFlags = function(var pFlags: Cardinal; var p): TCUresult;stdcall;
+  TcuMemcpyHtoD = function(dstDevice: TCUdeviceptr; const srcHost: Pointer; // Copy entry points; H = host, D = device, A = array in the names
+    ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyDtoH = function(const dstHost: Pointer; srcDevice: TCUdeviceptr;
+    ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyDtoD = function(dstDevice: TCUdeviceptr; srcDevice: TCUdeviceptr;
+    ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyDtoDAsync = function(dstDevice: TCUdeviceptr;
+    srcDevice: TCUdeviceptr; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpyDtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
+    srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyAtoD = function(dstDevice: TCUdeviceptr; hSrc: PCUarray;
+    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyHtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
+    pSrc: Pointer; ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyAtoH = function(dstHost: Pointer; srcArray: PCUarray;
+    SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpyAtoA = function(dstArray: PCUarray; dstIndex: Cardinal;
+    srcArray: PCUarray; SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+  TcuMemcpy2D = function(const pCopy: PCUDA_MEMCPY2D): TCUresult;stdcall; // Takes the pointer type; the other 2D/3D variants take the record by var
+  TcuMemcpy2DUnaligned = function(var pCopy: TCUDA_MEMCPY2D): TCUresult;stdcall;
+  TcuMemcpy3D = function(var pCopy: TCUDA_MEMCPY3D): TCUresult;stdcall;
+  TcuMemcpyHtoDAsync = function(dstDevice: TCUdeviceptr; var srcHost; // Async variants take host buffers as untyped var plus a stream handle
+    ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpyDtoHAsync = function(var dstHost; srcDevice: TCUdeviceptr;
+    ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpyHtoAAsync = function(dstArray: PCUarray; dstIndex: Cardinal;
+    var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpyAtoHAsync = function(var dstHost; srcArray: PCUstream; // NOTE(review): srcArray is PCUstream here but PCUarray in TcuMemcpyAtoH - looks like a typo, confirm
+    SrcIndex: Cardinal; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpy2DAsync = function(var pCopy: TCUDA_MEMCPY2D; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemcpy3DAsync = function(var pCopy: TCUDA_MEMCPY3D; hStream: PCUstream): TCUresult;stdcall;
+  TcuMemsetD8 = function(dstDevice: TCUdeviceptr; ub: Byte; N: Cardinal): TCUresult;stdcall; // Memset entry points; D8/D16/D32 take Byte/Word/Cardinal fill values
+  TcuMemsetD16 = function(dstDevice: TCUdeviceptr; uw: Word; N: Cardinal): TCUresult;stdcall;
+  TcuMemsetD32 = function(dstDevice: TCUdeviceptr; ui: Cardinal; N: Cardinal): TCUresult;stdcall;
+  TcuMemsetD2D8 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal; // 2D variants add a pitch and a Width x Height extent
+    ub: Byte; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+  TcuMemsetD2D16 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
+    uw: Word; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+  TcuMemsetD2D32 = function(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
+    ui: Cardinal; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+  TcuFuncSetBlockShape = function(hfunc: PCUfunction; x: Integer; y: Integer; // Per-function configuration and attribute query
+    z: Integer): TCUresult;stdcall;
+  TcuFuncSetSharedSize = function(hfunc: PCUfunction; bytes: Cardinal): TCUresult;stdcall;
+  TcuFuncGetAttribute = function(var pi: Integer; attrib: TCUfunction_attribute; // pi receives the value selected by attrib
+    hfunc: PCUfunction): TCUresult;stdcall;
+  TcuArrayCreate = function(var pHandle: PCUarray; // Array entry points; descriptors are passed by var
+    var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult;stdcall;
+  TcuArrayGetDescriptor = function(var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
+    hArray: PCUarray): TCUresult;stdcall;
+  TcuArrayDestroy = function(hArray: PCUarray): TCUresult;stdcall;
+  TcuArray3DCreate = function(var pHandle: PCUarray;
+    var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult;stdcall;
+  TcuArray3DGetDescriptor = function(var pArrayDescriptor
+    : TCUDA_ARRAY3D_DESCRIPTOR; hArray: PCUarray): TCUresult;stdcall;
+  TcuTexRefCreate = function(var pTexRef: PCUtexref): TCUresult;stdcall; // Texture reference entry points: create/destroy, setters, then getters
+  TcuTexRefDestroy = function(hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuTexRefSetArray = function(hTexRef: PCUtexref; hArray: PCUarray;
+    Flags: Cardinal): TCUresult;stdcall; // Flags: see CU_TRSA_OVERRIDE_FORMAT
+  TcuTexRefSetAddress = function(var ByteOffset: Cardinal; hTexRef: PCUtexref;
+    dptr: TCUdeviceptr; bytes: Cardinal): TCUresult;stdcall;
+  TcuTexRefSetAddress2D = function(hTexRef: PCUtexref;
+    var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr; Pitch: Cardinal)
+    : TCUresult;stdcall;
+  TcuTexRefSetFormat = function(hTexRef: PCUtexref; fmt: TCUarray_format;
+    NumPackedComponents: Integer): TCUresult;stdcall;
+  TcuTexRefSetAddressMode = function(hTexRef: PCUtexref; dim: Integer;
+    am: TCUaddress_mode): TCUresult;stdcall;
+  TcuTexRefSetFilterMode = function(hTexRef: PCUtexref; fm: TCUfilter_mode)
+    : TCUresult;stdcall;
+  TcuTexRefSetFlags = function(hTexRef: PCUtexref; Flags: Cardinal): TCUresult;stdcall; // Flags: see the CU_TRSF_* constants
+  TcuTexRefGetAddress = function(var pdptr: TCUdeviceptr; hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuTexRefGetArray = function(var phArray: PCUarray; hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuTexRefGetAddressMode = function(var pam: TCUaddress_mode;
+    hTexRef: PCUtexref; dim: Integer): TCUresult;stdcall;
+  TcuTexRefGetFilterMode = function(var pfm: TCUfilter_mode; hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuTexRefGetFormat = function(var pFormat: TCUarray_format;
+    var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuTexRefGetFlags = function(var pFlags: Cardinal; hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuParamSetSize = function(hfunc: PCUfunction; numbytes: Cardinal): TCUresult;stdcall; // Kernel parameter setup and launch entry points
+  TcuParamSeti = function(hfunc: PCUfunction; offset: Integer; value: Cardinal)
+    : TCUresult;stdcall;
+  TcuParamSetf = function(hfunc: PCUfunction; offset: Integer; value: Single)
+    : TCUresult;stdcall;
+  TcuParamSetv = function(hfunc: PCUfunction; offset: Integer; var ptr; // ptr is an untyped var; numbytes gives its size
+    numbytes: Cardinal): TCUresult;stdcall;
+  TcuParamSetTexRef = function(hfunc: PCUfunction; texunit: Integer;
+    hTexRef: PCUtexref): TCUresult;stdcall;
+  TcuLaunch = function(f: PCUfunction): TCUresult;stdcall;
+  TcuLaunchGrid = function(f: PCUfunction; grid_width: Integer;
+    grid_height: Integer): TCUresult;stdcall;
+  TcuLaunchGridAsync = function(f: PCUfunction; grid_width: Integer;
+    grid_height: Integer; hStream: PCUstream): TCUresult;stdcall;
+  TcuEventCreate = function(var phEvent: PCUevent; Flags: Cardinal): TCUresult;stdcall; // Event entry points; Flags is Cardinal (cf. TCUevent_flags)
+  TcuEventRecord = function(hEvent: PCUevent; hStream: PCUstream): TCUresult;stdcall;
+  TcuEventQuery = function(hEvent: PCUevent): TCUresult;stdcall;
+  TcuEventSynchronize = function(hEvent: PCUevent): TCUresult;stdcall;
+  TcuEventDestroy = function(hEvent: PCUevent): TCUresult;stdcall;
+  TcuEventElapsedTime = function(var pMilliseconds: Single; hStart: PCUevent;
+    hEnd: PCUevent): TCUresult;stdcall;
+  TcuStreamCreate = function(var phStream: PCUstream; Flags: Cardinal): TCUresult;stdcall; // Stream entry points
+  TcuStreamQuery = function(hStream: PCUstream): TCUresult;stdcall;
+  TcuStreamSynchronize = function(hStream: PCUstream): TCUresult;stdcall;
+  TcuStreamDestroy = function(hStream: PCUstream): TCUresult;stdcall;
+  TcuGLCtxCreate = function(var pctx: PCUcontext; Flags: Cardinal; // OpenGL interop entry points
+    device: TCUdevice): TCUresult;stdcall;
+  TcuGraphicsGLRegisterBuffer = function(var pCudaResource: PCUgraphicsResource; // NOTE(review): Flags uses the map-flags enum, not TCUgraphicsRegisterFlags - confirm
+    buffer: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;stdcall;
+  TcuGraphicsGLRegisterImage = function(var pCudaResource: PCUgraphicsResource; // NOTE(review): same Flags typing as the buffer variant
+    image, target: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;stdcall;
+  TcuWGLGetDevice = function(var pDevice: TCUdevice; hGpu: HGPUNV): TCUresult;stdcall; // HGPUNV is declared only under MSWINDOWS
+  TcuGraphicsUnregisterResource = function(resource: PCUgraphicsResource): TCUresult;stdcall;
+  TcuGraphicsSubResourceGetMappedArray = function(var pArray: PCUarray;
+    resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal)
+    : TCUresult;stdcall;
+  TcuGraphicsResourceGetMappedPointer = function(var pDevPtr: TCUdeviceptr;
+    out psize: Cardinal; resource: PCUgraphicsResource): TCUresult;stdcall;
+  TcuGraphicsResourceSetMapFlags = function(resource: PCUgraphicsResource;
+    Flags: Cardinal): TCUresult;stdcall;
+  TcuGraphicsMapResources = function(count: Cardinal; // resources: pointer to a handle, presumably the first of count handles
+    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;stdcall;
+  TcuGraphicsUnmapResources = function(count: Cardinal;
+    resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;stdcall;
+  TcuGLInit = procedure();stdcall; // Declared as a procedure: unlike the other entry points, no TCUresult is exposed
+  TcuGLRegisterBufferObject = function(buffer: Cardinal): TCUresult;stdcall; // Buffer-object API: buffers are identified by a Cardinal
+  TcuGLMapBufferObject = function(var dptr: TCUdeviceptr; var size: Cardinal;
+    buffer: Cardinal): TCUresult;stdcall;
+  TcuGLUnmapBufferObject = function(buffer: Cardinal): TCUresult;stdcall;
+  TcuGLUnregisterBufferObject = function(buffer: Cardinal): TCUresult;stdcall;
+  TcuGLSetBufferObjectMapFlags = function(buffer: Cardinal; Flags: Cardinal)
+    : TCUresult;stdcall;
+  TcuGLMapBufferObjectAsync = function(var dptr: TCUdeviceptr;
+    var size: Cardinal; buffer: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+  TcuGLUnmapBufferObjectAsync = function(buffer: Cardinal; hStream: PCUstream)
+    : TCUresult;stdcall;
+
+var // Interface-section procedural variables, one per API function; each is typed with the function type of the same name prefixed by T.
+  cuInit: TcuInit; // cuInit / cuDriverGetVersion
+  cuDriverGetVersion: TcuDriverGetVersion;
+  cuDeviceGet: TcuDeviceGet; // cuDevice* entries
+  cuDeviceGetCount: TcuDeviceGetCount;
+  cuDeviceGetName: TcuDeviceGetName;
+  cuDeviceComputeCapability: TcuDeviceComputeCapability;
+  cuDeviceTotalMem: TcuDeviceTotalMem;
+  cuDeviceGetProperties: TcuDeviceGetProperties;
+  cuDeviceGetAttribute: TcuDeviceGetAttribute;
+  cuCtxCreate: TcuCtxCreate; // cuCtx* entries
+  cuCtxDestroy: TcuCtxDestroy;
+  cuCtxAttach: TcuCtxAttach;
+  cuCtxDetach: TcuCtxDetach;
+  cuCtxPushCurrent: TcuCtxPushCurrent;
+  cuCtxPopCurrent: TcuCtxPopCurrent;
+  cuCtxGetDevice: TcuCtxGetDevice;
+  cuCtxSynchronize: TcuCtxSynchronize;
+  cuModuleLoad: TcuModuleLoad; // cuModule* entries
+  cuModuleLoadData: TcuModuleLoadData;
+  cuModuleLoadDataEx: TcuModuleLoadDataEx;
+  cuModuleLoadFatBinary: TcuModuleLoadFatBinary;
+  cuModuleUnload: TcuModuleUnload;
+  cuModuleGetFunction: TcuModuleGetFunction;
+  cuModuleGetGlobal: TcuModuleGetGlobal;
+  cuModuleGetTexRef: TcuModuleGetTexRef;
+  cuMemGetInfo: TcuMemGetInfo; // cuMem* allocation / host-memory entries
+  cuMemAlloc: TcuMemAlloc;
+  cuMemAllocPitch: TcuMemAllocPitch;
+  cuMemFree: TcuMemFree;
+  cuMemGetAddressRange: TcuMemGetAddressRange;
+  cuMemAllocHost: TcuMemAllocHost;
+  cuMemFreeHost: TcuMemFreeHost;
+  cuMemHostAlloc: TcuMemHostAlloc;
+  cuMemHostGetDevicePointer: TcuMemHostGetDevicePointer;
+  cuMemHostGetFlags: TcuMemHostGetFlags;
+  cuMemcpyHtoD: TcuMemcpyHtoD; // cuMemcpy* entries
+  cuMemcpyDtoH: TcuMemcpyDtoH;
+  cuMemcpyDtoD: TcuMemcpyDtoD;
+  cuMemcpyDtoDAsync: TcuMemcpyDtoDAsync;
+  cuMemcpyDtoA: TcuMemcpyDtoA;
+  cuMemcpyAtoD: TcuMemcpyAtoD;
+  cuMemcpyHtoA: TcuMemcpyHtoA;
+  cuMemcpyAtoH: TcuMemcpyAtoH;
+  cuMemcpyAtoA: TcuMemcpyAtoA;
+  cuMemcpy2D: TcuMemcpy2D;
+  cuMemcpy2DUnaligned: TcuMemcpy2DUnaligned;
+  cuMemcpy3D: TcuMemcpy3D;
+  cuMemcpyHtoDAsync: TcuMemcpyHtoDAsync;
+  cuMemcpyDtoHAsync: TcuMemcpyDtoHAsync;
+  cuMemcpyHtoAAsync: TcuMemcpyHtoAAsync;
+  cuMemcpyAtoHAsync: TcuMemcpyAtoHAsync;
+  cuMemcpy2DAsync: TcuMemcpy2DAsync;
+  cuMemcpy3DAsync: TcuMemcpy3DAsync;
+  cuMemsetD8: TcuMemsetD8; // cuMemset* entries
+  cuMemsetD16: TcuMemsetD16;
+  cuMemsetD32: TcuMemsetD32;
+  cuMemsetD2D8: TcuMemsetD2D8;
+  cuMemsetD2D16: TcuMemsetD2D16;
+  cuMemsetD2D32: TcuMemsetD2D32;
+  cuFuncSetBlockShape: TcuFuncSetBlockShape; // cuFunc* entries
+  cuFuncSetSharedSize: TcuFuncSetSharedSize;
+  cuFuncGetAttribute: TcuFuncGetAttribute;
+  cuArrayCreate: TcuArrayCreate; // cuArray* entries
+  cuArrayGetDescriptor: TcuArrayGetDescriptor;
+  cuArrayDestroy: TcuArrayDestroy;
+  cuArray3DCreate: TcuArray3DCreate;
+  cuArray3DGetDescriptor: TcuArray3DGetDescriptor;
+  cuTexRefCreate: TcuTexRefCreate; // cuTexRef* entries
+  cuTexRefDestroy: TcuTexRefDestroy;
+  cuTexRefSetArray: TcuTexRefSetArray;
+  cuTexRefSetAddress: TcuTexRefSetAddress;
+  cuTexRefSetAddress2D: TcuTexRefSetAddress2D;
+  cuTexRefSetFormat: TcuTexRefSetFormat;
+  cuTexRefSetAddressMode: TcuTexRefSetAddressMode;
+  cuTexRefSetFilterMode: TcuTexRefSetFilterMode;
+  cuTexRefSetFlags: TcuTexRefSetFlags;
+  cuTexRefGetAddress: TcuTexRefGetAddress;
+  cuTexRefGetArray: TcuTexRefGetArray;
+  cuTexRefGetAddressMode: TcuTexRefGetAddressMode;
+  cuTexRefGetFilterMode: TcuTexRefGetFilterMode;
+  cuTexRefGetFormat: TcuTexRefGetFormat;
+  cuTexRefGetFlags: TcuTexRefGetFlags;
+  cuParamSetSize: TcuParamSetSize; // cuParam* entries
+  cuParamSeti: TcuParamSeti;
+  cuParamSetf: TcuParamSetf;
+  cuParamSetv: TcuParamSetv;
+  cuParamSetTexRef: TcuParamSetTexRef;
+  cuLaunch: TcuLaunch; // cuLaunch* entries
+  cuLaunchGrid: TcuLaunchGrid;
+  cuLaunchGridAsync: TcuLaunchGridAsync;
+  cuEventCreate: TcuEventCreate; // cuEvent* entries
+  cuEventRecord: TcuEventRecord;
+  cuEventQuery: TcuEventQuery;
+  cuEventSynchronize: TcuEventSynchronize;
+  cuEventDestroy: TcuEventDestroy;
+  cuEventElapsedTime: TcuEventElapsedTime;
+  cuStreamCreate: TcuStreamCreate; // cuStream* entries
+  cuStreamQuery: TcuStreamQuery;
+  cuStreamSynchronize: TcuStreamSynchronize;
+  cuStreamDestroy: TcuStreamDestroy;
+  cuGLInit: TcuGLInit; // cuGL* / cuGraphics* / cuWGL* entries
+  cuGLCtxCreate: TcuGLCtxCreate;
+  cuGraphicsGLRegisterBuffer: TcuGraphicsGLRegisterBuffer;
+  cuGraphicsGLRegisterImage: TcuGraphicsGLRegisterImage;
+  cuWGLGetDevice: TcuWGLGetDevice;
+  cuGraphicsUnregisterResource: TcuGraphicsUnregisterResource;
+  cuGraphicsSubResourceGetMappedArray: TcuGraphicsSubResourceGetMappedArray;
+  cuGraphicsResourceGetMappedPointer: TcuGraphicsResourceGetMappedPointer;
+  cuGraphicsResourceSetMapFlags: TcuGraphicsResourceSetMapFlags;
+  cuGraphicsMapResources: TcuGraphicsMapResources;
+  cuGraphicsUnmapResources: TcuGraphicsUnmapResources;
+  cuGLRegisterBufferObject: TcuGLRegisterBufferObject; // cuGL*BufferObject entries
+  cuGLMapBufferObject: TcuGLMapBufferObject;
+  cuGLUnmapBufferObject: TcuGLUnmapBufferObject;
+  cuGLUnregisterBufferObject: TcuGLUnregisterBufferObject;
+  cuGLSetBufferObjectMapFlags: TcuGLSetBufferObjectMapFlags;
+  cuGLMapBufferObjectAsync: TcuGLMapBufferObjectAsync;
+  cuGLUnmapBufferObjectAsync: TcuGLUnmapBufferObjectAsync;
+
+function InitCUDA: Boolean; // NOTE(review): implementation is outside this view; presumably loads the default CUDA library - confirm
+procedure CloseCUDA; // NOTE(review): presumably releases what InitCUDA acquired - confirm against the implementation
+function InitCUDAFromLibrary(const LibName: WideString): Boolean; // takes the library name as a WideString; Boolean result presumably reports success - confirm
+function IsCUDAInitialized: Boolean; // NOTE(review): presumably reports whether the library handle is set - confirm
+function Get_CUDA_API_Error_String(AError: TCUresult): string; // turns a TCUresult into text; the debug shells pass its result to the logger
+
+//==============================================================
+implementation
+//==============================================================
+
+resourcestring
+  cudasFuncRetErr = '%s return error: %s'; // log format used by the debug shells: first %s is the API function name, second %s the error text
+
+const
+  INVALID_MODULEHANDLE = 0; // NOTE(review): presumably the "not loaded" value compared against CUDAHandle - confirm at the use site
+
+  // ************** Windows specific ********************
+{$IFDEF MSWINDOWS}
+
+var
+  CUDAHandle: HINST; // handle variable, typed HINST when MSWINDOWS is defined
+{$ENDIF}
+  // ************** UNIX specific ********************
+{$IFDEF UNIX}
+
+var
+  CUDAHandle: TLibHandle; // same variable, typed TLibHandle when UNIX is defined
+{$ENDIF}
+
+const // One string constant per API function holding that function's name; the debug shells pass these to the logger.
+  cuInitName = 'cuInit'; // NOTE(review): presumably these strings are also used for symbol lookup when the library is loaded - confirm
+  cuDriverGetVersionName = 'cuDriverGetVersion';
+  cuDeviceGet_Name = 'cuDeviceGet'; // underscore presumably avoids a clash with the cuDeviceGetName variable - confirm
+  cuDeviceGetCountName = 'cuDeviceGetCount';
+  cuDeviceGetNameName = 'cuDeviceGetName';
+  cuDeviceComputeCapabilityName = 'cuDeviceComputeCapability';
+  cuDeviceTotalMemName = 'cuDeviceTotalMem';
+  cuDeviceGetPropertiesName = 'cuDeviceGetProperties';
+  cuDeviceGetAttributeName = 'cuDeviceGetAttribute';
+  cuCtxCreateName = 'cuCtxCreate';
+  cuCtxDestroyName = 'cuCtxDestroy';
+  cuCtxAttachName = 'cuCtxAttach';
+  cuCtxDetachName = 'cuCtxDetach';
+  cuCtxPushCurrentName = 'cuCtxPushCurrent';
+  cuCtxPopCurrentName = 'cuCtxPopCurrent';
+  cuCtxGetDeviceName = 'cuCtxGetDevice';
+  cuCtxSynchronizeName = 'cuCtxSynchronize';
+  cuModuleLoadName = 'cuModuleLoad';
+  cuModuleLoadDataName = 'cuModuleLoadData';
+  cuModuleLoadDataExName = 'cuModuleLoadDataEx';
+  cuModuleLoadFatBinaryName = 'cuModuleLoadFatBinary';
+  cuModuleUnloadName = 'cuModuleUnload';
+  cuModuleGetFunctionName = 'cuModuleGetFunction';
+  cuModuleGetGlobalName = 'cuModuleGetGlobal';
+  cuModuleGetTexRefName = 'cuModuleGetTexRef';
+  cuMemGetInfoName = 'cuMemGetInfo';
+  cuMemAllocName = 'cuMemAlloc';
+  cuMemAllocPitchName = 'cuMemAllocPitch';
+  cuMemFreeName = 'cuMemFree';
+  cuMemGetAddressRangeName = 'cuMemGetAddressRange';
+  cuMemAllocHostName = 'cuMemAllocHost';
+  cuMemFreeHostName = 'cuMemFreeHost';
+  cuMemHostAllocName = 'cuMemHostAlloc';
+  cuMemHostGetDevicePointerName = 'cuMemHostGetDevicePointer';
+  cuMemHostGetFlagsName = 'cuMemHostGetFlags';
+  cuMemcpyHtoDName = 'cuMemcpyHtoD';
+  cuMemcpyDtoHName = 'cuMemcpyDtoH';
+  cuMemcpyDtoDName = 'cuMemcpyDtoD';
+  cuMemcpyDtoDAsyncName = 'cuMemcpyDtoDAsync';
+  cuMemcpyDtoAName = 'cuMemcpyDtoA';
+  cuMemcpyAtoDName = 'cuMemcpyAtoD';
+  cuMemcpyHtoAName = 'cuMemcpyHtoA';
+  cuMemcpyAtoHName = 'cuMemcpyAtoH';
+  cuMemcpyAtoAName = 'cuMemcpyAtoA';
+  cuMemcpy2DName = 'cuMemcpy2D';
+  cuMemcpy2DUnalignedName = 'cuMemcpy2DUnaligned';
+  cuMemcpy3DName = 'cuMemcpy3D';
+  cuMemcpyHtoDAsyncName = 'cuMemcpyHtoDAsync';
+  cuMemcpyDtoHAsyncName = 'cuMemcpyDtoHAsync';
+  cuMemcpyHtoAAsyncName = 'cuMemcpyHtoAAsync';
+  cuMemcpyAtoHAsyncName = 'cuMemcpyAtoHAsync';
+  cuMemcpy2DAsyncName = 'cuMemcpy2DAsync';
+  cuMemcpy3DAsyncName = 'cuMemcpy3DAsync';
+  cuMemsetD8Name = 'cuMemsetD8';
+  cuMemsetD16Name = 'cuMemsetD16';
+  cuMemsetD32Name = 'cuMemsetD32';
+  cuMemsetD2D8Name = 'cuMemsetD2D8';
+  cuMemsetD2D16Name = 'cuMemsetD2D16';
+  cuMemsetD2D32Name = 'cuMemsetD2D32';
+  cuFuncSetBlockShapeName = 'cuFuncSetBlockShape';
+  cuFuncSetSharedSizeName = 'cuFuncSetSharedSize';
+  cuFuncGetAttributeName = 'cuFuncGetAttribute';
+  cuArrayCreateName = 'cuArrayCreate';
+  cuArrayGetDescriptorName = 'cuArrayGetDescriptor';
+  cuArrayDestroyName = 'cuArrayDestroy';
+  cuArray3DCreateName = 'cuArray3DCreate';
+  cuArray3DGetDescriptorName = 'cuArray3DGetDescriptor';
+  cuTexRefCreateName = 'cuTexRefCreate';
+  cuTexRefDestroyName = 'cuTexRefDestroy';
+  cuTexRefSetArrayName = 'cuTexRefSetArray';
+  cuTexRefSetAddressName = 'cuTexRefSetAddress';
+  cuTexRefSetAddress2DName = 'cuTexRefSetAddress2D';
+  cuTexRefSetFormatName = 'cuTexRefSetFormat';
+  cuTexRefSetAddressModeName = 'cuTexRefSetAddressMode';
+  cuTexRefSetFilterModeName = 'cuTexRefSetFilterMode';
+  cuTexRefSetFlagsName = 'cuTexRefSetFlags';
+  cuTexRefGetAddressName = 'cuTexRefGetAddress';
+  cuTexRefGetArrayName = 'cuTexRefGetArray';
+  cuTexRefGetAddressModeName = 'cuTexRefGetAddressMode';
+  cuTexRefGetFilterModeName = 'cuTexRefGetFilterMode';
+  cuTexRefGetFormatName = 'cuTexRefGetFormat';
+  cuTexRefGetFlagsName = 'cuTexRefGetFlags';
+  cuParamSetSizeName = 'cuParamSetSize';
+  cuParamSetiName = 'cuParamSeti';
+  cuParamSetfName = 'cuParamSetf';
+  cuParamSetvName = 'cuParamSetv';
+  cuParamSetTexRefName = 'cuParamSetTexRef';
+  cuLaunchName = 'cuLaunch';
+  cuLaunchGridName = 'cuLaunchGrid';
+  cuLaunchGridAsyncName = 'cuLaunchGridAsync';
+  cuEventCreateName = 'cuEventCreate';
+  cuEventRecordName = 'cuEventRecord';
+  cuEventQueryName = 'cuEventQuery';
+  cuEventSynchronizeName = 'cuEventSynchronize';
+  cuEventDestroyName = 'cuEventDestroy';
+  cuEventElapsedTimeName = 'cuEventElapsedTime';
+  cuStreamCreateName = 'cuStreamCreate';
+  cuStreamQueryName = 'cuStreamQuery';
+  cuStreamSynchronizeName = 'cuStreamSynchronize';
+  cuStreamDestroyName = 'cuStreamDestroy';
+  cuGLCtxCreateName = 'cuGLCtxCreate';
+  cuGraphicsGLRegisterBufferName = 'cuGraphicsGLRegisterBuffer';
+  cuGraphicsGLRegisterImageName = 'cuGraphicsGLRegisterImage';
+  cuWGLGetDeviceName = 'cuWGLGetDevice';
+  cuGraphicsUnregisterResourceName = 'cuGraphicsUnregisterResource';
+  cuGraphicsSubResourceGetMappedArrayName =
+    'cuGraphicsSubResourceGetMappedArray';
+  cuGraphicsResourceGetMappedPointerName = 'cuGraphicsResourceGetMappedPointer';
+  cuGraphicsResourceSetMapFlagsName = 'cuGraphicsResourceSetMapFlags';
+  cuGraphicsMapResourcesName = 'cuGraphicsMapResources';
+  cuGraphicsUnmapResourcesName = 'cuGraphicsUnmapResources';
+  cuGLInitName = 'cuGLInit'; // listed here rather than before cuGLCtxCreateName, unlike the interface var order
+  cuGLRegisterBufferObjectName = 'cuGLRegisterBufferObject';
+  cuGLMapBufferObjectName = 'cuGLMapBufferObject';
+  cuGLUnmapBufferObjectName = 'cuGLUnmapBufferObject';
+  cuGLUnregisterBufferObjectName = 'cuGLUnregisterBufferObject';
+  cuGLSetBufferObjectMapFlagsName = 'cuGLSetBufferObjectMapFlags';
+  cuGLMapBufferObjectAsyncName = 'cuGLMapBufferObjectAsync';
+  cuGLUnmapBufferObjectAsyncName = 'cuGLUnmapBufferObjectAsync';
+
+{$IFDEF USE_CUDA_DEBUG_MODE}
+
+var // Compiled only under USE_CUDA_DEBUG_MODE: underscore-suffixed procedural variables that the *Shell functions below call.
+  cuInit_: TcuInit; // each variable has the same function type as its interface-section counterpart
+  cuDriverGetVersion_: TcuDriverGetVersion;
+  cuDeviceGet_: TcuDeviceGet;
+  cuDeviceGetCount_: TcuDeviceGetCount;
+  cuDeviceGetName_: TcuDeviceGetName;
+  cuDeviceComputeCapability_: TcuDeviceComputeCapability;
+  cuDeviceTotalMem_: TcuDeviceTotalMem;
+  cuDeviceGetProperties_: TcuDeviceGetProperties;
+  cuDeviceGetAttribute_: TcuDeviceGetAttribute;
+  cuCtxCreate_: TcuCtxCreate;
+  cuCtxDestroy_: TcuCtxDestroy;
+  cuCtxAttach_: TcuCtxAttach;
+  cuCtxDetach_: TcuCtxDetach;
+  cuCtxPushCurrent_: TcuCtxPushCurrent;
+  cuCtxPopCurrent_: TcuCtxPopCurrent;
+  cuCtxGetDevice_: TcuCtxGetDevice;
+  cuCtxSynchronize_: TcuCtxSynchronize;
+  cuModuleLoad_: TcuModuleLoad;
+  cuModuleLoadData_: TcuModuleLoadData;
+  cuModuleLoadDataEx_: TcuModuleLoadDataEx;
+  cuModuleLoadFatBinary_: TcuModuleLoadFatBinary;
+  cuModuleUnload_: TcuModuleUnload;
+  cuModuleGetFunction_: TcuModuleGetFunction;
+  cuModuleGetGlobal_: TcuModuleGetGlobal;
+  cuModuleGetTexRef_: TcuModuleGetTexRef;
+  cuMemGetInfo_: TcuMemGetInfo;
+  cuMemAlloc_: TcuMemAlloc;
+  cuMemAllocPitch_: TcuMemAllocPitch;
+  cuMemFree_: TcuMemFree;
+  cuMemGetAddressRange_: TcuMemGetAddressRange;
+  cuMemAllocHost_: TcuMemAllocHost;
+  cuMemFreeHost_: TcuMemFreeHost;
+  cuMemHostAlloc_: TcuMemHostAlloc;
+  cuMemHostGetDevicePointer_: TcuMemHostGetDevicePointer;
+  cuMemHostGetFlags_: TcuMemHostGetFlags;
+  cuMemcpyHtoD_: TcuMemcpyHtoD;
+  cuMemcpyDtoH_: TcuMemcpyDtoH;
+  cuMemcpyDtoD_: TcuMemcpyDtoD;
+  cuMemcpyDtoDAsync_: TcuMemcpyDtoDAsync;
+  cuMemcpyDtoA_: TcuMemcpyDtoA;
+  cuMemcpyAtoD_: TcuMemcpyAtoD;
+  cuMemcpyHtoA_: TcuMemcpyHtoA;
+  cuMemcpyAtoH_: TcuMemcpyAtoH;
+  cuMemcpyAtoA_: TcuMemcpyAtoA;
+  cuMemcpy2D_: TcuMemcpy2D;
+  cuMemcpy2DUnaligned_: TcuMemcpy2DUnaligned;
+  cuMemcpy3D_: TcuMemcpy3D;
+  cuMemcpyHtoDAsync_: TcuMemcpyHtoDAsync;
+  cuMemcpyDtoHAsync_: TcuMemcpyDtoHAsync;
+  cuMemcpyHtoAAsync_: TcuMemcpyHtoAAsync;
+  cuMemcpyAtoHAsync_: TcuMemcpyAtoHAsync;
+  cuMemcpy2DAsync_: TcuMemcpy2DAsync;
+  cuMemcpy3DAsync_: TcuMemcpy3DAsync;
+  cuMemsetD8_: TcuMemsetD8;
+  cuMemsetD16_: TcuMemsetD16;
+  cuMemsetD32_: TcuMemsetD32;
+  cuMemsetD2D8_: TcuMemsetD2D8;
+  cuMemsetD2D16_: TcuMemsetD2D16;
+  cuMemsetD2D32_: TcuMemsetD2D32;
+  cuFuncSetBlockShape_: TcuFuncSetBlockShape;
+  cuFuncSetSharedSize_: TcuFuncSetSharedSize;
+  cuFuncGetAttribute_: TcuFuncGetAttribute;
+  cuArrayCreate_: TcuArrayCreate;
+  cuArrayGetDescriptor_: TcuArrayGetDescriptor;
+  cuArrayDestroy_: TcuArrayDestroy;
+  cuArray3DCreate_: TcuArray3DCreate;
+  cuArray3DGetDescriptor_: TcuArray3DGetDescriptor;
+  cuTexRefCreate_: TcuTexRefCreate;
+  cuTexRefDestroy_: TcuTexRefDestroy;
+  cuTexRefSetArray_: TcuTexRefSetArray;
+  cuTexRefSetAddress_: TcuTexRefSetAddress;
+  cuTexRefSetAddress2D_: TcuTexRefSetAddress2D;
+  cuTexRefSetFormat_: TcuTexRefSetFormat;
+  cuTexRefSetAddressMode_: TcuTexRefSetAddressMode;
+  cuTexRefSetFilterMode_: TcuTexRefSetFilterMode;
+  cuTexRefSetFlags_: TcuTexRefSetFlags;
+  cuTexRefGetAddress_: TcuTexRefGetAddress;
+  cuTexRefGetArray_: TcuTexRefGetArray;
+  cuTexRefGetAddressMode_: TcuTexRefGetAddressMode;
+  cuTexRefGetFilterMode_: TcuTexRefGetFilterMode;
+  cuTexRefGetFormat_: TcuTexRefGetFormat;
+  cuTexRefGetFlags_: TcuTexRefGetFlags;
+  cuParamSetSize_: TcuParamSetSize;
+  cuParamSeti_: TcuParamSeti;
+  cuParamSetf_: TcuParamSetf;
+  cuParamSetv_: TcuParamSetv;
+  cuParamSetTexRef_: TcuParamSetTexRef;
+  cuLaunch_: TcuLaunch;
+  cuLaunchGrid_: TcuLaunchGrid;
+  cuLaunchGridAsync_: TcuLaunchGridAsync;
+  cuEventCreate_: TcuEventCreate;
+  cuEventRecord_: TcuEventRecord;
+  cuEventQuery_: TcuEventQuery;
+  cuEventSynchronize_: TcuEventSynchronize;
+  cuEventDestroy_: TcuEventDestroy;
+  cuEventElapsedTime_: TcuEventElapsedTime;
+  cuStreamCreate_: TcuStreamCreate;
+  cuStreamQuery_: TcuStreamQuery;
+  cuStreamSynchronize_: TcuStreamSynchronize;
+  cuStreamDestroy_: TcuStreamDestroy;
+  cuGLCtxCreate_: TcuGLCtxCreate; // NOTE(review): no cuGLInit_ is declared in this list although cuGLInit exists in the interface var list - confirm this is intentional
+  cuGraphicsGLRegisterBuffer_: TcuGraphicsGLRegisterBuffer;
+  cuGraphicsGLRegisterImage_: TcuGraphicsGLRegisterImage;
+  cuWGLGetDevice_: TcuWGLGetDevice;
+  cuGraphicsUnregisterResource_: TcuGraphicsUnregisterResource;
+  cuGraphicsSubResourceGetMappedArray_: TcuGraphicsSubResourceGetMappedArray;
+  cuGraphicsResourceGetMappedPointer_: TcuGraphicsResourceGetMappedPointer;
+  cuGraphicsResourceSetMapFlags_: TcuGraphicsResourceSetMapFlags;
+  cuGraphicsMapResources_: TcuGraphicsMapResources;
+  cuGraphicsUnmapResources_: TcuGraphicsUnmapResources;
+  cuGLRegisterBufferObject_: TcuGLRegisterBufferObject;
+  cuGLMapBufferObject_: TcuGLMapBufferObject;
+  cuGLUnmapBufferObject_: TcuGLUnmapBufferObject;
+  cuGLUnregisterBufferObject_: TcuGLUnregisterBufferObject;
+  cuGLSetBufferObjectMapFlags_: TcuGLSetBufferObjectMapFlags;
+  cuGLMapBufferObjectAsync_: TcuGLMapBufferObjectAsync;
+  cuGLUnmapBufferObjectAsync_: TcuGLUnmapBufferObjectAsync;
+
+// Shell for cuInit_: passes the call through and returns its result. When the
+// result is not CUDA_SUCCESS, the function name and error text are logged.
+function cuInitShell(Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuInit_(Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuInitName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDriverGetVersion_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDriverGetVersionShell(out driverVersion: Integer): TCUresult;stdcall;
+begin
+  Result := cuDriverGetVersion_(driverVersion);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDriverGetVersionName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceGet_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceGetShell(var device: TCUdevice; ordinal: Integer): TCUresult;stdcall;
+begin
+  Result := cuDeviceGet_(device, ordinal);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceGet_Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceGetCount_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceGetCountShell(var count: Integer): TCUresult;stdcall;
+begin
+  Result := cuDeviceGetCount_(count);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceGetCountName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceGetName_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceGetNameShell(name: PAnsiChar; len: Integer; dev: TCUdevice)
+  : TCUresult;stdcall;
+begin
+  Result := cuDeviceGetName_(name, len, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceGetNameName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceComputeCapability_: passes the call through and returns
+// its result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceComputeCapabilityShell(var major: Integer; var minor: Integer;
+  dev: TCUdevice): TCUresult;stdcall;
+begin
+  Result := cuDeviceComputeCapability_(major, minor, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceComputeCapabilityName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceTotalMem_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceTotalMemShell(bytes: PSize_t; dev: TCUdevice): TCUresult;stdcall;
+begin
+  Result := cuDeviceTotalMem_(bytes, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceTotalMemName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceGetProperties_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceGetPropertiesShell(var prop: TCUdevprop; dev: TCUdevice)
+  : TCUresult;
+stdcall;
+begin
+  Result := cuDeviceGetProperties_(prop, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceGetPropertiesName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuDeviceGetAttribute_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuDeviceGetAttributeShell(pi: PSize_t; attrib: TCUdevice_attribute;
+  dev: TCUdevice): TCUresult;stdcall;
+begin
+  Result := cuDeviceGetAttribute_(pi, attrib, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuDeviceGetAttributeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxCreate_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal; dev: TCUdevice)
+  : TCUresult;stdcall;
+begin
+  Result := cuCtxCreate_(pctx, Flags, dev);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxDestroy_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxDestroyShell(ctx: PCUcontext): TCUresult;stdcall;
+begin
+  Result := cuCtxDestroy_(ctx);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxDestroyName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxAttach_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxAttachShell(var pctx: PCUcontext; Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuCtxAttach_(pctx, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxAttachName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxDetach_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxDetachShell(ctx: PCUcontext): TCUresult;stdcall;
+begin
+  Result := cuCtxDetach_(ctx);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxDetachName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxPushCurrent_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxPushCurrentShell(ctx: PCUcontext): TCUresult;stdcall;
+begin
+  Result := cuCtxPushCurrent_(ctx);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxPushCurrentName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxPopCurrent_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxPopCurrentShell(var pctx: PCUcontext): TCUresult;stdcall;
+begin
+  Result := cuCtxPopCurrent_(pctx);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxPopCurrentName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxGetDevice_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxGetDeviceShell(var device: TCUdevice): TCUresult;stdcall;
+begin
+  Result := cuCtxGetDevice_(device);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxGetDeviceName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuCtxSynchronize_ (no arguments): calls it and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuCtxSynchronizeShell: TCUresult;stdcall;
+begin
+  Result := cuCtxSynchronize_;
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuCtxSynchronizeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleLoad_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleLoadShell(var module: PCUmodule; const fname: PAnsiChar)
+  : TCUresult;stdcall;
+begin
+  Result := cuModuleLoad_(module, fname);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleLoadName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleLoadData_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleLoadDataShell(var module: PCUmodule; const image: PAnsiChar)
+  : TCUresult;stdcall;
+begin
+  Result := cuModuleLoadData_(module, image);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleLoadDataName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleLoadDataEx_: passes all five arguments through and
+// returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuModuleLoadDataExShell(var module: PCUmodule; var image;
+  numOptions: Cardinal; var options: TCUjit_option; var optionValues)
+  : TCUresult;stdcall;
+begin
+  Result := cuModuleLoadDataEx_(module, image, numOptions,
+    options, optionValues);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleLoadDataExName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleLoadFatBinary_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleLoadFatBinaryShell(var module: PCUmodule; var fatCubin)
+  : TCUresult;stdcall;
+begin
+  Result := cuModuleLoadFatBinary_(module, fatCubin);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleLoadFatBinaryName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleUnload_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleUnloadShell(hmod: PCUmodule): TCUresult;stdcall;
+begin
+  Result := cuModuleUnload_(hmod);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleUnloadName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleGetFunction_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleGetFunctionShell(out hfunc: PCUfunction; hmod: PCUmodule;
+  const name: PAnsiChar): TCUresult;stdcall;
+begin
+  Result := cuModuleGetFunction_(hfunc, hmod, name);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleGetFunctionName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleGetGlobal_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleGetGlobalShell(out dptr: TCUdeviceptr; var bytes: Cardinal;
+  hmod: PCUmodule; const name: PAnsiChar): TCUresult;stdcall;
+begin
+  Result := cuModuleGetGlobal_(dptr, bytes, hmod, name);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleGetGlobalName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuModuleGetTexRef_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuModuleGetTexRefShell(out pTexRef: PCUtexref; hmod: PCUmodule;
+  const name: PAnsiChar): TCUresult;stdcall;
+begin
+  Result := cuModuleGetTexRef_(pTexRef, hmod, name);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuModuleGetTexRefName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemGetInfo_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemGetInfoShell(var free: Cardinal; var total: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemGetInfo_(free, total);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemGetInfoName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemAlloc_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemAllocShell(var dptr: TCUdeviceptr; bytesize: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemAlloc_(dptr, bytesize);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemAllocName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemAllocPitch_: passes all five arguments through and returns
+// its result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemAllocPitchShell(var dptr: TCUdeviceptr; var pPitch: Cardinal;
+  WidthInBytes: Cardinal; Height: Cardinal; ElementSizeBytes: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemAllocPitch_(dptr, pPitch, WidthInBytes,
+    Height, ElementSizeBytes);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemAllocPitchName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemFree_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemFreeShell(dptr: TCUdeviceptr): TCUresult;stdcall;
+begin
+  Result := cuMemFree_(dptr);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemFreeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemGetAddressRange_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemGetAddressRangeShell(var pbase: TCUdeviceptr; var psize: Cardinal;
+  dptr: TCUdeviceptr): TCUresult;stdcall;
+begin
+  Result := cuMemGetAddressRange_(pbase, psize, dptr);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemGetAddressRangeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemAllocHost_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemAllocHostShell(var pp; bytesize: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemAllocHost_(pp, bytesize);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemAllocHostName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemFreeHost_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemFreeHostShell(p: Pointer): TCUresult;stdcall;
+begin
+  Result := cuMemFreeHost_(p);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemFreeHostName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemHostAlloc_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemHostAllocShell(var pp: Pointer; bytesize: Cardinal; Flags: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemHostAlloc_(pp, bytesize, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemHostAllocName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemHostGetDevicePointer_: passes the call through and returns
+// its result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemHostGetDevicePointerShell(var pdptr: TCUdeviceptr; p: Pointer;
+  Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemHostGetDevicePointer_(pdptr, p, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemHostGetDevicePointerName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemHostGetFlags_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemHostGetFlagsShell(var pFlags: Cardinal; var p): TCUresult;stdcall;
+begin
+  Result := cuMemHostGetFlags_(pFlags, p);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemHostGetFlagsName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyHtoD_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyHtoDShell(dstDevice: TCUdeviceptr; const srcHost: Pointer;
+  ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyHtoD_(dstDevice, srcHost, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyHtoDName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyDtoH_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyDtoHShell(const dstHost: Pointer; srcDevice: TCUdeviceptr;
+  ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyDtoH_(dstHost, srcDevice, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyDtoHName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyDtoD_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyDtoDShell(dstDevice: TCUdeviceptr; srcDevice: TCUdeviceptr;
+  ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyDtoD_(dstDevice, srcDevice, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyDtoDName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyDtoDAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpyDtoDAsyncShell(dstDevice: TCUdeviceptr;
+  srcDevice: TCUdeviceptr; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuMemcpyDtoDAsync_(dstDevice, srcDevice, ByteCount, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyDtoDAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyDtoA_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyDtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
+  srcDevice: TCUdeviceptr; ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyDtoA_(dstArray, dstIndex, srcDevice, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyDtoAName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyAtoD_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyAtoDShell(dstDevice: TCUdeviceptr; hSrc: PCUarray;
+  SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyAtoD_(dstDevice, hSrc, SrcIndex, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyAtoDName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyHtoA_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyHtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
+  pSrc: Pointer; ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyHtoA_(dstArray, dstIndex, pSrc, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyHtoAName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyAtoH_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyAtoHShell(dstHost: Pointer; srcArray: PCUarray;
+  SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyAtoH_(dstHost, srcArray, SrcIndex, ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyAtoHName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyAtoA_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpyAtoAShell(dstArray: PCUarray; dstIndex: Cardinal;
+  srcArray: PCUarray; SrcIndex: Cardinal; ByteCount: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemcpyAtoA_(dstArray, dstIndex, srcArray, SrcIndex,
+    ByteCount);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyAtoAName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpy2D_: passes the descriptor pointer through and returns
+// its result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpy2DShell(const pCopy: PCUDA_MEMCPY2D): TCUresult;stdcall;
+begin
+  Result := cuMemcpy2D_(pCopy);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpy2DName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpy2DUnaligned_: passes the call through and returns its
+// result. Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpy2DUnalignedShell(var pCopy: TCUDA_MEMCPY2D): TCUresult;stdcall;
+begin
+  Result := cuMemcpy2DUnaligned_(pCopy);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpy2DUnalignedName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpy3D_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemcpy3DShell(var pCopy: TCUDA_MEMCPY3D): TCUresult;stdcall;
+begin
+  Result := cuMemcpy3D_(pCopy);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpy3DName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyHtoDAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpyHtoDAsyncShell(dstDevice: TCUdeviceptr; var srcHost;
+  ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuMemcpyHtoDAsync_(dstDevice, srcHost, ByteCount, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyHtoDAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyDtoHAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpyDtoHAsyncShell(var dstHost; srcDevice: TCUdeviceptr;
+  ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuMemcpyDtoHAsync_(dstHost, srcDevice, ByteCount, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyDtoHAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyHtoAAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpyHtoAAsyncShell(dstArray: PCUarray; dstIndex: Cardinal;
+  var pSrc; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuMemcpyHtoAAsync_(dstArray, dstIndex, pSrc, ByteCount,
+    hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyHtoAAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpyAtoHAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+// NOTE(review): srcArray is typed PCUstream here, while the other array
+// parameters in this unit use PCUarray. Left as declared; confirm against
+// the TcuMemcpyAtoHAsync type before changing.
+function cuMemcpyAtoHAsyncShell(var dstHost; srcArray: PCUstream;
+  SrcIndex: Cardinal; ByteCount: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuMemcpyAtoHAsync_(dstHost, srcArray, SrcIndex, ByteCount,
+    hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpyAtoHAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpy2DAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpy2DAsyncShell(var pCopy: TCUDA_MEMCPY2D; hStream: PCUstream)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemcpy2DAsync_(pCopy, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpy2DAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemcpy3DAsync_: passes the call (including hStream) through
+// and returns its result. Any result other than CUDA_SUCCESS is logged.
+function cuMemcpy3DAsyncShell(var pCopy: TCUDA_MEMCPY3D; hStream: PCUstream)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemcpy3DAsync_(pCopy, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemcpy3DAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemsetD8_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemsetD8Shell(dstDevice: TCUdeviceptr; ub: Byte; N: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemsetD8_(dstDevice, ub, N);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD8Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemsetD16_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemsetD16Shell(dstDevice: TCUdeviceptr; uw: Word; N: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemsetD16_(dstDevice, uw, N);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD16Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemsetD32_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemsetD32Shell(dstDevice: TCUdeviceptr; ui: Cardinal; N: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuMemsetD32_(dstDevice, ui, N);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD32Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Shell for cuMemsetD2D8_: passes the call through and returns its result.
+// Any result other than CUDA_SUCCESS is logged via GLSLogger.
+function cuMemsetD2D8Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
+  ub: Byte; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemsetD2D8_(dstDevice, dstPitch, ub, Width, Height);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD2D8Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuMemsetD2D16_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuMemsetD2D16Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
+  uw: Word; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemsetD2D16_(dstDevice, dstPitch, uw, Width, Height);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD2D16Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuMemsetD2D32_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuMemsetD2D32Shell(dstDevice: TCUdeviceptr; dstPitch: Cardinal;
+  ui: Cardinal; Width: Cardinal; Height: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuMemsetD2D32_(dstDevice, dstPitch, ui, Width, Height);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuMemsetD2D32Name, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuFuncSetBlockShape_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuFuncSetBlockShapeShell(hfunc: PCUfunction; x: Integer; y: Integer;
+  z: Integer): TCUresult;stdcall;
+begin
+  Result := cuFuncSetBlockShape_(hfunc, x, y, z);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuFuncSetBlockShapeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuFuncSetSharedSize_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuFuncSetSharedSizeShell(hfunc: PCUfunction; bytes: Cardinal)
+  : TCUresult;
+stdcall;
+begin
+  Result := cuFuncSetSharedSize_(hfunc, bytes);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuFuncSetSharedSizeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuFuncGetAttribute_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuFuncGetAttributeShell(var pi: Integer; attrib: TCUfunction_attribute;
+  hfunc: PCUfunction): TCUresult;stdcall;
+begin
+  Result := cuFuncGetAttribute_(pi, attrib, hfunc);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuFuncGetAttributeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuArrayCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuArrayCreateShell(var pHandle: PCUarray;
+  var pAllocateArray: TCUDA_ARRAY_DESCRIPTOR): TCUresult;stdcall;
+begin
+  Result := cuArrayCreate_(pHandle, pAllocateArray);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuArrayCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuArrayGetDescriptor_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuArrayGetDescriptorShell(var pArrayDescriptor: TCUDA_ARRAY_DESCRIPTOR;
+  hArray: PCUarray): TCUresult;stdcall;
+begin
+  Result := cuArrayGetDescriptor_(pArrayDescriptor, hArray);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuArrayGetDescriptorName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuArrayDestroy_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuArrayDestroyShell(hArray: PCUarray): TCUresult;stdcall;
+begin
+  Result := cuArrayDestroy_(hArray);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuArrayDestroyName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuArray3DCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuArray3DCreateShell(var pHandle: PCUarray;
+  var pAllocateArray: TCUDA_ARRAY3D_DESCRIPTOR): TCUresult;stdcall;
+begin
+  Result := cuArray3DCreate_(pHandle, pAllocateArray);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuArray3DCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuArray3DGetDescriptor_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuArray3DGetDescriptorShell(var pArrayDescriptor
+  : TCUDA_ARRAY3D_DESCRIPTOR; hArray: PCUarray): TCUresult;stdcall;
+begin
+  Result := cuArray3DGetDescriptor_(pArrayDescriptor, hArray);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuArray3DGetDescriptorName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefCreateShell(var pTexRef: PCUtexref): TCUresult;stdcall;
+begin
+  Result := cuTexRefCreate_(pTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefDestroy_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefDestroyShell(hTexRef: PCUtexref): TCUresult;stdcall;
+begin
+  Result := cuTexRefDestroy_(hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefDestroyName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetArray_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetArrayShell(hTexRef: PCUtexref; hArray: PCUarray;
+  Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuTexRefSetArray_(hTexRef, hArray, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetArrayName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetAddress_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetAddressShell(var ByteOffset: Cardinal; hTexRef: PCUtexref;
+  dptr: TCUdeviceptr; bytes: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuTexRefSetAddress_(ByteOffset, hTexRef, dptr, bytes);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetAddressName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetAddress2D_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetAddress2DShell(hTexRef: PCUtexref;
+  var desc: TCUDA_ARRAY_DESCRIPTOR; dptr: TCUdeviceptr; Pitch: Cardinal)
+  : TCUresult;
+stdcall;
+begin
+  Result := cuTexRefSetAddress2D_(hTexRef, desc, dptr, Pitch);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetAddress2DName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetFormat_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetFormatShell(hTexRef: PCUtexref; fmt: TCUarray_format;
+  NumPackedComponents: Integer): TCUresult;stdcall;
+begin
+  Result := cuTexRefSetFormat_(hTexRef, fmt, NumPackedComponents);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetFormatName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetAddressMode_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetAddressModeShell(hTexRef: PCUtexref; dim: Integer;
+  am: TCUaddress_mode): TCUresult;stdcall;
+begin
+  Result := cuTexRefSetAddressMode_(hTexRef, dim, am);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetAddressModeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetFilterMode_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetFilterModeShell(hTexRef: PCUtexref; fm: TCUfilter_mode)
+  : TCUresult;stdcall;
+begin
+  Result := cuTexRefSetFilterMode_(hTexRef, fm);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetFilterModeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefSetFlags_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefSetFlagsShell(hTexRef: PCUtexref; Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuTexRefSetFlags_(hTexRef, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefSetFlagsName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetAddress_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetAddressShell(var pdptr: TCUdeviceptr; hTexRef: PCUtexref)
+  : TCUresult;stdcall;
+begin
+  Result := cuTexRefGetAddress_(pdptr, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetAddressName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetArray_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetArrayShell(var phArray: PCUarray; hTexRef: PCUtexref)
+  : TCUresult;stdcall;
+begin
+  Result := cuTexRefGetArray_(phArray, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetArrayName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetAddressMode_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetAddressModeShell(var pam: TCUaddress_mode;
+  hTexRef: PCUtexref; dim: Integer): TCUresult;stdcall;
+begin
+  Result := cuTexRefGetAddressMode_(pam, hTexRef, dim);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetAddressModeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetFilterMode_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetFilterModeShell(var pfm: TCUfilter_mode; hTexRef: PCUtexref)
+  : TCUresult;stdcall;
+begin
+  Result := cuTexRefGetFilterMode_(pfm, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetFilterModeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetFormat_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetFormatShell(var pFormat: TCUarray_format;
+  var pNumChannels: Integer; hTexRef: PCUtexref): TCUresult;stdcall;
+begin
+  Result := cuTexRefGetFormat_(pFormat, pNumChannels, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetFormatName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuTexRefGetFlags_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuTexRefGetFlagsShell(var pFlags: Cardinal; hTexRef: PCUtexref)
+  : TCUresult;stdcall;
+begin
+  Result := cuTexRefGetFlags_(pFlags, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuTexRefGetFlagsName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuParamSetSize_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuParamSetSizeShell(hfunc: PCUfunction; numbytes: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuParamSetSize_(hfunc, numbytes);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuParamSetSizeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuParamSeti_ and returns its result unchanged;
+// a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuParamSetiShell(hfunc: PCUfunction; offset: Integer; value: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuParamSeti_(hfunc, offset, value);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuParamSetiName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuParamSetf_ and returns its result unchanged;
+// a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuParamSetfShell(hfunc: PCUfunction; offset: Integer; value: Single)
+  : TCUresult;stdcall;
+begin
+  Result := cuParamSetf_(hfunc, offset, value);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuParamSetfName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuParamSetv_ and returns its result unchanged;
+// a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuParamSetvShell(hfunc: PCUfunction; offset: Integer; var ptr;
+  numbytes: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuParamSetv_(hfunc, offset, ptr, numbytes);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuParamSetvName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuParamSetTexRef_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuParamSetTexRefShell(hfunc: PCUfunction; texunit: Integer;
+  hTexRef: PCUtexref): TCUresult;stdcall;
+begin
+  Result := cuParamSetTexRef_(hfunc, texunit, hTexRef);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuParamSetTexRefName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuLaunch_ and returns its result unchanged;
+// a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuLaunchShell(f: PCUfunction): TCUresult;stdcall;
+begin
+  Result := cuLaunch_(f);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuLaunchName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuLaunchGrid_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuLaunchGridShell(f: PCUfunction; grid_width: Integer;
+  grid_height: Integer): TCUresult;stdcall;
+begin
+  Result := cuLaunchGrid_(f, grid_width, grid_height);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuLaunchGridName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuLaunchGridAsync_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuLaunchGridAsyncShell(f: PCUfunction; grid_width: Integer;
+  grid_height: Integer; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuLaunchGridAsync_(f, grid_width, grid_height, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuLaunchGridAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuEventCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuEventCreateShell(var phEvent: PCUevent; Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuEventCreate_(phEvent, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuEventCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuEventRecord_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuEventRecordShell(hEvent: PCUevent; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuEventRecord_(hEvent, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuEventRecordName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuEventQuery_ and returns its result
+// unchanged. Results other than CUDA_SUCCESS are reported through GLSLogger,
+// except CUDA_ERROR_NOT_READY: per the CUDA driver API a query returns that
+// code while the event has not completed yet, so it is a normal polling
+// answer rather than a failure and must not flood the log with errors.
+function cuEventQueryShell(hEvent: PCUevent): TCUresult;stdcall;
+begin
+  Result := cuEventQuery_(hEvent);
+  if (Result <> CUDA_SUCCESS) and (Result <> CUDA_ERROR_NOT_READY) then
+    GLSLogger.LogErrorFmt(cudasFuncRetErr,
+      [cuEventQueryName, Get_CUDA_API_Error_String(Result)])
+end;
+
+// Logging wrapper: forwards to cuEventSynchronize_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuEventSynchronizeShell(hEvent: PCUevent): TCUresult;stdcall;
+begin
+  Result := cuEventSynchronize_(hEvent);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuEventSynchronizeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuEventDestroy_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuEventDestroyShell(hEvent: PCUevent): TCUresult;stdcall;
+begin
+  Result := cuEventDestroy_(hEvent);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuEventDestroyName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuEventElapsedTime_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuEventElapsedTimeShell(var pMilliseconds: Single; hStart: PCUevent;
+  hEnd: PCUevent): TCUresult;stdcall;
+begin
+  Result := cuEventElapsedTime_(pMilliseconds, hStart, hEnd);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuEventElapsedTimeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuStreamCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuStreamCreateShell(var phStream: PCUstream; Flags: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuStreamCreate_(phStream, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuStreamCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuStreamQuery_ and returns its result
+// unchanged. Results other than CUDA_SUCCESS are reported through GLSLogger,
+// except CUDA_ERROR_NOT_READY: per the CUDA driver API a query returns that
+// code while work in the stream is still pending, so it is a normal polling
+// answer rather than a failure and must not flood the log with errors.
+function cuStreamQueryShell(hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuStreamQuery_(hStream);
+  if (Result <> CUDA_SUCCESS) and (Result <> CUDA_ERROR_NOT_READY) then
+    GLSLogger.LogErrorFmt(cudasFuncRetErr,
+      [cuStreamQueryName, Get_CUDA_API_Error_String(Result)])
+end;
+
+// Logging wrapper: forwards to cuStreamSynchronize_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuStreamSynchronizeShell(hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuStreamSynchronize_(hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuStreamSynchronizeName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuStreamDestroy_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuStreamDestroyShell(hStream: PCUstream): TCUresult;
+stdcall;
+begin
+  Result := cuStreamDestroy_(hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuStreamDestroyName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLCtxCreate_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuGLCtxCreateShell(var pctx: PCUcontext; Flags: Cardinal;
+  device: TCUdevice): TCUresult;
+stdcall;
+begin
+  Result := cuGLCtxCreate_(pctx, Flags, device);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLCtxCreateName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsGLRegisterBuffer_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGraphicsGLRegisterBufferShell(var pCudaResource: PCUgraphicsResource;
+  buffer: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;
+stdcall;
+begin
+  Result := cuGraphicsGLRegisterBuffer_(pCudaResource, buffer, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsGLRegisterBufferName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsGLRegisterImage_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGraphicsGLRegisterImageShell(var pCudaResource: PCUgraphicsResource;
+  image, target: Cardinal; Flags: TCUgraphicsMapResourceFlags): TCUresult;stdcall;
+begin
+  Result := cuGraphicsGLRegisterImage_(pCudaResource, image, target, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsGLRegisterImageName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuWGLGetDevice_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuWGLGetDeviceShell(var pDevice: TCUdevice; hGpu: HGPUNV): TCUresult;stdcall;
+begin
+  Result := cuWGLGetDevice_(pDevice, hGpu);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuWGLGetDeviceName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsUnregisterResource_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGraphicsUnregisterResourceShell(resource: PCUgraphicsResource)
+  : TCUresult;stdcall;
+begin
+  Result := cuGraphicsUnregisterResource_(resource);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsUnregisterResourceName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsSubResourceGetMappedArray_ and
+// returns its result unchanged; a result other than CUDA_SUCCESS is reported
+// through GLSLogger.
+function cuGraphicsSubResourceGetMappedArrayShell(var pArray: PCUarray;
+  resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuGraphicsSubResourceGetMappedArray_(pArray, resource, arrayIndex,
+    mipLevel);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsSubResourceGetMappedArrayName,
+    Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsResourceGetMappedPointer_ and
+// returns its result unchanged; a result other than CUDA_SUCCESS is reported
+// through GLSLogger.
+function cuGraphicsResourceGetMappedPointerShell(var pDevPtr: TCUdeviceptr;
+  out psize: Cardinal; resource: PCUgraphicsResource): TCUresult;stdcall;
+begin
+  Result := cuGraphicsResourceGetMappedPointer_(pDevPtr, psize, resource);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsResourceGetMappedPointerName,
+    Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsResourceSetMapFlags_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGraphicsResourceSetMapFlagsShell(resource: PCUgraphicsResource;
+  Flags: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuGraphicsResourceSetMapFlags_(resource, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsResourceSetMapFlagsName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsMapResources_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuGraphicsMapResourcesShell(count: Cardinal;
+  resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuGraphicsMapResources_(count, resources, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsMapResourcesName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGraphicsUnmapResources_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGraphicsUnmapResourcesShell(count: Cardinal;
+  resources: PPCUgraphicsResource; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuGraphicsUnmapResources_(count, resources, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGraphicsUnmapResourcesName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLRegisterBufferObject_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGLRegisterBufferObjectShell(buffer: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuGLRegisterBufferObject_(buffer);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLRegisterBufferObjectName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLMapBufferObject_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuGLMapBufferObjectShell(var dptr: TCUdeviceptr; var size: Cardinal;
+  buffer: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuGLMapBufferObject_(dptr, size, buffer);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLMapBufferObjectName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLUnmapBufferObject_ and returns its result
+// unchanged; a result other than CUDA_SUCCESS is reported through GLSLogger.
+function cuGLUnmapBufferObjectShell(buffer: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuGLUnmapBufferObject_(buffer);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLUnmapBufferObjectName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLUnregisterBufferObject_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGLUnregisterBufferObjectShell(buffer: Cardinal): TCUresult;stdcall;
+begin
+  Result := cuGLUnregisterBufferObject_(buffer);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLUnregisterBufferObjectName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLSetBufferObjectMapFlags_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGLSetBufferObjectMapFlagsShell(buffer: Cardinal; Flags: Cardinal)
+  : TCUresult;stdcall;
+begin
+  Result := cuGLSetBufferObjectMapFlags_(buffer, Flags);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLSetBufferObjectMapFlagsName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLMapBufferObjectAsync_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGLMapBufferObjectAsyncShell(var dptr: TCUdeviceptr;
+  var size: Cardinal; buffer: Cardinal; hStream: PCUstream): TCUresult;stdcall;
+begin
+  Result := cuGLMapBufferObjectAsync_(dptr, size, buffer, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLMapBufferObjectAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+// Logging wrapper: forwards to cuGLUnmapBufferObjectAsync_ and returns its
+// result unchanged; a result other than CUDA_SUCCESS is reported through
+// GLSLogger.
+function cuGLUnmapBufferObjectAsyncShell(buffer: Cardinal; hStream: PCUstream)
+  : TCUresult;stdcall;
+begin
+  Result := cuGLUnmapBufferObjectAsync_(buffer, hStream);
+  if Result = CUDA_SUCCESS then
+    Exit;
+  GLSLogger.LogErrorFmt(cudasFuncRetErr,
+    [cuGLUnmapBufferObjectAsyncName, Get_CUDA_API_Error_String(Result)]);
+end;
+
+{$ENDIF GLS_CUDA_DEBUG_MODE}
+
+// Looks up an exported function in the library referenced by CUDAHandle.
+// The name with a '_v2' suffix is tried first; if that export does not exist
+// the plain ProcName is looked up instead. Returns nil when neither export
+// is found.
+// The handle is cast to HMODULE rather than Cardinal: a module handle is
+// pointer-sized, so a Cardinal cast would truncate it to 32 bits on Win64
+// and make every lookup fail.
+function GetProcAddressCUDA(ProcName: PAnsiChar): Pointer;
+var
+  Alt: AnsiString;
+begin
+  Alt := AnsiString(ProcName) + '_v2';
+  Result := GetProcAddress(HMODULE(CUDAHandle), PAnsiChar(Alt));
+  if Result = nil then
+    Result := GetProcAddress(HMODULE(CUDAHandle), ProcName);
+end;
+
+// Makes sure the CUDA library is loaded. Returns True immediately when
+// CUDAHandle is already valid; otherwise returns the result of
+// InitCUDAFromLibrary for the default library name CUDAAPIDLL.
+function InitCUDA: Boolean;
+begin
+  Result := True;
+  if CUDAHandle = INVALID_MODULEHANDLE then
+    Result := InitCUDAFromLibrary(CUDAAPIDLL);
+end;
+
+// Releases the CUDA library if one is currently referenced and resets
+// CUDAHandle to INVALID_MODULEHANDLE; does nothing when no library is loaded.
+// The handle is cast to HMODULE rather than Cardinal: a module handle is
+// pointer-sized, so a Cardinal cast would truncate it to 32 bits on Win64.
+// NOTE(review): InitCUDAFromLibrary may obtain the handle via
+// GetModuleHandleW, which does not add a reference - confirm that calling
+// FreeLibrary on such a handle is intended.
+procedure CloseCUDA;
+begin
+  if CUDAHandle = INVALID_MODULEHANDLE then
+    Exit;
+  FreeLibrary(HMODULE(CUDAHandle));
+  CUDAHandle := INVALID_MODULEHANDLE;
+end;
+
+function InitCUDAFromLibrary(const LibName: WideString): Boolean;
+var
+  V: Integer;
+begin
+  Result := False;
+  CloseCUDA;
+  CUDAHandle := GetModuleHandleW(PWideChar(LibName));
+  if CUDAHandle = INVALID_MODULEHANDLE then
+    CUDAHandle := LoadLibraryW(PWideChar(LibName));
+
+  if CUDAHandle = INVALID_MODULEHANDLE then
+    Exit;
+
+{$IFNDEF USE_CUDA_DEBUG_MODE}
+
+  cuInit := GetProcAddressCUDA(cuInitName);
+  cuDriverGetVersion := GetProcAddressCUDA(cuDriverGetVersionName);
+  cuDeviceGet := GetProcAddressCUDA(cuDeviceGet_Name);
+  cuDeviceGetCount := GetProcAddressCUDA(cuDeviceGetCountName);
+  cuDeviceGetName := GetProcAddressCUDA(cuDeviceGetNameName);
+  cuDeviceComputeCapability := GetProcAddressCUDA(cuDeviceComputeCapabilityName);
+  cuDeviceTotalMem := GetProcAddressCUDA(cuDeviceTotalMemName);
+  cuDeviceGetProperties := GetProcAddressCUDA(cuDeviceGetPropertiesName);
+  cuDeviceGetAttribute := GetProcAddressCUDA(cuDeviceGetAttributeName);
+  cuCtxCreate := GetProcAddressCUDA(cuCtxCreateName);
+  cuCtxDestroy := GetProcAddressCUDA(cuCtxDestroyName);
+  cuCtxAttach := GetProcAddressCUDA(cuCtxAttachName);
+  cuCtxDetach := GetProcAddressCUDA(cuCtxDetachName);
+  cuCtxPushCurrent := GetProcAddressCUDA(cuCtxPushCurrentName);
+  cuCtxPopCurrent := GetProcAddressCUDA(cuCtxPopCurrentName);
+  cuCtxGetDevice := GetProcAddressCUDA(cuCtxGetDeviceName);
+  cuCtxSynchronize := GetProcAddressCUDA(cuCtxSynchronizeName);
+  cuModuleLoad := GetProcAddressCUDA(cuModuleLoadName);
+  cuModuleLoadData := GetProcAddressCUDA(cuModuleLoadDataName);
+  cuModuleLoadDataEx := GetProcAddressCUDA(cuModuleLoadDataExName);
+  cuModuleLoadFatBinary := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
+  cuModuleUnload := GetProcAddressCUDA(cuModuleUnloadName);
+  cuModuleGetFunction := GetProcAddressCUDA(cuModuleGetFunctionName);
+  cuModuleGetGlobal := GetProcAddressCUDA(cuModuleGetGlobalName);
+  cuModuleGetTexRef := GetProcAddressCUDA(cuModuleGetTexRefName);
+  cuMemGetInfo := GetProcAddressCUDA(cuMemGetInfoName);
+  cuMemAlloc := GetProcAddressCUDA(cuMemAllocName);
+  cuMemAllocPitch := GetProcAddressCUDA(cuMemAllocPitchName);
+  cuMemFree := GetProcAddressCUDA(cuMemFreeName);
+  cuMemGetAddressRange := GetProcAddressCUDA(cuMemGetAddressRangeName);
+  cuMemAllocHost := GetProcAddressCUDA(cuMemAllocHostName);
+  cuMemFreeHost := GetProcAddressCUDA(cuMemFreeHostName);
+  cuMemHostAlloc := GetProcAddressCUDA(cuMemHostAllocName);
+  cuMemHostGetDevicePointer := GetProcAddressCUDA(cuMemHostGetDevicePointerName);
+  cuMemHostGetFlags := GetProcAddressCUDA(cuMemHostGetFlagsName);
+  cuMemcpyHtoD := GetProcAddressCUDA(cuMemcpyHtoDName);
+  cuMemcpyDtoH := GetProcAddressCUDA(cuMemcpyDtoHName);
+  cuMemcpyDtoD := GetProcAddressCUDA(cuMemcpyDtoDName);
+  cuMemcpyDtoDAsync := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
+  cuMemcpyDtoA := GetProcAddressCUDA(cuMemcpyDtoAName);
+  cuMemcpyAtoD := GetProcAddressCUDA(cuMemcpyAtoDName);
+  cuMemcpyHtoA := GetProcAddressCUDA(cuMemcpyHtoAName);
+  cuMemcpyAtoH := GetProcAddressCUDA(cuMemcpyAtoHName);
+  cuMemcpyAtoA := GetProcAddressCUDA(cuMemcpyAtoAName);
+  cuMemcpy2D := GetProcAddressCUDA(cuMemcpy2DName);
+  cuMemcpy2DUnaligned := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
+  cuMemcpy3D := GetProcAddressCUDA(cuMemcpy3DName);
+  cuMemcpyHtoDAsync := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
+  cuMemcpyDtoHAsync := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
+  cuMemcpyHtoAAsync := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
+  cuMemcpyAtoHAsync := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
+  cuMemcpy2DAsync := GetProcAddressCUDA(cuMemcpy2DAsyncName);
+  cuMemcpy3DAsync := GetProcAddressCUDA(cuMemcpy3DAsyncName);
+  cuMemsetD8 := GetProcAddressCUDA(cuMemsetD8Name);
+  cuMemsetD16 := GetProcAddressCUDA(cuMemsetD16Name);
+  cuMemsetD32 := GetProcAddressCUDA(cuMemsetD32Name);
+  cuMemsetD2D8 := GetProcAddressCUDA(cuMemsetD2D8Name);
+  cuMemsetD2D16 := GetProcAddressCUDA(cuMemsetD2D16Name);
+  cuMemsetD2D32 := GetProcAddressCUDA(cuMemsetD2D32Name);
+  cuFuncSetBlockShape := GetProcAddressCUDA(cuFuncSetBlockShapeName);
+  cuFuncSetSharedSize := GetProcAddressCUDA(cuFuncSetSharedSizeName);
+  cuFuncGetAttribute := GetProcAddressCUDA(cuFuncGetAttributeName);
+  cuArrayCreate := GetProcAddressCUDA(cuArrayCreateName);
+  cuArrayGetDescriptor := GetProcAddressCUDA(cuArrayGetDescriptorName);
+  cuArrayDestroy := GetProcAddressCUDA(cuArrayDestroyName);
+  cuArray3DCreate := GetProcAddressCUDA(cuArray3DCreateName);
+  cuArray3DGetDescriptor := GetProcAddressCUDA(cuArray3DGetDescriptorName);
+  cuTexRefCreate := GetProcAddressCUDA(cuTexRefCreateName);
+  cuTexRefDestroy := GetProcAddressCUDA(cuTexRefDestroyName);
+  cuTexRefSetArray := GetProcAddressCUDA(cuTexRefSetArrayName);
+  cuTexRefSetAddress := GetProcAddressCUDA(cuTexRefSetAddressName);
+  cuTexRefSetAddress2D := GetProcAddressCUDA(cuTexRefSetAddress2DName);
+  cuTexRefSetFormat := GetProcAddressCUDA(cuTexRefSetFormatName);
+  cuTexRefSetAddressMode := GetProcAddressCUDA(cuTexRefSetAddressModeName);
+  cuTexRefSetFilterMode := GetProcAddressCUDA(cuTexRefSetFilterModeName);
+  cuTexRefSetFlags := GetProcAddressCUDA(cuTexRefSetFlagsName);
+  cuTexRefGetAddress := GetProcAddressCUDA(cuTexRefGetAddressName);
+  cuTexRefGetArray := GetProcAddressCUDA(cuTexRefGetArrayName);
+  cuTexRefGetAddressMode := GetProcAddressCUDA(cuTexRefGetAddressModeName);
+  cuTexRefGetFilterMode := GetProcAddressCUDA(cuTexRefGetFilterModeName);
+  cuTexRefGetFormat := GetProcAddressCUDA(cuTexRefGetFormatName);
+  cuTexRefGetFlags := GetProcAddressCUDA(cuTexRefGetFlagsName);
+  cuParamSetSize := GetProcAddressCUDA(cuParamSetSizeName);
+  cuParamSeti := GetProcAddressCUDA(cuParamSetiName);
+  cuParamSetf := GetProcAddressCUDA(cuParamSetfName);
+  cuParamSetv := GetProcAddressCUDA(cuParamSetvName);
+  cuParamSetTexRef := GetProcAddressCUDA(cuParamSetTexRefName);
+  cuLaunch := GetProcAddressCUDA(cuLaunchName);
+  cuLaunchGrid := GetProcAddressCUDA(cuLaunchGridName);
+  cuLaunchGridAsync := GetProcAddressCUDA(cuLaunchGridAsyncName);
+  cuEventCreate := GetProcAddressCUDA(cuEventCreateName);
+  cuEventRecord := GetProcAddressCUDA(cuEventRecordName);
+  cuEventQuery := GetProcAddressCUDA(cuEventQueryName);
+  cuEventSynchronize := GetProcAddressCUDA(cuEventSynchronizeName);
+  cuEventDestroy := GetProcAddressCUDA(cuEventDestroyName);
+  cuEventElapsedTime := GetProcAddressCUDA(cuEventElapsedTimeName);
+  cuStreamCreate := GetProcAddressCUDA(cuStreamCreateName);
+  cuStreamQuery := GetProcAddressCUDA(cuStreamQueryName);
+  cuStreamSynchronize := GetProcAddressCUDA(cuStreamSynchronizeName);
+  cuStreamDestroy := GetProcAddressCUDA(cuStreamDestroyName);
+  cuGLCtxCreate := GetProcAddressCUDA(cuGLCtxCreateName);
+  cuGraphicsGLRegisterBuffer := GetProcAddressCUDA(cuGraphicsGLRegisterBufferName);
+  cuGraphicsGLRegisterImage := GetProcAddressCUDA(cuGraphicsGLRegisterImageName);
+  cuWGLGetDevice := GetProcAddressCUDA(cuWGLGetDeviceName);
+  cuGraphicsUnregisterResource := GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
+  cuGraphicsSubResourceGetMappedArray := GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
+  cuGraphicsResourceGetMappedPointer := GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
+  cuGraphicsResourceSetMapFlags := GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
+  cuGraphicsMapResources := GetProcAddressCUDA(cuGraphicsMapResourcesName);
+  cuGraphicsUnmapResources := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
+  cuGLInit := GetProcAddressCUDA(cuGLInitName);
+  cuGLRegisterBufferObject := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
+  cuGLMapBufferObject := GetProcAddressCUDA(cuGLMapBufferObjectName);
+  cuGLUnmapBufferObject := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
+  cuGLUnregisterBufferObject := GetProcAddressCUDA(cuGLUnregisterBufferObjectName);
+  cuGLSetBufferObjectMapFlags := GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
+  cuGLMapBufferObjectAsync := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
+  cuGLUnmapBufferObjectAsync := GetProcAddressCUDA(cuGLUnmapBufferObjectAsyncName);
+{$ELSE}
+  cuInit_ := GetProcAddressCUDA(cuInitName);
+  cuInit := cuInitShell;
+  cuDriverGetVersion_ := GetProcAddressCUDA(cuDriverGetVersionName);
+  cuDriverGetVersion := cuDriverGetVersionShell;
+  cuDeviceGet_ := GetProcAddressCUDA(cuDeviceGet_Name);
+  cuDeviceGet := cuDeviceGetShell;
+  cuDeviceGetCount_ := GetProcAddressCUDA(cuDeviceGetCountName);
+  cuDeviceGetCount := cuDeviceGetCountShell;
+  cuDeviceGetName_ := GetProcAddressCUDA(cuDeviceGetNameName);
+  cuDeviceGetName := cuDeviceGetNameShell;
+  cuDeviceComputeCapability_ := GetProcAddressCUDA(cuDeviceComputeCapabilityName);
+  cuDeviceComputeCapability := cuDeviceComputeCapabilityShell;
+  cuDeviceTotalMem_ := GetProcAddressCUDA(cuDeviceTotalMemName);
+  cuDeviceTotalMem := cuDeviceTotalMemShell;
+  cuDeviceGetProperties_ := GetProcAddressCUDA(cuDeviceGetPropertiesName);
+  cuDeviceGetProperties := cuDeviceGetPropertiesShell;
+  cuDeviceGetAttribute_ := GetProcAddressCUDA(cuDeviceGetAttributeName);
+  cuDeviceGetAttribute := cuDeviceGetAttributeShell;
+  cuCtxCreate_ := GetProcAddressCUDA(cuCtxCreateName);
+  cuCtxCreate := cuCtxCreateShell;
+  cuCtxDestroy_ := GetProcAddressCUDA(cuCtxDestroyName);
+  cuCtxDestroy := cuCtxDestroyShell;
+  cuCtxAttach_ := GetProcAddressCUDA(cuCtxAttachName);
+  cuCtxAttach := cuCtxAttachShell;
+  cuCtxDetach_ := GetProcAddressCUDA(cuCtxDetachName);
+  cuCtxDetach := cuCtxDetachShell;
+  cuCtxPushCurrent_ := GetProcAddressCUDA(cuCtxPushCurrentName);
+  cuCtxPushCurrent := cuCtxPushCurrentShell;
+  cuCtxPopCurrent_ := GetProcAddressCUDA(cuCtxPopCurrentName);
+  cuCtxPopCurrent := cuCtxPopCurrentShell;
+  cuCtxGetDevice_ := GetProcAddressCUDA(cuCtxGetDeviceName);
+  cuCtxGetDevice := cuCtxGetDeviceShell;
+  cuCtxSynchronize_ := GetProcAddressCUDA(cuCtxSynchronizeName);
+  cuCtxSynchronize := cuCtxSynchronizeShell;
+  cuModuleLoad_ := GetProcAddressCUDA(cuModuleLoadName);
+  cuModuleLoad := cuModuleLoadShell;
+  cuModuleLoadData_ := GetProcAddressCUDA(cuModuleLoadDataName);
+  cuModuleLoadData := cuModuleLoadDataShell;
+  cuModuleLoadDataEx_ := GetProcAddressCUDA(cuModuleLoadDataExName);
+  cuModuleLoadDataEx := cuModuleLoadDataExShell;
+  cuModuleLoadFatBinary_ := GetProcAddressCUDA(cuModuleLoadFatBinaryName);
+  cuModuleLoadFatBinary := cuModuleLoadFatBinaryShell;
+  cuModuleUnload_ := GetProcAddressCUDA(cuModuleUnloadName);
+  cuModuleUnload := cuModuleUnloadShell;
+  cuModuleGetFunction_ := GetProcAddressCUDA(cuModuleGetFunctionName);
+  cuModuleGetFunction := cuModuleGetFunctionShell;
+  cuModuleGetGlobal_ := GetProcAddressCUDA(cuModuleGetGlobalName);
+  cuModuleGetGlobal := cuModuleGetGlobalShell;
+  cuModuleGetTexRef_ := GetProcAddressCUDA(cuModuleGetTexRefName);
+  cuModuleGetTexRef := cuModuleGetTexRefShell;
+  cuMemGetInfo_ := GetProcAddressCUDA(cuMemGetInfoName);
+  cuMemGetInfo := cuMemGetInfoShell;
+  cuMemAlloc_ := GetProcAddressCUDA(cuMemAllocName);
+  cuMemAlloc := cuMemAllocShell;
+  cuMemAllocPitch_ := GetProcAddressCUDA(cuMemAllocPitchName);
+  cuMemAllocPitch := cuMemAllocPitchShell;
+  cuMemFree_ := GetProcAddressCUDA(cuMemFreeName);
+  cuMemFree := cuMemFreeShell;
+  cuMemGetAddressRange_ := GetProcAddressCUDA(cuMemGetAddressRangeName);
+  cuMemGetAddressRange := cuMemGetAddressRangeShell;
+  cuMemAllocHost_ := GetProcAddressCUDA(cuMemAllocHostName);
+  cuMemAllocHost := cuMemAllocHostShell;
+  cuMemFreeHost_ := GetProcAddressCUDA(cuMemFreeHostName);
+  cuMemFreeHost := cuMemFreeHostShell;
+  cuMemHostAlloc_ := GetProcAddressCUDA(cuMemHostAllocName);
+  cuMemHostAlloc := cuMemHostAllocShell;
+  cuMemHostGetDevicePointer_ := GetProcAddressCUDA(cuMemHostGetDevicePointerName);
+  cuMemHostGetDevicePointer := cuMemHostGetDevicePointerShell;
+  cuMemHostGetFlags_ := GetProcAddressCUDA(cuMemHostGetFlagsName);
+  cuMemHostGetFlags := cuMemHostGetFlagsShell;
+  cuMemcpyHtoD_ := GetProcAddressCUDA(cuMemcpyHtoDName);
+  cuMemcpyHtoD := cuMemcpyHtoDShell;
+  cuMemcpyDtoH_ := GetProcAddressCUDA(cuMemcpyDtoHName);
+  cuMemcpyDtoH := cuMemcpyDtoHShell;
+  cuMemcpyDtoD_ := GetProcAddressCUDA(cuMemcpyDtoDName);
+  cuMemcpyDtoD := cuMemcpyDtoDShell;
+  cuMemcpyDtoDAsync_ := GetProcAddressCUDA(cuMemcpyDtoDAsyncName);
+  cuMemcpyDtoDAsync := cuMemcpyDtoDAsyncShell;
+  cuMemcpyDtoA_ := GetProcAddressCUDA(cuMemcpyDtoAName);
+  cuMemcpyDtoA := cuMemcpyDtoAShell;
+  cuMemcpyAtoD_ := GetProcAddressCUDA(cuMemcpyAtoDName);
+  cuMemcpyAtoD := cuMemcpyAtoDShell;
+  cuMemcpyHtoA_ := GetProcAddressCUDA(cuMemcpyHtoAName);
+  cuMemcpyHtoA := cuMemcpyHtoAShell;
+  cuMemcpyAtoH_ := GetProcAddressCUDA(cuMemcpyAtoHName);
+  cuMemcpyAtoH := cuMemcpyAtoHShell;
+  cuMemcpyAtoA_ := GetProcAddressCUDA(cuMemcpyAtoAName);
+  cuMemcpyAtoA := cuMemcpyAtoAShell;
+  cuMemcpy2D_ := GetProcAddressCUDA(cuMemcpy2DName);
+  cuMemcpy2D := cuMemcpy2DShell;
+  cuMemcpy2DUnaligned_ := GetProcAddressCUDA(cuMemcpy2DUnalignedName);
+  cuMemcpy2DUnaligned := cuMemcpy2DUnalignedShell;
+  cuMemcpy3D_ := GetProcAddressCUDA(cuMemcpy3DName);
+  cuMemcpy3D := cuMemcpy3DShell;
+  cuMemcpyHtoDAsync_ := GetProcAddressCUDA(cuMemcpyHtoDAsyncName);
+  cuMemcpyHtoDAsync := cuMemcpyHtoDAsyncShell;
+  cuMemcpyDtoHAsync_ := GetProcAddressCUDA(cuMemcpyDtoHAsyncName);
+  cuMemcpyDtoHAsync := cuMemcpyDtoHAsyncShell;
+  cuMemcpyHtoAAsync_ := GetProcAddressCUDA(cuMemcpyHtoAAsyncName);
+  cuMemcpyHtoAAsync := cuMemcpyHtoAAsyncShell;
+  cuMemcpyAtoHAsync_ := GetProcAddressCUDA(cuMemcpyAtoHAsyncName);
+  cuMemcpyAtoHAsync := cuMemcpyAtoHAsyncShell;
+  cuMemcpy2DAsync_ := GetProcAddressCUDA(cuMemcpy2DAsyncName);
+  cuMemcpy2DAsync := cuMemcpy2DAsyncShell;
+  cuMemcpy3DAsync_ := GetProcAddressCUDA(cuMemcpy3DAsyncName);
+  cuMemcpy3DAsync := cuMemcpy3DAsyncShell;
+  cuMemsetD8_ := GetProcAddressCUDA(cuMemsetD8Name);
+  cuMemsetD8 := cuMemsetD8Shell;
+  cuMemsetD16_ := GetProcAddressCUDA(cuMemsetD16Name);
+  cuMemsetD16 := cuMemsetD16Shell;
+  cuMemsetD32_ := GetProcAddressCUDA(cuMemsetD32Name);
+  cuMemsetD32 := cuMemsetD32Shell;
+  cuMemsetD2D8_ := GetProcAddressCUDA(cuMemsetD2D8Name);
+  cuMemsetD2D8 := cuMemsetD2D8Shell;
+  cuMemsetD2D16_ := GetProcAddressCUDA(cuMemsetD2D16Name);
+  cuMemsetD2D16 := cuMemsetD2D16Shell;
+  cuMemsetD2D32_ := GetProcAddressCUDA(cuMemsetD2D32Name);
+  cuMemsetD2D32 := cuMemsetD2D32Shell;
+  cuFuncSetBlockShape_ := GetProcAddressCUDA(cuFuncSetBlockShapeName);
+  cuFuncSetBlockShape := cuFuncSetBlockShapeShell;
+  cuFuncSetSharedSize_ := GetProcAddressCUDA(cuFuncSetSharedSizeName);
+  cuFuncSetSharedSize := cuFuncSetSharedSizeShell;
+  cuFuncGetAttribute_ := GetProcAddressCUDA(cuFuncGetAttributeName);
+  cuFuncGetAttribute := cuFuncGetAttributeShell;
+  cuArrayCreate_ := GetProcAddressCUDA(cuArrayCreateName);
+  cuArrayCreate := cuArrayCreateShell;
+  cuArrayGetDescriptor_ := GetProcAddressCUDA(cuArrayGetDescriptorName);
+  cuArrayGetDescriptor := cuArrayGetDescriptorShell;
+  cuArrayDestroy_ := GetProcAddressCUDA(cuArrayDestroyName);
+  cuArrayDestroy := cuArrayDestroyShell;
+  cuArray3DCreate_ := GetProcAddressCUDA(cuArray3DCreateName);
+  cuArray3DCreate := cuArray3DCreateShell;
+  cuArray3DGetDescriptor_ := GetProcAddressCUDA(cuArray3DGetDescriptorName);
+  cuArray3DGetDescriptor := cuArray3DGetDescriptorShell;
+  cuTexRefCreate_ := GetProcAddressCUDA(cuTexRefCreateName);
+  cuTexRefCreate := cuTexRefCreateShell;
+  cuTexRefDestroy_ := GetProcAddressCUDA(cuTexRefDestroyName);
+  cuTexRefDestroy := cuTexRefDestroyShell;
+  cuTexRefSetArray_ := GetProcAddressCUDA(cuTexRefSetArrayName);
+  cuTexRefSetArray := cuTexRefSetArrayShell;
+  cuTexRefSetAddress_ := GetProcAddressCUDA(cuTexRefSetAddressName);
+  cuTexRefSetAddress := cuTexRefSetAddressShell;
+  cuTexRefSetAddress2D_ := GetProcAddressCUDA(cuTexRefSetAddress2DName);
+  cuTexRefSetAddress2D := cuTexRefSetAddress2DShell;
+  cuTexRefSetFormat_ := GetProcAddressCUDA(cuTexRefSetFormatName);
+  cuTexRefSetFormat := cuTexRefSetFormatShell;
+  cuTexRefSetAddressMode_ := GetProcAddressCUDA(cuTexRefSetAddressModeName);
+  cuTexRefSetAddressMode := cuTexRefSetAddressModeShell;
+  cuTexRefSetFilterMode_ := GetProcAddressCUDA(cuTexRefSetFilterModeName);
+  cuTexRefSetFilterMode := cuTexRefSetFilterModeShell;
+  cuTexRefSetFlags_ := GetProcAddressCUDA(cuTexRefSetFlagsName);
+  cuTexRefSetFlags := cuTexRefSetFlagsShell;
+  cuTexRefGetAddress_ := GetProcAddressCUDA(cuTexRefGetAddressName);
+  cuTexRefGetAddress := cuTexRefGetAddressShell;
+  cuTexRefGetArray_ := GetProcAddressCUDA(cuTexRefGetArrayName);
+  cuTexRefGetArray := cuTexRefGetArrayShell;
+  cuTexRefGetAddressMode_ := GetProcAddressCUDA(cuTexRefGetAddressModeName);
+  cuTexRefGetAddressMode := cuTexRefGetAddressModeShell;
+  cuTexRefGetFilterMode_ := GetProcAddressCUDA(cuTexRefGetFilterModeName);
+  cuTexRefGetFilterMode := cuTexRefGetFilterModeShell;
+  cuTexRefGetFormat_ := GetProcAddressCUDA(cuTexRefGetFormatName);
+  cuTexRefGetFormat := cuTexRefGetFormatShell;
+  cuTexRefGetFlags_ := GetProcAddressCUDA(cuTexRefGetFlagsName);
+  cuTexRefGetFlags := cuTexRefGetFlagsShell;
+  cuParamSetSize_ := GetProcAddressCUDA(cuParamSetSizeName);
+  cuParamSetSize := cuParamSetSizeShell;
+  cuParamSeti_ := GetProcAddressCUDA(cuParamSetiName);
+  cuParamSeti := cuParamSetiShell;
+  cuParamSetf_ := GetProcAddressCUDA(cuParamSetfName);
+  cuParamSetf := cuParamSetfShell;
+  cuParamSetv_ := GetProcAddressCUDA(cuParamSetvName);
+  cuParamSetv := cuParamSetvShell;
+  cuParamSetTexRef_ := GetProcAddressCUDA(cuParamSetTexRefName);
+  cuParamSetTexRef := cuParamSetTexRefShell;
+  cuLaunch_ := GetProcAddressCUDA(cuLaunchName);
+  cuLaunch := cuLaunchShell;
+  cuLaunchGrid_ := GetProcAddressCUDA(cuLaunchGridName);
+  cuLaunchGrid := cuLaunchGridShell;
+  cuLaunchGridAsync_ := GetProcAddressCUDA(cuLaunchGridAsyncName);
+  cuLaunchGridAsync := cuLaunchGridAsyncShell;
+  cuEventCreate_ := GetProcAddressCUDA(cuEventCreateName);
+  cuEventCreate := cuEventCreateShell;
+  cuEventRecord_ := GetProcAddressCUDA(cuEventRecordName);
+  cuEventRecord := cuEventRecordShell;
+  cuEventQuery_ := GetProcAddressCUDA(cuEventQueryName);
+  cuEventQuery := cuEventQueryShell;
+  cuEventSynchronize_ := GetProcAddressCUDA(cuEventSynchronizeName);
+  cuEventSynchronize := cuEventSynchronizeShell;
+  cuEventDestroy_ := GetProcAddressCUDA(cuEventDestroyName);
+  cuEventDestroy := cuEventDestroyShell;
+  cuEventElapsedTime_ := GetProcAddressCUDA(cuEventElapsedTimeName);
+  cuEventElapsedTime := cuEventElapsedTimeShell;
+  cuStreamCreate_ := GetProcAddressCUDA(cuStreamCreateName);
+  cuStreamCreate := cuStreamCreateShell;
+  cuStreamQuery_ := GetProcAddressCUDA(cuStreamQueryName);
+  cuStreamQuery := cuStreamQueryShell;
+  cuStreamSynchronize_ := GetProcAddressCUDA(cuStreamSynchronizeName);
+  cuStreamSynchronize := cuStreamSynchronizeShell;
+  cuStreamDestroy_ := GetProcAddressCUDA(cuStreamDestroyName);
+  cuStreamDestroy := cuStreamDestroyShell;
+  cuGLCtxCreate_ := GetProcAddressCUDA(cuGLCtxCreateName);
+  cuGLCtxCreate := cuGLCtxCreateShell;
+  cuGraphicsGLRegisterBuffer_ := GetProcAddressCUDA(cuGraphicsGLRegisterBufferName);
+  cuGraphicsGLRegisterBuffer := cuGraphicsGLRegisterBufferShell;
+  cuGraphicsGLRegisterImage_ := GetProcAddressCUDA(cuGraphicsGLRegisterImageName);
+  cuGraphicsGLRegisterImage := cuGraphicsGLRegisterImageShell;
+  cuWGLGetDevice_ := GetProcAddressCUDA(cuWGLGetDeviceName);
+  cuWGLGetDevice := cuWGLGetDeviceShell;
+  cuGraphicsUnregisterResource_ := GetProcAddressCUDA(cuGraphicsUnregisterResourceName);
+  cuGraphicsUnregisterResource := cuGraphicsUnregisterResourceShell;
+  cuGraphicsSubResourceGetMappedArray_ := GetProcAddressCUDA(cuGraphicsSubResourceGetMappedArrayName);
+  cuGraphicsSubResourceGetMappedArray := cuGraphicsSubResourceGetMappedArrayShell;
+  cuGraphicsResourceGetMappedPointer_ := GetProcAddressCUDA(cuGraphicsResourceGetMappedPointerName);
+  cuGraphicsResourceGetMappedPointer := cuGraphicsResourceGetMappedPointerShell;
+  cuGraphicsResourceSetMapFlags_ := GetProcAddressCUDA(cuGraphicsResourceSetMapFlagsName);
+  cuGraphicsResourceSetMapFlags := cuGraphicsResourceSetMapFlagsShell;
+  cuGraphicsMapResources_ := GetProcAddressCUDA(cuGraphicsMapResourcesName);
+  cuGraphicsMapResources := cuGraphicsMapResourcesShell;
+  cuGraphicsUnmapResources_ := GetProcAddressCUDA(cuGraphicsUnmapResourcesName);
+  cuGraphicsUnmapResources := cuGraphicsUnmapResourcesShell;
+  cuGLInit := GetProcAddressCUDA(cuGLInitName);
+  cuGLRegisterBufferObject_ := GetProcAddressCUDA(cuGLRegisterBufferObjectName);
+  cuGLRegisterBufferObject := cuGLRegisterBufferObjectShell;
+  cuGLMapBufferObject_ := GetProcAddressCUDA(cuGLMapBufferObjectName);
+  cuGLMapBufferObject := cuGLMapBufferObjectShell;
+  cuGLUnmapBufferObject_ := GetProcAddressCUDA(cuGLUnmapBufferObjectName);
+  cuGLUnmapBufferObject := cuGLUnmapBufferObjectShell;
+  cuGLUnregisterBufferObject_ := GetProcAddressCUDA(cuGLUnregisterBufferObjectName);
+  cuGLUnregisterBufferObject := cuGLUnregisterBufferObjectShell;
+  cuGLSetBufferObjectMapFlags_ := GetProcAddressCUDA(cuGLSetBufferObjectMapFlagsName);
+  cuGLSetBufferObjectMapFlags := cuGLSetBufferObjectMapFlagsShell;
+  cuGLMapBufferObjectAsync_ := GetProcAddressCUDA(cuGLMapBufferObjectAsyncName);
+  cuGLMapBufferObjectAsync := cuGLMapBufferObjectAsyncShell;
+  cuGLUnmapBufferObjectAsync_ := GetProcAddressCUDA(cuGLUnmapBufferObjectAsyncName);
+  cuGLUnmapBufferObjectAsync := cuGLUnmapBufferObjectAsyncShell;
+{$ENDIF GLS_CUDA_DEBUG_MODE}
+  cuDriverGetVersion(V);
+  {$IFDEF USE_LOGGING}
+    LogInfoFmt('%s version %d is loaded', [CUDAAPIDLL, V]);
+  {$ENDIF}
+  Result := True;
+end;
+
+// Returns True when CUDAHandle holds anything other than the
+// INVALID_MODULEHANDLE sentinel, and False otherwise.
+function IsCUDAInitialized: Boolean;
+begin
+  Result := CUDAHandle <> INVALID_MODULEHANDLE;
+end;
+
+// Maps a driver-API result code to a short English description.
+// The codes are compared in the order listed below and the first match
+// wins; a code that matches none of them yields 'Unknown error'.
+function Get_CUDA_API_Error_String(AError: TCUresult): string;
+begin
+  if AError = CUDA_SUCCESS then Exit('No errors');
+  if AError = CUDA_ERROR_INVALID_VALUE then Exit('Invalid value');
+  if AError = CUDA_ERROR_OUT_OF_MEMORY then Exit('Out of memory');
+  if AError = CUDA_ERROR_NOT_INITIALIZED then Exit('Driver not initialized');
+  if AError = CUDA_ERROR_DEINITIALIZED then Exit('Driver deinitialized');
+  if AError = CUDA_ERROR_NO_DEVICE then Exit('No CUDA-capable device available');
+  if AError = CUDA_ERROR_INVALID_DEVICE then Exit('Invalid device');
+  if AError = CUDA_ERROR_INVALID_IMAGE then Exit('Invalid kernel image');
+  if AError = CUDA_ERROR_INVALID_CONTEXT then Exit('Invalid context');
+  if AError = CUDA_ERROR_CONTEXT_ALREADY_CURRENT then Exit('Context already current');
+  if AError = CUDA_ERROR_MAP_FAILED then Exit('Map failed');
+  if AError = CUDA_ERROR_UNMAP_FAILED then Exit('Unmap failed');
+  if AError = CUDA_ERROR_ARRAY_IS_MAPPED then Exit('Array is mapped');
+  if AError = CUDA_ERROR_ALREADY_MAPPED then Exit('Already mapped');
+  if AError = CUDA_ERROR_NO_BINARY_FOR_GPU then Exit('No binary for GPU');
+  if AError = CUDA_ERROR_ALREADY_ACQUIRED then Exit('Already acquired');
+  if AError = CUDA_ERROR_NOT_MAPPED then Exit('Not mapped');
+  if AError = CUDA_ERROR_NOT_MAPPED_AS_ARRAY then Exit('Not mapped as array');
+  if AError = CUDA_ERROR_NOT_MAPPED_AS_POINTER then Exit('Not mapped as pointer');
+  if AError = CUDA_ERROR_INVALID_SOURCE then Exit('Invalid source');
+  if AError = CUDA_ERROR_FILE_NOT_FOUND then Exit('File not found');
+  if AError = CUDA_ERROR_INVALID_HANDLE then Exit('Invalid handle');
+  if AError = CUDA_ERROR_NOT_FOUND then Exit('Not found');
+  if AError = CUDA_ERROR_NOT_READY then Exit('CUDA not ready');
+  if AError = CUDA_ERROR_LAUNCH_FAILED then Exit('Launch failed');
+  if AError = CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES then Exit('Launch exceeded resources');
+  if AError = CUDA_ERROR_LAUNCH_TIMEOUT then Exit('Launch exceeded timeout');
+  if AError = CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING then Exit('Launch with incompatible texturing');
+  if AError = CUDA_ERROR_POINTER_IS_64BIT then Exit('Pointer is 64bit');
+  if AError = CUDA_ERROR_SIZE_IS_64BIT then Exit('Size is 64bit');
+  // No known code matched.
+  Result := 'Unknown error';
+end;
+
+end.

+ 169 - 179
Source/GLS.CUDAParallelPrimitives.pas → Source/Import.CUDAParallelPrimitives.pas

@@ -1,179 +1,169 @@
-//
-// This unit is part of the GLScene Engine, http://glscene.org
-//
-
-unit GLS.CUDAParallelPrimitives;
-
-(* CUDA Parallel Primitives *)
-
-// -------------------------------------------------------------
-// cuDPP -- CUDA Data Parallel Primitives library
-// -------------------------------------------------------------
-// $Revision: 4567 $
-// $Date: 2020-05-17
-// -------------------------------------------------------------
-// This source code is distributed under the terms of license.txt in
-// the root directory of this source distribution.
-// -------------------------------------------------------------
-
-interface
-
-const
-  CUDPPDLL = 'cudpp32.dll';
-  CUDPP_INVALID_HANDLE = $C0DABAD1;
-
-type
-
-  TCUDPPResult = (
-    CUDPP_SUCCESS, // No error.
-    CUDPP_ERROR_INVALID_HANDLE, // Specified handle (for example,
-    //          to a plan) is invalid.
-    CUDPP_ERROR_ILLEGAL_CONFIGURATION, // Specified configuration is
-    //     illegal. For example, an
-    //     invalid or illogical
-    //     combination of options.
-    CUDPP_ERROR_UNKNOWN // Unknown or untraceable error.
-    );
-
-  TCUDPPOption = (
-    CUDPP_OPTION_FORWARD, // Algorithms operate forward:
-    // from start to end of input
-    // array
-    CUDPP_OPTION_BACKWARD, // Algorithms operate backward:
-    // from end to start of array
-    CUDPP_OPTION_EXCLUSIVE, // Exclusive (for scans) - scan
-    // includes all elements up to (but
-    // not including) the current
-    // element
-    CUDPP_OPTION_INCLUSIVE, // Inclusive (for scans) - scan
-    // includes all elements up to and
-    // including the current element
-    CUDPP_OPTION_CTA_LOCAL, // Algorithm performed only on
-    // the CTAs (blocks) with no
-    // communication between blocks.
-    // @todo Currently ignored.
-    CUDPP_OPTION_KEYS_ONLY, // No associated value to a key
-    // (for global radix sort)
-    CUDPP_OPTION_KEY_VALUE_PAIRS // Each key has an associated value
-    );
-
-  TCUDPPDatatype = (
-    CUDPP_CHAR, // Character type (C char)
-    CUDPP_UCHAR, // Unsigned character (byte) type (C unsigned char)
-    CUDPP_INT, // Integer type (C int)
-    CUDPP_UINT, // Unsigned integer type (C unsigned int)
-    CUDPP_FLOAT // Float type (C float)
-    );
-
-  TCUDPPOperator = (
-    CUDPP_ADD, // Addition of two operands
-    CUDPP_MULTIPLY, // Multiplication of two operands
-    CUDPP_MIN, // Minimum of two operands
-    CUDPP_MAX // Maximum of two operands
-    );
-
-  TCUDPPAlgorithm = (
-    CUDPP_SCAN,
-    CUDPP_SEGMENTED_SCAN,
-    CUDPP_COMPACT,
-    CUDPP_REDUCE,
-    CUDPP_SORT_RADIX,
-    CUDPP_SPMVMULT, // Sparse matrix-dense vector multiplication
-    CUDPP_RAND_MD5, // Pseudo Random Number Generator using MD5 hash algorithm
-    CUDPP_ALGORITHM_INVALID // Placeholder at end of enum
-    );
-
-  TCUDPPConfiguration = record
-    algorithm: TCUDPPAlgorithm; // The algorithm to be used
-    op: TCUDPPOperator; // The numerical operator to be applied
-    datatype: TCUDPPDatatype; // The datatype of the input arrays
-    options: TCUDPPoption; // Options to configure the algorithm
-  end;
-
-  TCUDPPHandle = NativeUInt;
-
-  // Plan allocation (for scan, sort, and compact)
-
-function cudppPlan(var planHandle: TCUDPPHandle;
-  config: TCUDPPConfiguration;
-  n: NativeUInt;
-  rows: NativeUInt;
-  rowPitch: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppDestroyPlan(plan: TCUDPPHandle): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-// Scan and sort algorithms
-
-function cudppScan(planHandle: TCUDPPHandle;
-  var d_out;
-  var d_in,
-  numElements: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppMultiScan(planHandle: TCUDPPHandle;
-  var d_out;
-  var d_in;
-  numElements: NativeUInt;
-  numRows: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppSegmentedScan(planHandle: TCUDPPHandle;
-  var d_out;
-  var d_idata;
-  const d_iflags: PCardinal;
-  numElements: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppCompact(planHandle: TCUDPPHandle;
-  var d_out;
-  var d_numValidElements: NativeUInt;
-  var d_in;
-  const d_isValid: PCardinal;
-  numElements: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppSort(planHandle: TCUDPPHandle;
-  var d_keys;
-  var d_values;
-  keybits: Integer;
-  numElements: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-// Sparse matrix allocation
-
-function cudppSparseMatrix(var sparseMatrixHandle: TCUDPPHandle;
-  config: TCUDPPConfiguration;
-  n: NativeUInt;
-  rows: NativeUInt;
-  var A;
-  const h_rowIndices: PCardinal;
-  const h_indices: PCardinal): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppDestroySparseMatrix(sparseMatrixHandle: TCUDPPHandle):
-  TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-// Sparse matrix-vector algorithms
-
-function cudppSparseMatrixVectorMultiply(sparseMatrixHandle: TCUDPPHandle;
-  var d_y;
-  var d_x): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-// random number generation algorithms
-function cudppRand(planHandle: TCUDPPHandle;
-  var d_out;
-  numElements: NativeUInt): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-function cudppRandSeed(const planHandle: TCUDPPHandle;
-  seed: Cardinal): TCUDPPResult;
-{$IFDEF MSWINDOWS}stdcall;{$ELSE}cdecl;{$ENDIF}external CUDPPDLL;
-
-implementation
-
-end.
-
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit Import.CUDAParallelPrimitives;
+
+(* CUDA Parallel Primitives *)
+
+// -------------------------------------------------------------
+// cuDPP -- CUDA Data Parallel Primitives library
+// -------------------------------------------------------------
+// $Revision: 4567 $
+// $Date: 2020-05-17
+// -------------------------------------------------------------
+// This source code is distributed under the terms of license.txt in
+// the root directory of this source distribution.
+// -------------------------------------------------------------
+
+interface
+
+const
+  // File name of the library that every "external CUDPPDLL" declaration in
+  // this unit binds to.
+  // NOTE(review): the name carries a "32" suffix and is used unconditionally -
+  // confirm what should happen on 64-bit targets.
+  CUDPPDLL = 'cudpp32.dll';
+  // Sentinel handle value; presumably mirrors CUDPP_INVALID_HANDLE from
+  // cudpp.h - verify against the header.
+  CUDPP_INVALID_HANDLE = $C0DABAD1;
+
+type
+
+  // Result codes returned by the cuDPP functions imported in this unit.
+  TCUDPPResult = (
+    // No error.
+    CUDPP_SUCCESS,
+    // Specified handle (for example, to a plan) is invalid.
+    CUDPP_ERROR_INVALID_HANDLE,
+    // Specified configuration is illegal. For example, an invalid or
+    // illogical combination of options.
+    CUDPP_ERROR_ILLEGAL_CONFIGURATION,
+    // Unknown or untraceable error.
+    CUDPP_ERROR_UNKNOWN
+  );
+
+  // Option flags stored in TCUDPPConfiguration.options.
+  // Every member is given an explicit value so that it matches the bit flag
+  // the C header assigns to the corresponding CUDPPOption enumerator; left
+  // implicit, the compiler would number the members 0..6 and the library
+  // would be handed different values from the ones it defines.
+  // NOTE(review): the values below follow the CUDPP 1.1 cudpp.h - confirm
+  // against the cudpp build that ships with the project. Also confirm the
+  // enumeration size (the C enum is int-sized) for this unit's compiler
+  // settings.
+  TCUDPPOption = (
+    // Algorithms operate forward: from start to end of input array
+    CUDPP_OPTION_FORWARD = $1,
+    // Algorithms operate backward: from end to start of array
+    CUDPP_OPTION_BACKWARD = $2,
+    // Exclusive (for scans) - scan includes all elements up to (but not
+    // including) the current element
+    CUDPP_OPTION_EXCLUSIVE = $4,
+    // Inclusive (for scans) - scan includes all elements up to and
+    // including the current element
+    CUDPP_OPTION_INCLUSIVE = $8,
+    // Algorithm performed only on the CTAs (blocks) with no communication
+    // between blocks.
+    // @todo Currently ignored.
+    CUDPP_OPTION_CTA_LOCAL = $10,
+    // No associated value to a key (for global radix sort)
+    CUDPP_OPTION_KEYS_ONLY = $20,
+    // Each key has an associated value
+    CUDPP_OPTION_KEY_VALUE_PAIRS = $40
+    );
+
+  // Element types selectable in TCUDPPConfiguration.datatype.
+  TCUDPPDatatype = (
+    // Character type (C char)
+    CUDPP_CHAR,
+    // Unsigned character (byte) type (C unsigned char)
+    CUDPP_UCHAR,
+    // Integer type (C int)
+    CUDPP_INT,
+    // Unsigned integer type (C unsigned int)
+    CUDPP_UINT,
+    // Float type (C float)
+    CUDPP_FLOAT
+  );
+
+  // Binary operators selectable in TCUDPPConfiguration.op.
+  TCUDPPOperator = (
+    // Addition of two operands
+    CUDPP_ADD,
+    // Multiplication of two operands
+    CUDPP_MULTIPLY,
+    // Minimum of two operands
+    CUDPP_MIN,
+    // Maximum of two operands
+    CUDPP_MAX
+  );
+
+  // Algorithms selectable in TCUDPPConfiguration.algorithm.
+  TCUDPPAlgorithm = (
+    CUDPP_SCAN,
+    CUDPP_SEGMENTED_SCAN,
+    CUDPP_COMPACT,
+    CUDPP_REDUCE,
+    CUDPP_SORT_RADIX,
+    // Sparse matrix-dense vector multiplication
+    CUDPP_SPMVMULT,
+    // Pseudo Random Number Generator using MD5 hash algorithm
+    CUDPP_RAND_MD5,
+    // Placeholder at end of enum
+    CUDPP_ALGORITHM_INVALID
+  );
+
+  // Configuration passed by value to cudppPlan and cudppSparseMatrix.
+  TCUDPPConfiguration = record
+    // The algorithm to be used
+    algorithm: TCUDPPAlgorithm;
+    // The numerical operator to be applied
+    op: TCUDPPOperator;
+    // The datatype of the input arrays
+    datatype: TCUDPPDatatype;
+    // Options to configure the algorithm
+    options: TCUDPPOption;
+  end;
+
+  // Handle type used for plans and sparse matrices by the functions below;
+  // a pointer-sized unsigned integer.
+  TCUDPPHandle = NativeUInt;
+
+  // Plan allocation (for scan, sort, and compact)
+
+// Imported from CUDPPDLL (stdcall).
+// planHandle is passed by reference; presumably it receives the handle of
+// the plan built from config - confirm against cudpp.h.
+function cudppPlan(var planHandle: TCUDPPHandle; config: TCUDPPConfiguration;
+  n, rows, rowPitch: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall); takes a plan handle by value.
+function cudppDestroyPlan(plan: TCUDPPHandle): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Scan and sort algorithms
+
+// Imported from CUDPPDLL (stdcall).
+// d_out and d_in are untyped var parameters, so the address of whatever the
+// caller supplies is passed for each; numElements is passed by value.
+// The separator after d_in must be a semicolon: with a comma, d_in and
+// numElements form one "var ... : NativeUInt" group and the element count
+// would be passed by reference instead of by value, unlike every other
+// declaration in this unit.
+function cudppScan(planHandle: TCUDPPHandle;
+  var d_out;
+  var d_in;
+  numElements: NativeUInt): TCUDPPResult;stdcall;external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall).
+// d_out and d_in are untyped var parameters (passed by address); the two
+// counts are passed by value.
+function cudppMultiScan(planHandle: TCUDPPHandle; var d_out; var d_in;
+  numElements, numRows: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall).
+// d_out and d_idata are untyped var parameters (passed by address);
+// d_iflags is a pointer to Cardinal values.
+function cudppSegmentedScan(planHandle: TCUDPPHandle; var d_out; var d_idata;
+  const d_iflags: PCardinal; numElements: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall).
+// d_out and d_in are untyped var parameters; d_numValidElements is a
+// NativeUInt passed by reference; d_isValid is a pointer to Cardinal values.
+function cudppCompact(planHandle: TCUDPPHandle; var d_out;
+  var d_numValidElements: NativeUInt; var d_in; const d_isValid: PCardinal;
+  numElements: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall).
+// d_keys and d_values are untyped var parameters (passed by address);
+// keybits and numElements are passed by value.
+function cudppSort(planHandle: TCUDPPHandle; var d_keys; var d_values;
+  keybits: Integer; numElements: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Sparse matrix allocation
+
+// Imported from CUDPPDLL (stdcall).
+// sparseMatrixHandle is passed by reference; presumably it receives the new
+// handle - confirm against cudpp.h. A is an untyped var parameter; the two
+// index arguments are pointers to Cardinal values.
+function cudppSparseMatrix(var sparseMatrixHandle: TCUDPPHandle;
+  config: TCUDPPConfiguration; n, rows: NativeUInt; var A;
+  const h_rowIndices: PCardinal; const h_indices: PCardinal): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall); takes a sparse-matrix handle by value.
+function cudppDestroySparseMatrix(sparseMatrixHandle: TCUDPPHandle): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Sparse matrix-vector algorithms
+
+// Imported from CUDPPDLL (stdcall).
+// d_y and d_x are untyped var parameters (passed by address).
+function cudppSparseMatrixVectorMultiply(sparseMatrixHandle: TCUDPPHandle;
+  var d_y; var d_x): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// random number generation algorithms
+// Imported from CUDPPDLL (stdcall).
+// d_out is an untyped var parameter (passed by address); numElements is
+// passed by value.
+function cudppRand(planHandle: TCUDPPHandle; var d_out;
+  numElements: NativeUInt): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+// Imported from CUDPPDLL (stdcall); takes a plan handle and a Cardinal seed.
+function cudppRandSeed(const planHandle: TCUDPPHandle; seed: Cardinal): TCUDPPResult;
+  stdcall; external CUDPPDLL;
+
+//-------------------------------------
+implementation
+//-------------------------------------
+
+end.
+

+ 695 - 0
Source/Import.CUDARuntime.pas

@@ -0,0 +1,695 @@
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit Import.CUDARuntime;
+
+(* GLScene CUDA Runtime *)
+
+(*
+ * Copyright 1993-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * NOTICE TO USER:
+ *
+ * This source code is subject to NVIDIA ownership rights under U.S. and
+ * international Copyright laws.  Users and possessors of this source code
+ * are hereby granted a nonexclusive, royalty-free license to use this code
+ * in individual and commercial software.
+ *
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+ * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+ * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
+ * OR PERFORMANCE OF THIS SOURCE CODE.
+ *
+ * U.S. Government End Users.   This source code is a "commercial item" as
+ * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
+ * "commercial computer  software"  and "commercial computer software
+ * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
+ * and is provided to the U.S. Government only as a commercial end item.
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+ * source code with only those rights set forth herein.
+ *
+ * Any use of this source code in individual and commercial software must
+ * include, in the user documentation and internal comments to the code,
+ * the above Disclaimer and U.S. Government End Users Notice.
+ *)
+
+interface
+
+{$I GLScene.inc}
+
+uses
+  Winapi.Windows,
+  Import.CUDAApi;
+
+const
+  // Base names (without extension) of CUDA runtime libraries, with one list
+  // per target: ten entries under WIN32, eight under WIN64.
+  // NOTE(review): the code that consumes this array is not visible here;
+  // presumably the names are probed in order - confirm.
+  // The WIN32 list ends with the unversioned names 'cudart32' and 'cudart';
+  // the WIN64 list has no unversioned entries.
+{$IFDEF WIN32}
+  CUDARTDLLNAMES: array [0 .. 9] of string = (
+    'cudart32_42_9', 'cudart32_41_28',
+    'cudart32_40_10', 'cudart32_32_16', 'cudart32_31_4',
+    'cudart32_30_14', 'cudart32_30_9', 'cudart32_30_8', 'cudart32', 'cudart');
+{$ENDIF}
+
+{$IFDEF WIN64}
+  CUDARTDLLNAMES: array [0 .. 7] of string = (
+    'cudart64_42_9', 'cudart64_41_28',
+    'cudart64_40_10', 'cudart64_32_16', 'cudart64_31_4',
+    'cudart64_30_14', 'cudart64_30_9', 'cudart64_30_8');
+{$ENDIF}
+
+const
+  // single precision constants
+  // NOTE(review): the five typed constants below are initialised from integer
+  // literals that look like IEEE-754 bit patterns. A typed Single constant
+  // presumably takes the literal's numeric value (e.g. $7F800000 becomes
+  // 2139095040.0) rather than reinterpreting the bits, in which case none of
+  // them holds the special value its name suggests - confirm and fix if so.
+  CUDART_INF_F: Single = $7F800000;
+  CUDART_NAN_F: Single = $7FFFFFFF;
+  CUDART_MIN_DENORM_F: Single = $00000001;
+  CUDART_MAX_NORMAL_F: Single = $7F7FFFFF;
+  CUDART_NEG_ZERO_F: Single = $80000000;
+  // Untyped floating-point constants written as decimal literals.
+  CUDART_ZERO_F = 0.0;
+  CUDART_ONE_F = 1.0;
+  CUDART_SQRT_HALF_F = 0.707106781;
+  CUDART_SQRT_TWO_F = 1.414213562;
+  CUDART_THIRD_F = 0.333333333;
+  CUDART_PIO4_F = 0.785398163;
+  CUDART_PIO2_F = 1.570796327;
+  CUDART_3PIO4_F = 2.356194490;
+  CUDART_2_OVER_PI_F = 0.636619772;
+  CUDART_PI_F = 3.141592654;
+  CUDART_L2E_F = 1.442695041;
+  CUDART_L2T_F = 3.321928094;
+  CUDART_LG2_F = 0.301029996;
+  CUDART_LGE_F = 0.434294482;
+  CUDART_LN2_F = 0.693147181;
+  CUDART_LNT_F = 2.302585093;
+  CUDART_LNPI_F = 1.144729886;
+  CUDART_TWO_TO_M126_F = 1.175494351E-38;
+  CUDART_TWO_TO_126_F = 8.507059173E37;
+  CUDART_NORM_HUGE_F = 3.402823466E38;
+  CUDART_TWO_TO_23_F = 8388608.0;
+  CUDART_TWO_TO_24_F = 16777216.0;
+  CUDART_TWO_TO_31_F = 2147483648.0;
+  CUDART_TWO_TO_32_F = 4294967296.0;
+  // Integer constants; the mask is defined as equal to the bit count.
+  CUDART_REMQUO_BITS_F = 3;
+  CUDART_REMQUO_MASK_F = CUDART_REMQUO_BITS_F;
+  CUDART_TRIG_PLOSS_F = 48039.0;
+
+  // double precision constants
+  // Both branches of the conditional below declare exactly the same four
+  // constants with the same initialisers, so CUDA_NO_SM_13_DOUBLE_INTRINSICS
+  // currently has no effect here.
+  // NOTE(review): as with the single-precision versions, these typed Double
+  // constants are initialised from integer literals that look like IEEE-754
+  // bit patterns; presumably the numeric value is stored, not the bit
+  // pattern - confirm and fix if so.
+{$IFNDEF CUDA_NO_SM_13_DOUBLE_INTRINSICS}
+  CUDART_INF: Double = $7FF0000000000000;
+  CUDART_NAN: Double = $FFF8000000000000;
+  CUDART_NEG_ZERO: Double = $8000000000000000;
+  CUDART_MIN_DENORM: Double = $0000000000000001;
+{$ELSE} // not CUDA_NO_SM_13_DOUBLE_INTRINSICS
+  CUDART_INF: Double = $7FF0000000000000;
+  CUDART_NAN: Double = $FFF8000000000000;
+  CUDART_NEG_ZERO: Double = $8000000000000000;
+  CUDART_MIN_DENORM: Double = $0000000000000001;
+{$ENDIF}
+  // Untyped double-precision constants written as decimal literals.
+  // Several values come as a name plus _HI / _LO companions; presumably the
+  // high and low parts of a split representation - confirm against the CUDA
+  // math_constants header.
+  CUDART_ZERO = 0.0;
+  CUDART_ONE = 1.0;
+  CUDART_SQRT_TWO = 1.4142135623730951E+0;
+  CUDART_SQRT_HALF = 7.0710678118654757E-1;
+  CUDART_THIRD = 3.3333333333333333E-1;
+  CUDART_TWOTHIRD = 6.6666666666666667E-1;
+  CUDART_PIO4 = 7.8539816339744828E-1;
+  CUDART_PIO4_HI = 7.8539816339744828E-1;
+  CUDART_PIO4_LO = 3.0616169978683830E-17;
+  CUDART_PIO2 = 1.5707963267948966E+0;
+  CUDART_PIO2_HI = 1.5707963267948966E+0;
+  CUDART_PIO2_LO = 6.1232339957367660E-17;
+  CUDART_3PIO4 = 2.3561944901923448E+0;
+  CUDART_2_OVER_PI = 6.3661977236758138E-1;
+  CUDART_PI = 3.1415926535897931E+0;
+  CUDART_PI_HI = 3.1415926535897931E+0;
+  CUDART_PI_LO = 1.2246467991473532E-16;
+  CUDART_SQRT_2PI_HI = 2.5066282746310007E+0;
+  CUDART_SQRT_2PI_LO = -1.8328579980459167E-16;
+  CUDART_SQRT_PIO2_HI = 1.2533141373155003E+0;
+  CUDART_SQRT_PIO2_LO = -9.1642899902295834E-17;
+  CUDART_L2E = 1.4426950408889634E+0;
+  CUDART_L2E_HI = 1.4426950408889634E+0;
+  CUDART_L2E_LO = 2.0355273740931033E-17;
+  CUDART_L2T = 3.3219280948873622E+0;
+  CUDART_LG2 = 3.0102999566398120E-1;
+  CUDART_LG2_HI = 3.0102999566398120E-1;
+  CUDART_LG2_LO = -2.8037281277851704E-18;
+  CUDART_LGE = 4.3429448190325182E-1;
+  CUDART_LGE_HI = 4.3429448190325182E-1;
+  CUDART_LGE_LO = 1.09831965021676510E-17;
+  CUDART_LN2 = 6.9314718055994529E-1;
+  CUDART_LN2_HI = 6.9314718055994529E-1;
+  CUDART_LN2_LO = 2.3190468138462996E-17;
+  CUDART_LNT = 2.3025850929940459E+0;
+  CUDART_LNT_HI = 2.3025850929940459E+0;
+  CUDART_LNT_LO = -2.1707562233822494E-16;
+  CUDART_LNPI = 1.1447298858494002E+0;
+  CUDART_LN2_X_1024 = 7.0978271289338397E+2;
+  CUDART_LN2_X_1025 = 7.1047586007394398E+2;
+  CUDART_LN2_X_1075 = 7.4513321910194122E+2;
+  CUDART_LG2_X_1024 = 3.0825471555991675E+2;
+  CUDART_LG2_X_1075 = 3.2360724533877976E+2;
+  CUDART_TWO_TO_23 = 8388608.0;
+  CUDART_TWO_TO_52 = 4503599627370496.0;
+  CUDART_TWO_TO_54 = 18014398509481984.0;
+  CUDART_TWO_TO_M54 = 5.5511151231257827E-17;
+  CUDART_TWO_TO_M1022 = 2.22507385850720140E-308;
+  CUDART_TRIG_PLOSS = 2147483648.0;
+
+type
+  // Runtime error codes. The members carry no explicit values, so they are
+  // numbered consecutively from cudaSuccess = 0.
+  // NOTE(review): in the CUDA runtime header the last two enumerators have
+  // explicit values (cudaErrorStartupFailure = 0x7f and
+  // cudaErrorApiFailureBase = 10000, as far as can be recalled); with
+  // consecutive numbering they would be 35 and 36 here - confirm against
+  // driver_types.h for the targeted toolkit version.
+  TcudaError = (cudaSuccess, cudaErrorMissingConfiguration,
+    cudaErrorMemoryAllocation, cudaErrorInitializationError,
+    cudaErrorLaunchFailure, cudaErrorPriorLaunchFailure, cudaErrorLaunchTimeout,
+    cudaErrorLaunchOutOfResources, cudaErrorInvalidDeviceFunction,
+    cudaErrorInvalidConfiguration, cudaErrorInvalidDevice,
+    cudaErrorInvalidValue, cudaErrorInvalidPitchValue, cudaErrorInvalidSymbol,
+    cudaErrorMapBufferObjectFailed, cudaErrorUnmapBufferObjectFailed,
+    cudaErrorInvalidHostPointer, cudaErrorInvalidDevicePointer,
+    cudaErrorInvalidTexture, cudaErrorInvalidTextureBinding,
+    cudaErrorInvalidChannelDescriptor, cudaErrorInvalidMemcpyDirection,
+    cudaErrorAddressOfConstant, cudaErrorTextureFetchFailed,
+    cudaErrorTextureNotBound, cudaErrorSynchronizationError,
+    cudaErrorInvalidFilterSetting, cudaErrorInvalidNormSetting,
+    cudaErrorMixedDeviceExecution, cudaErrorCudartUnloading, cudaErrorUnknown,
+    cudaErrorNotYetImplemented, cudaErrorMemoryValueTooLarge,
+    cudaErrorInvalidResourceHandle, cudaErrorNotReady, cudaErrorStartupFailure,
+    cudaErrorApiFailureBase);
+
+  // Channel format kinds used by TCudaChannelFormatDesc.f
+  // (tagged DEVICE_BUILTIN in the translated header).
+  TCudaChannelFormatKind = (
+    cudaChannelFormatKindSigned,
+    cudaChannelFormatKindUnsigned,
+    cudaChannelFormatKindFloat
+  );
+
+  // Map flags for OpenGL interop resources.
+  TCudaGLMapFlags = (
+    // Default; Assume resource can be read/written
+    cudaGLMapFlagsNone,
+    // CUDA kernels will not write to this resource
+    cudaGLMapFlagsReadOnly,
+    // CUDA kernels will only write to and will not read from this resource
+    cudaGLMapFlagsWriteDiscard
+  );
+
+  { +//DEVICE_BUILTIN*/ }
+  PcudaChannelFormatDesc = ^TCudaChannelFormatDesc;
+
+  // Channel format descriptor: four Integer components (x, y, z, w) plus the
+  // component kind f. Produced by cudaCreateChannelDesc / cudaGetChannelDesc
+  // and consumed by the texture-binding and array-allocation entry points.
+  // NOTE(review): presumably x..w are per-component bit widths as in the CUDA
+  // headers - confirm.
+  TCudaChannelFormatDesc = record
+    x: Integer;
+    y: Integer;
+    z: Integer;
+    w: Integer;
+    f: TCudaChannelFormatKind;
+  end;
+
+  { +//DEVICE_BUILTIN*/ }
+  // Empty placeholder record; this unit only handles it through PcudaArray.
+  TcudaArray = record
+  end; // !ATTENTION: stands in for a forward declaration?
+
+  { +//DEVICE_BUILTIN*/ }
+  // Direction selector passed as the `kind` argument of the cudaMemcpy*
+  // entry points. Ordinals run 0..3 in the order listed.
+  TcudaMemcpyKind = (cudaMemcpyHostToHost { = 0 } , cudaMemcpyHostToDevice,
+    cudaMemcpyDeviceToHost, cudaMemcpyDeviceToDevice);
+
+  { +//DEVICE_BUILTIN*/ }
+  // Pointer plus pitch/size information; filled in by cudaMalloc3D and used
+  // by cudaMemset3D and TcudaMemcpy3DParms.
+  // NOTE(review): presumably pitch is in bytes as in the CUDA headers -
+  // confirm.
+  TcudaPitchedPtr = record
+    ptr: Pointer;
+    pitch: NativeUInt;
+    xsize: NativeUInt;
+    ysize: NativeUInt;
+  end;
+
+  { +//DEVICE_BUILTIN*/ }
+  // Three-dimensional size (width, height, depth) passed by value to
+  // cudaMalloc3D, cudaMalloc3DArray and cudaMemset3D.
+  TcudaExtent = record
+    width: NativeUInt;
+    height: NativeUInt;
+    depth: NativeUInt;
+  end;
+
+  { +//DEVICE_BUILTIN*/ }
+  // Three-dimensional position (x, y, z); used for the srcPos / dstPos fields
+  // of TcudaMemcpy3DParms.
+  TcudaPos = record
+    x: NativeUInt;
+    y: NativeUInt;
+    z: NativeUInt;
+  end;
+
+  { +//DEVICE_BUILTIN*/ }
+  // Parameter block for cudaMemcpy3D / cudaMemcpy3DAsync: a source and a
+  // destination, each given as an array pointer, a position and a pitched
+  // pointer, followed by the extent to copy and the copy direction.
+  // The array fields are untyped Pointers rather than PcudaArray.
+  TcudaMemcpy3DParms = record
+    srcArray: Pointer;
+    srcPos: TcudaPos;
+    srcPtr: TcudaPitchedPtr;
+    dstArray: Pointer;
+    dstPos: TcudaPos;
+    dstPtr: TcudaPitchedPtr;
+    extent: TcudaExtent;
+    kind: TcudaMemcpyKind;
+  end;
+
+  { +//DEVICE_BUILTIN*/ }
+  PCudaDeviceProp = ^TCudaDeviceProp;
+
+  // Device description filled in by cudaGetDeviceProperties and used as the
+  // selection template for cudaChooseDevice. Field order and sizes define the
+  // binary layout shared with the runtime library, so fields must not be
+  // reordered or resized.
+  // NOTE(review): the layout corresponds to one specific CUDA toolkit
+  // version; confirm it matches the runtime DLLs listed in CUDARTDLLNAMES.
+  TCudaDeviceProp = record
+    // Device name as a fixed 256-character ANSI buffer
+    name: array [0 .. 256 - 1] of AnsiChar;
+    totalGlobalMem: NativeUInt;
+    sharedMemPerBlock: NativeUInt;
+    regsPerBlock: Integer;
+    warpSize: Integer;
+    memPitch: NativeUInt;
+    maxThreadsPerBlock: Integer;
+    maxThreadsDim: array [0 .. 3 - 1] of Integer;
+    maxGridSize: array [0 .. 3 - 1] of Integer;
+    clockRate: Integer;
+    totalConstMem: NativeUInt;
+    major: Integer;
+    minor: Integer;
+    textureAlignment: NativeUInt;
+    deviceOverlap: Integer;
+    multiProcessorCount: Integer;
+    // Specifies whether there is a run time limit on kernels
+    kernelExecTimeoutEnabled: Integer;
+    // Device is integrated as opposed to discrete
+    // NOTE(review): the field name looks like a mangled "integrated"; it is
+    // left unchanged because callers may reference it.
+    egrated: Integer;
+    // Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    canMapHostMemory: Integer;
+    // Compute mode (See ::cudaComputeMode)
+    computeMode: Integer;
+    // Maximum 1D texture size
+    maxTexture1D: Integer;
+    // Maximum 2D texture dimensions
+    maxTexture2D: array[0..1] of Integer;
+    // Maximum 3D texture dimensions
+    maxTexture3D: array[0..2] of Integer;
+    // Maximum 2D texture array dimensions
+    maxTexture2DArray: array[0..2] of Integer;
+    // Alignment requirements for surfaces
+    surfaceAlignment: NativeUInt;
+    // Device can possibly execute multiple kernels concurrently
+    concurrentKernels: Integer;
+    // Device has ECC support enabled
+    ECCEnabled: Integer;
+    // PCI bus ID of the device
+    pciBusID: Integer;
+    // PCI device ID of the device
+    pciDeviceID: Integer;
+    // 1 if device is a Tesla device using TCC driver, 0 otherwise
+    tccDriver: Integer;
+    // Reserved padding (21 Integers)
+    __cudaReserved: array [0 .. 20] of Integer;
+  end;
+
+  // Addressing mode stored per dimension in TTextureReference.addressMode.
+  TcudaTextureAddressMode = (cudaAddressModeWrap, cudaAddressModeClamp, cudaAddressModeMirror);
+
+  // Filtering mode stored in TTextureReference.filterMode.
+  TcudaTextureFilterMode = (cudaFilterModePoint, cudaFilterModeLinear);
+
+  // Texture read mode; not referenced by any other declaration visible in
+  // this part of the unit.
+  TcudaTextureReadMode = (cudaReadModeElementType, cudaReadModeNormalizedFloat);
+
+  PTextureReference = ^TTextureReference;
+
+  // Texture reference record passed (by pointer) to the cudaBindTexture*,
+  // cudaUnbindTexture and cudaGetTexture* entry points.
+  TTextureReference = record
+    // Integer flag
+    // NOTE(review): presumably non-zero selects normalized texture
+    // coordinates, as in the CUDA headers - confirm.
+    normalized: Integer;
+    filterMode: TcudaTextureFilterMode;
+    // One addressing mode per dimension (three entries)
+    addressMode: array [0 .. 2] of TcudaTextureAddressMode;
+    channelDesc: TCudaChannelFormatDesc;
+    // Reserved padding (16 Integers)
+    __cudaReserved: array [0 .. 15] of Integer;
+  end;
+
+  // Pointer to the placeholder TcudaArray record; used as the array handle
+  // type by the array copy and binding entry points.
+  PcudaArray = ^TcudaArray;
+
+  { +//****************************************************************************** }
+  { -** }
+  { -* SHORTHAND TYPE DEFINITION USED BY RUNTIME API* }
+  { -** }
+  { =*******************************************************************************/ }
+
+  { +//DEVICE_BUILTIN*/ }
+  // C-style alias for TcudaError; used as the result type of the entry points.
+  cudaError_t = TcudaError;
+  { +//DEVICE_BUILTIN*/ }
+  // Stream handle, declared here as a 32-bit Integer.
+  // NOTE(review): recent CUDA headers declare cudaStream_t as a pointer-sized
+  // handle; confirm the width for Win64 builds.
+  cudaStream_t = Integer;
+  { +//DEVICE_BUILTIN*/ }
+  // Event handle, declared here as a 32-bit Integer.
+  // NOTE(review): same width concern as cudaStream_t - confirm.
+  cudaEvent_t = Integer;
+
+(*******************************************************************************)
+
+var
+
+// Texture binding and channel-descriptor entry points.
+// Like every declaration in this var section these are stdcall procedural
+// variables, not static imports: each must be assigned (see
+// InitCUDARTFromLibrary) before it is called.
+cudaBindTexture: function(var offset: NativeUInt; const texref: PTextureReference;
+    var devPtr: Pointer; var desc: TCudaChannelFormatDesc; size: NativeUInt): cudaError_t;stdcall;
+cudaBindTexture2D: function(var offset: NativeUInt; const texref: PTextureReference; const devPtr: Pointer;
+    var desc: TCudaChannelFormatDesc; width, height, pitch: NativeUInt): cudaError_t;stdcall;
+cudaBindTextureToArray: function(const texref: PTextureReference; const cudaArray: PcudaArray): cudaError_t;stdcall;
+cudaUnbindTexture: function(const texref: PTextureReference): cudaError_t;stdcall;
+// NOTE(review): `offset` is passed by value here, unlike the `var offset` of
+// the bind functions above; the C prototype takes a size_t* output - confirm
+// how callers pass it.
+cudaGetTextureAlignmentOffset: function(offset: NativeUInt; const texref: PTextureReference): cudaError_t;stdcall;
+cudaGetTextureReference: function(const texref: PTextureReference; const symbol: PAnsiChar): cudaError_t;stdcall;
+cudaGetChannelDesc: function(var desc: TCudaChannelFormatDesc; const array_: Pointer): cudaError_t;stdcall;
+// Returns the descriptor record by value instead of a status code.
+cudaCreateChannelDesc: function(x, y, z, w: Integer; f: TCudaChannelFormatKind): TCudaChannelFormatDesc;stdcall;
+(* ******************************************************************************
+  *                                                                              *
+  *                                                                              *
+  *                                                                              *
+  ****************************************************************************** *)
+
+// Allocation, release and synchronous copy entry points (stdcall procedural
+// variables; must be assigned before use).
+// Several parameters are untyped `var` parameters (devPtr, dst, src): the
+// compiler passes the address of whatever variable the caller supplies.
+cudaMalloc3D: function(var pitchedDevPtr: TcudaPitchedPtr; extent: TcudaExtent): cudaError_t;stdcall;
+cudaMalloc3DArray: function(var arrayPtr: PcudaArray; const desc: TCudaChannelFormatDesc;
+  extent: TcudaExtent; flags: Cardinal): cudaError_t;stdcall;
+cudaMemset3D: function(pitchedDevPtr: TcudaPitchedPtr; value: Integer; extent: TcudaExtent): cudaError_t;stdcall;
+cudaMemcpy3D: function(const p: TcudaMemcpy3DParms): cudaError_t;stdcall;
+cudaMemcpy3DAsync: function(const p: TcudaMemcpy3DParms; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMalloc: function(var devPtr; size: NativeUInt): cudaError_t;stdcall;
+cudaMallocHost: function(var ptr: Pointer; size: NativeUInt): cudaError_t;stdcall;
+cudaMallocPitch: function(var devPtr; var pitch: NativeUInt; width: NativeUInt; height: NativeUInt): cudaError_t;stdcall;
+cudaMallocArray: function(var aarray: Pointer; var desc: TCudaChannelFormatDesc; width: NativeUInt; height: NativeUInt): cudaError_t;stdcall;
+cudaFree: function(devPtr: Pointer): cudaError_t;stdcall;
+cudaFreeHost: function(ptr: Pointer): cudaError_t;stdcall;
+cudaFreeArray: function(const aarray: Pointer): cudaError_t;stdcall;
+cudaHostAlloc: function(var pHost: Pointer; bytes: NativeUInt; flags: Cardinal): cudaError_t;stdcall;
+cudaHostGetDevicePointer: function(var pDevice: Pointer; pHost: Pointer; flags: Cardinal): cudaError_t;stdcall;
+cudaHostGetFlags: function(var pFlags: Cardinal; pHost: Pointer): cudaError_t;stdcall;
+cudaMemGetInfo: function(var free: NativeUInt; var total: NativeUInt): cudaError_t;stdcall;
+cudaMemcpy: function(dst: Pointer; src: Pointer;
+  count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;stdcall;
+cudaMemcpyToArray: function(var dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; var src;
+  count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;stdcall;
+cudaMemcpyFromArray: function(var dst; const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt;
+  count: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;stdcall;
+// `kind` defaults to cudaMemcpyDeviceToDevice when omitted.
+cudaMemcpyArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt; hOffsetDst: NativeUInt; const src: PcudaArray; wOffsetSrc: NativeUInt;
+  hOffsetSrc: NativeUInt; count: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;stdcall;
+cudaMemcpy2D: function(var dst; dpitch: NativeUInt; var src; spitch: NativeUInt;
+  width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;stdcall;
+cudaMemcpy2DToArray: function(dst: PcudaArray; wOffset: NativeUInt;
+  hOffset: NativeUInt; var src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind): cudaError_t;stdcall;
+cudaMemcpy2DFromArray: function(var dst; dpitch: NativeUInt; src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt; height: NativeUInt;
+  kind: TcudaMemcpyKind): cudaError_t;stdcall;
+// `kind` defaults to cudaMemcpyDeviceToDevice when omitted.
+cudaMemcpy2DArrayToArray: function(dst: PcudaArray; wOffsetDst: NativeUInt;
+  hOffsetDst: NativeUInt; src: PcudaArray; wOffsetSrc: NativeUInt; hOffsetSrc: NativeUInt;
+  width: NativeUInt; height: NativeUInt; const kind: TcudaMemcpyKind = cudaMemcpyDeviceToDevice): cudaError_t;stdcall;
+// `offset` defaults to 0 and `kind` to cudaMemcpyHostToDevice.
+cudaMemcpyToSymbol: function(symbol: PAnsiChar; var src; count: NativeUInt; const offset: NativeUInt = 0;
+  const kind: TcudaMemcpyKind = cudaMemcpyHostToDevice): cudaError_t;stdcall;
+// `offset` defaults to 0 and `kind` to cudaMemcpyDeviceToHost.
+cudaMemcpyFromSymbol: function(var dst; symbol: PAnsiChar; count: NativeUInt; const offset: NativeUInt = 0;
+  const kind: TcudaMemcpyKind = cudaMemcpyDeviceToHost): cudaError_t;stdcall;
+
+{ +//*************************************************************************** }
+{ -** }
+{ -** }
+{ -** }
+{ =***************************************************************************** }
+
+// Asynchronous counterparts of the copy entry points: same arguments plus a
+// trailing cudaStream_t. Unlike the synchronous ToSymbol/FromSymbol variants,
+// none of these declares default parameter values.
+// (stdcall procedural variables; must be assigned before use.)
+cudaMemcpyAsync: function(var dst; const src; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpyToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; const src; count: NativeUInt; kind: TcudaMemcpyKind;
+  stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpyFromArrayAsync: function(var dst; const src: PcudaArray;
+  wOffset: NativeUInt; hOffset: NativeUInt; count: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpy2DAsync: function(var dst; dpitch: NativeUInt; const src;
+  spitch: NativeUInt; width: NativeUInt; height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpy2DToArrayAsync: function(dst: PcudaArray; wOffset: NativeUInt;
+  hOffset: NativeUInt; const src; spitch: NativeUInt; width: NativeUInt; height: NativeUInt;
+  kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpy2DFromArrayAsync: function(var dst; dpitch: NativeUInt;
+  const src: PcudaArray; wOffset: NativeUInt; hOffset: NativeUInt; width: NativeUInt;
+  height: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpyToSymbolAsync: function(const symbol: PAnsiChar; const src;
+  count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+cudaMemcpyFromSymbolAsync: function(var dst; const symbol: PAnsiChar;
+  count: NativeUInt; offset: NativeUInt; kind: TcudaMemcpyKind; stream: cudaStream_t): cudaError_t;stdcall;
+
+(******************************************************************************
+ *                                                                            *
+ *                                                                            *
+ *                                                                            *
+ *****************************************************************************)
+
+// Memory fill entry points. `devPtr` is an untyped var parameter, so the
+// address of the caller's variable is what gets passed.
+// NOTE(review): the C prototypes take the device pointer itself (void*), so
+// callers presumably pass a dereferenced pointer (e.g. p^) - confirm at call
+// sites.
+cudaMemset: function(var devPtr; value: Integer; count: NativeUInt): cudaError_t;stdcall;
+cudaMemset2D: function(var devPtr; pitch: NativeUInt; value: Integer;
+  width: NativeUInt; height: NativeUInt): cudaError_t;stdcall;
+(*****************************************************************************
+ *                                                                           *
+ *                                                                           *
+ *                                                                           *
+ *****************************************************************************)
+
+// Symbol queries: the symbol is identified by an ANSI string and the result
+// is written to the first (var) parameter.
+cudaGetSymbolAddress: function(var devPtr: Pointer; const symbol: PAnsiChar): cudaError_t;stdcall;
+cudaGetSymbolSize: function(var size: NativeUInt; const symbol: PAnsiChar): cudaError_t;stdcall;
+
+{ +//*************************************************************************** }
+{ -** }
+{ -** }
+{ -** }
+{ =***************************************************************************** }
+
+// Device enumeration and selection entry points (stdcall procedural
+// variables; must be assigned before use). Devices are identified by Integer.
+cudaGetDeviceCount: function(var count: Integer): cudaError_t;stdcall;
+cudaGetDeviceProperties: function(var prop: TCudaDeviceProp; device: Integer): cudaError_t;stdcall;
+cudaChooseDevice: function(var device: Integer; const prop: PCudaDeviceProp): cudaError_t;stdcall;
+cudaSetDevice: function(device: Integer): cudaError_t;stdcall;
+cudaGetDevice: function(var device: Integer): cudaError_t;stdcall;
+cudaSetDeviceFlags: function(flags: Integer): cudaError_t;stdcall;
+// device_arr points at the first of `len` Integers.
+cudaSetValidDevices: function(device_arr: PInteger; len: Integer): cudaError_t;stdcall;
+(******************************************************************************
+ *
+ *
+ *
+ *****************************************************************************)
+
+// Kernel launch configuration entry points. TDim3, TcudaFuncCache and
+// TcudaFuncAttributes are declared elsewhere (not visible in this part of
+// the file). Kernels/functions are identified by PAnsiChar.
+cudaConfigureCall: function(gridDim, blockDim: TDim3; sharedMem: NativeUInt; stream: cudaStream_t): cudaError_t;stdcall;
+cudaSetupArgument: function(const arg: Pointer; size: NativeUInt; offset: NativeUInt): cudaError_t;stdcall;
+cudaFuncSetCacheConfig: function(const func: PAnsiChar; cacheConfig: TcudaFuncCache): cudaError_t;stdcall;
+cudaLaunch: function(const entry: PAnsiChar): cudaError_t;stdcall;
+cudaFuncGetAttributes: function(var attr: TcudaFuncAttributes; const func: PAnsiChar): cudaError_t;stdcall;
+  { +//****************************************************************************** }
+  { -** }
+  { -** }
+  { -** }
+  { =*******************************************************************************/ }
+
+// Parameterless status query; its result is fed to cudaGetErrorString by
+// cudaGetLastErrorString in the implementation section.
+cudaGetLastError: function: cudaError_t;stdcall;
+  { +//****************************************************************************** }
+  { -** }
+  { -** }
+  { -** }
+  { =*******************************************************************************/ }
+// OpenGL interop, error-string and version entry points (stdcall procedural
+// variables; must be assigned before use). Buffer objects, images and
+// targets are passed as Cardinal. PCUgraphicsResource, TCUdeviceptr and
+// PCUarray are declared elsewhere (not visible in this part of the file).
+cudaGLSetGLDevice: function(device: Integer): cudaError_t;stdcall;
+cudaGLRegisterBufferObject: function(bufObj: Cardinal): cudaError_t;stdcall;
+cudaGraphicsGLRegisterImage: function(const resource: PCUgraphicsResource; image: Cardinal; target: Cardinal; flags: Cardinal): cudaError_t;stdcall;
+cudaGraphicsGLRegisterBuffer: function(const resource: PCUgraphicsResource; buffer: Cardinal; flags: Cardinal): cudaError_t;stdcall;
+// NOTE(review): devPtr is a by-value Pointer here but `var devPtr: Pointer`
+// in cudaGLMapBufferObjectAsync below - confirm callers pass the address of
+// their pointer variable.
+cudaGLMapBufferObject: function(devPtr: Pointer; bufObj: Cardinal): cudaError_t;stdcall;
+cudaGLUnmapBufferObject: function(bufObj: Cardinal): cudaError_t;stdcall;
+cudaGLUnregisterBufferObject: function(bufObj: Cardinal): cudaError_t;stdcall;
+cudaGLSetBufferObjectMapFlags: function(bufObj: Cardinal; flags: TCudaGLMapFlags): cudaError_t;stdcall;
+cudaGLMapBufferObjectAsync: function(var devPtr: Pointer; bufObj: Cardinal; stream: cudaStream_t): cudaError_t;stdcall;
+cudaGLUnmapBufferObjectAsync: function(bufObj: Cardinal; stream: cudaStream_t): cudaError_t;stdcall;
+cudaGraphicsUnregisterResource: function(resource: PCUgraphicsResource): cudaError_t;stdcall;
+cudaGraphicsResourceSetMapFlags: function(resource: PCUgraphicsResource; flags: Cardinal): cudaError_t;stdcall;
+cudaGraphicsMapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;stdcall;
+cudaGraphicsUnmapResources: function(count: Integer; const resources: PCUgraphicsResource; stream: cudaStream_t): cudaError_t;stdcall;
+// NOTE(review): pSize is a 32-bit Cardinal while every other size in this
+// unit is NativeUInt; the C prototype uses size_t*, so on Win64 the runtime
+// may write 8 bytes - confirm before using in 64-bit builds.
+cudaGraphicsResourceGetMappedPointer: function(var pDevPtr: TCUdeviceptr;
+  var pSize: Cardinal; resource: PCUgraphicsResource): cudaError_t;stdcall;
+cudaGraphicsSubResourceGetMappedArray: function(var pArray: PCUarray;
+  resource: PCUgraphicsResource; arrayIndex: Cardinal; mipLevel: Cardinal): cudaError_t;stdcall;
+// Returns an ANSI C string for the given status code.
+cudaGetErrorString: function(error: cudaError_t): PAnsiChar;stdcall;
+cudaDriverGetVersion: function(out driverVersion: Integer): cudaError_t;stdcall;
+// Also called once by InitCUDARTFromLibrary right after binding.
+cudaRuntimeGetVersion: function(out runtimeVersion: Integer): cudaError_t;stdcall;
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+// Both take the Double by reference (var), so the value may be modified in
+// place by the runtime.
+cudaSetDoubleForDevice: function(var d: Double): cudaError_t;stdcall;
+cudaSetDoubleForHost: function(var d: Double): cudaError_t;stdcall;
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+// Stream management entry points; cudaStreamCreate writes the new handle to
+// its var parameter, the others take the handle by value.
+cudaStreamCreate: function(var pStream: cudaStream_t): cudaError_t;stdcall;
+cudaStreamDestroy: function(stream: cudaStream_t): cudaError_t;stdcall;
+cudaStreamSynchronize: function(stream: cudaStream_t): cudaError_t;stdcall;
+cudaStreamQuery: function(stream: cudaStream_t): cudaError_t;stdcall;
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+// Event management entry points; the create functions write the new handle
+// to their var parameter, the others take handles by value.
+cudaEventCreate: function(var event: cudaEvent_t): cudaError_t;stdcall;
+cudaEventCreateWithFlags: function(var event: cudaEvent_t; flags: Integer): cudaError_t;stdcall;
+cudaEventRecord: function(event: cudaEvent_t; stream: cudaStream_t): cudaError_t;stdcall;
+cudaEventQuery: function(event: cudaEvent_t): cudaError_t;stdcall;
+cudaEventSynchronize: function(event: cudaEvent_t): cudaError_t;stdcall;
+cudaEventDestroy: function(event: cudaEvent_t): cudaError_t;stdcall;
+// Writes the result to `ms` as a Single.
+cudaEventElapsedTime: function(var ms: Single; start: cudaEvent_t; ending: cudaEvent_t): cudaError_t;stdcall;
+// HGPUNV is declared elsewhere (not visible in this part of the file).
+cudaWGLGetDevice: function(var device: Integer; hGpu: HGPUNV): cudaError_t;stdcall;
+(* ******************************************************************************
+ *                                                                              *
+ *                                                                              *
+ *                                                                              *
+ ****************************************************************************** *)
+
+// Thread-level entry points. TcudaLimit is declared elsewhere (not visible
+// in this part of the file).
+cudaThreadExit: function(): cudaError_t;stdcall;
+cudaThreadSynchronize: function(): cudaError_t;stdcall;
+cudaThreadSetLimit: function(limit: TcudaLimit; value: NativeUInt): cudaError_t;stdcall;
+cudaThreadGetLimit: function(var value: NativeUInt; limit: TcudaLimit): cudaError_t;stdcall;
+// Text of the most recent runtime error (cudaGetErrorString applied to
+// cudaGetLastError), converted to a Delphi string.
+function cudaGetLastErrorString: string;
+// Tries each name in CUDARTDLLNAMES, with '.dll' appended, until one loads.
+function InitCUDART: Boolean;
+// Frees the runtime library if one is loaded; does nothing otherwise.
+procedure CloseCUDART;
+// Loads the runtime from the given library name and binds the entry points.
+function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
+// True while a runtime library handle is held.
+function IsCUDARTInitialized: Boolean;
+
+//--------------------------------------------
+implementation
+//--------------------------------------------
+
+// Fetches the most recent runtime status code and returns the runtime's
+// ANSI description of it as a Delphi string.
+function cudaGetLastErrorString: string;
+var
+  LastStatus: cudaError_t;
+  Description: PAnsiChar;
+begin
+  LastStatus := cudaGetLastError();
+  Description := cudaGetErrorString(LastStatus);
+  Result := string(Description);
+end;
+
+const
+  // Sentinel meaning "no runtime library is loaded".
+  INVALID_MODULEHANDLE = 0;
+
+var
+  // Handle of the currently loaded CUDA runtime library. Set by
+  // InitCUDARTFromLibrary and reset to INVALID_MODULEHANDLE by CloseCUDART.
+  // The declared type differs per platform; the initial value is the same.
+{$IFDEF MSWINDOWS}
+  CUDARTHandle: HINST = INVALID_MODULEHANDLE;
+{$ELSE}
+  CUDARTHandle: TLibHandle = INVALID_MODULEHANDLE;
+{$ENDIF}
+
+// Looks up the export ProcName in the currently loaded runtime library and
+// returns its address exactly as GetProcAddress reports it.
+function CUDARTGetProcAddress(ProcName: PAnsiChar): Pointer;
+var
+  EntryPoint: Pointer;
+begin
+  EntryPoint := GetProcAddress(CUDARTHandle, ProcName);
+  Result := EntryPoint;
+end;
+
+// Ensures a CUDA runtime library is loaded.
+// Returns True when a runtime is already loaded or when one of the names in
+// CUDARTDLLNAMES (with '.dll' appended) loads successfully; False when none
+// of the candidates can be loaded.
+function InitCUDART: Boolean;
+var
+  I: Integer;
+begin
+  // Already initialized: report success instead of falling through to the
+  // failure result, and do not reload (InitCUDARTFromLibrary would first
+  // unload the library that is in use).
+  if CUDARTHandle <> INVALID_MODULEHANDLE then
+    Exit(True);
+
+  // Candidates are tried in declaration order; the first that loads wins.
+  for I := Low(CUDARTDLLNAMES) to High(CUDARTDLLNAMES) do
+    if InitCUDARTFromLibrary(CUDARTDLLNAMES[I] + '.dll') then
+      Exit(True);
+
+  Result := False;
+end;
+
+// Unloads the CUDA runtime library, if any, and marks the unit as
+// uninitialized. Safe to call when nothing is loaded.
+procedure CloseCUDART;
+begin
+  // Nothing loaded: nothing to release.
+  if CUDARTHandle = INVALID_MODULEHANDLE then
+    Exit;
+
+  FreeLibrary(CUDARTHandle);
+  CUDARTHandle := INVALID_MODULEHANDLE;
+end;
+
+// Loads the CUDA runtime from LibName and binds every entry point declared
+// in the interface section by its export name.
+//
+// Any previously loaded runtime is released first. Returns False when the
+// library cannot be loaded or does not export cudaRuntimeGetVersion; True
+// otherwise.
+//
+// The result of each lookup is stored unchecked, so an entry point that the
+// library does not export is left nil; callers that rely on optional entry
+// points should test them with Assigned() before calling.
+function InitCUDARTFromLibrary(const LibName: WideString): Boolean;
+var
+  V: Integer;
+begin
+  CloseCUDART;
+  // Always go through LoadLibraryW: it takes a reference even when the module
+  // is already mapped into the process, which keeps the FreeLibrary call in
+  // CloseCUDART balanced. A handle obtained from GetModuleHandleW carries no
+  // reference and must not be freed.
+  CUDARTHandle := LoadLibraryW(PWideChar(LibName));
+
+  if CUDARTHandle = INVALID_MODULEHANDLE then
+    Exit(False);
+
+  cudaBindTexture := CUDARTGetProcAddress('cudaBindTexture');
+  cudaBindTexture2D := CUDARTGetProcAddress('cudaBindTexture2D');
+  cudaBindTextureToArray := CUDARTGetProcAddress('cudaBindTextureToArray');
+  cudaChooseDevice := CUDARTGetProcAddress('cudaChooseDevice');
+  cudaConfigureCall := CUDARTGetProcAddress('cudaConfigureCall');
+  cudaCreateChannelDesc := CUDARTGetProcAddress('cudaCreateChannelDesc');
+  cudaDriverGetVersion := CUDARTGetProcAddress('cudaDriverGetVersion');
+  cudaEventCreate := CUDARTGetProcAddress('cudaEventCreate');
+  cudaEventCreateWithFlags := CUDARTGetProcAddress('cudaEventCreateWithFlags');
+  cudaEventDestroy := CUDARTGetProcAddress('cudaEventDestroy');
+  cudaEventElapsedTime := CUDARTGetProcAddress('cudaEventElapsedTime');
+  cudaEventQuery := CUDARTGetProcAddress('cudaEventQuery');
+  cudaEventRecord := CUDARTGetProcAddress('cudaEventRecord');
+  cudaEventSynchronize := CUDARTGetProcAddress('cudaEventSynchronize');
+  cudaFree := CUDARTGetProcAddress('cudaFree');
+  cudaFreeArray := CUDARTGetProcAddress('cudaFreeArray');
+  cudaFreeHost := CUDARTGetProcAddress('cudaFreeHost');
+  cudaFuncGetAttributes := CUDARTGetProcAddress('cudaFuncGetAttributes');
+  cudaFuncSetCacheConfig := CUDARTGetProcAddress('cudaFuncSetCacheConfig');
+  cudaGetChannelDesc := CUDARTGetProcAddress('cudaGetChannelDesc');
+  cudaGetDevice := CUDARTGetProcAddress('cudaGetDevice');
+  cudaGetDeviceCount := CUDARTGetProcAddress('cudaGetDeviceCount');
+  cudaGetDeviceProperties := CUDARTGetProcAddress('cudaGetDeviceProperties');
+  cudaGetErrorString := CUDARTGetProcAddress('cudaGetErrorString');
+  cudaGetLastError := CUDARTGetProcAddress('cudaGetLastError');
+  cudaGetSymbolAddress := CUDARTGetProcAddress('cudaGetSymbolAddress');
+  cudaGetSymbolSize := CUDARTGetProcAddress('cudaGetSymbolSize');
+  cudaGetTextureAlignmentOffset := CUDARTGetProcAddress('cudaGetTextureAlignmentOffset');
+  cudaGetTextureReference := CUDARTGetProcAddress('cudaGetTextureReference');
+  cudaGLMapBufferObject := CUDARTGetProcAddress('cudaGLMapBufferObject');
+  cudaGLMapBufferObjectAsync := CUDARTGetProcAddress('cudaGLMapBufferObjectAsync');
+  cudaGLRegisterBufferObject := CUDARTGetProcAddress('cudaGLRegisterBufferObject');
+  cudaGLSetBufferObjectMapFlags := CUDARTGetProcAddress('cudaGLSetBufferObjectMapFlags');
+  cudaGLSetGLDevice := CUDARTGetProcAddress('cudaGLSetGLDevice');
+  cudaGLUnmapBufferObject := CUDARTGetProcAddress('cudaGLUnmapBufferObject');
+  cudaGLUnmapBufferObjectAsync := CUDARTGetProcAddress('cudaGLUnmapBufferObjectAsync');
+  cudaGLUnregisterBufferObject := CUDARTGetProcAddress('cudaGLUnregisterBufferObject');
+  cudaGraphicsGLRegisterBuffer := CUDARTGetProcAddress('cudaGraphicsGLRegisterBuffer');
+  cudaGraphicsGLRegisterImage := CUDARTGetProcAddress('cudaGraphicsGLRegisterImage');
+  cudaGraphicsMapResources := CUDARTGetProcAddress('cudaGraphicsMapResources');
+  cudaGraphicsResourceGetMappedPointer := CUDARTGetProcAddress('cudaGraphicsResourceGetMappedPointer');
+  cudaGraphicsResourceSetMapFlags := CUDARTGetProcAddress('cudaGraphicsResourceSetMapFlags');
+  cudaGraphicsSubResourceGetMappedArray := CUDARTGetProcAddress('cudaGraphicsSubResourceGetMappedArray');
+  cudaGraphicsUnmapResources := CUDARTGetProcAddress('cudaGraphicsUnmapResources');
+  cudaGraphicsUnregisterResource := CUDARTGetProcAddress('cudaGraphicsUnregisterResource');
+  cudaHostAlloc := CUDARTGetProcAddress('cudaHostAlloc');
+  cudaHostGetDevicePointer := CUDARTGetProcAddress('cudaHostGetDevicePointer');
+  cudaHostGetFlags := CUDARTGetProcAddress('cudaHostGetFlags');
+  cudaLaunch := CUDARTGetProcAddress('cudaLaunch');
+  cudaMalloc := CUDARTGetProcAddress('cudaMalloc');
+  cudaMalloc3D := CUDARTGetProcAddress('cudaMalloc3D');
+  cudaMalloc3DArray := CUDARTGetProcAddress('cudaMalloc3DArray');
+  cudaMallocArray := CUDARTGetProcAddress('cudaMallocArray');
+  cudaMallocHost := CUDARTGetProcAddress('cudaMallocHost');
+  cudaMallocPitch := CUDARTGetProcAddress('cudaMallocPitch');
+  cudaMemcpy := CUDARTGetProcAddress('cudaMemcpy');
+  cudaMemcpy2D := CUDARTGetProcAddress('cudaMemcpy2D');
+  cudaMemcpy2DArrayToArray := CUDARTGetProcAddress('cudaMemcpy2DArrayToArray');
+  cudaMemcpy2DAsync := CUDARTGetProcAddress('cudaMemcpy2DAsync');
+  cudaMemcpy2DFromArray := CUDARTGetProcAddress('cudaMemcpy2DFromArray');
+  cudaMemcpy2DFromArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DFromArrayAsync');
+  cudaMemcpy2DToArray := CUDARTGetProcAddress('cudaMemcpy2DToArray');
+  cudaMemcpy2DToArrayAsync := CUDARTGetProcAddress('cudaMemcpy2DToArrayAsync');
+  cudaMemcpy3D := CUDARTGetProcAddress('cudaMemcpy3D');
+  cudaMemcpy3DAsync := CUDARTGetProcAddress('cudaMemcpy3DAsync');
+  cudaMemcpyArrayToArray := CUDARTGetProcAddress('cudaMemcpyArrayToArray');
+  cudaMemcpyAsync := CUDARTGetProcAddress('cudaMemcpyAsync');
+  cudaMemcpyFromArray := CUDARTGetProcAddress('cudaMemcpyFromArray');
+  cudaMemcpyFromArrayAsync := CUDARTGetProcAddress('cudaMemcpyFromArrayAsync');
+  cudaMemcpyFromSymbol := CUDARTGetProcAddress('cudaMemcpyFromSymbol');
+  cudaMemcpyFromSymbolAsync := CUDARTGetProcAddress('cudaMemcpyFromSymbolAsync');
+  cudaMemcpyToArray := CUDARTGetProcAddress('cudaMemcpyToArray');
+  cudaMemcpyToArrayAsync := CUDARTGetProcAddress('cudaMemcpyToArrayAsync');
+  cudaMemcpyToSymbol := CUDARTGetProcAddress('cudaMemcpyToSymbol');
+  cudaMemcpyToSymbolAsync := CUDARTGetProcAddress('cudaMemcpyToSymbolAsync');
+  cudaMemGetInfo := CUDARTGetProcAddress('cudaMemGetInfo');
+  cudaMemset := CUDARTGetProcAddress('cudaMemset');
+  cudaMemset2D := CUDARTGetProcAddress('cudaMemset2D');
+  cudaMemset3D := CUDARTGetProcAddress('cudaMemset3D');
+  cudaRuntimeGetVersion := CUDARTGetProcAddress('cudaRuntimeGetVersion');
+  cudaSetDevice := CUDARTGetProcAddress('cudaSetDevice');
+  cudaSetDeviceFlags := CUDARTGetProcAddress('cudaSetDeviceFlags');
+  cudaSetDoubleForDevice := CUDARTGetProcAddress('cudaSetDoubleForDevice');
+  cudaSetDoubleForHost := CUDARTGetProcAddress('cudaSetDoubleForHost');
+  cudaSetupArgument := CUDARTGetProcAddress('cudaSetupArgument');
+  cudaSetValidDevices := CUDARTGetProcAddress('cudaSetValidDevices');
+  cudaStreamCreate := CUDARTGetProcAddress('cudaStreamCreate');
+  cudaStreamDestroy := CUDARTGetProcAddress('cudaStreamDestroy');
+  cudaStreamQuery := CUDARTGetProcAddress('cudaStreamQuery');
+  cudaStreamSynchronize := CUDARTGetProcAddress('cudaStreamSynchronize');
+  cudaThreadExit := CUDARTGetProcAddress('cudaThreadExit');
+  cudaThreadSynchronize := CUDARTGetProcAddress('cudaThreadSynchronize');
+  cudaThreadSetLimit := CUDARTGetProcAddress('cudaThreadSetLimit');
+  cudaThreadGetLimit := CUDARTGetProcAddress('cudaThreadGetLimit');
+  cudaUnbindTexture := CUDARTGetProcAddress('cudaUnbindTexture');
+  cudaWGLGetDevice := CUDARTGetProcAddress('cudaWGLGetDevice');
+
+  // The version query doubles as a sanity probe. A library without this
+  // export is not a usable CUDA runtime, and calling through a nil pointer
+  // would raise an access violation, so release it and report failure.
+  if not Assigned(cudaRuntimeGetVersion) then
+  begin
+    CloseCUDART;
+    Exit(False);
+  end;
+
+  // The reported version itself is not used.
+  cudaRuntimeGetVersion(V);
+  Result := True;
+end;
+
+// Reports whether a CUDA runtime library handle is currently held.
+function IsCUDARTInitialized: Boolean;
+begin
+  Result := not (CUDARTHandle = INVALID_MODULEHANDLE);
+end;
+
+//-----------------------------------------------
+initialization
+//-----------------------------------------------
+// Intentionally empty: nothing is loaded at unit start-up, so InitCUDART or
+// InitCUDARTFromLibrary must be called explicitly.
+
+//-----------------------------------------------
+finalization
+//-----------------------------------------------
+
+// Release the runtime library at unit shutdown (no-op when none is loaded).
+CloseCUDART;
+
+end.

+ 154 - 0
Source/Import.CUDAUtility.pas

@@ -0,0 +1,154 @@
+//
+// This unit is part of the GLScene Engine, http://glscene.org
+//
+
+unit Import.CUDAUtility;
+
+(* CUDA Utility Wrapper of cutil *)
+
+interface
+
+uses
+  Winapi.Windows;
+
+
+const
+  // File name of the cutil library that InitCUTIL loads by default;
+  // WIN64 builds use the 64-bit DLL, every other build the 32-bit one.
+{$IFNDEF WIN64}
+  CUTILDLL = 'cutil32.dll';
+{$ELSE}
+  CUTILDLL = 'cutil64.dll';
+{$ENDIF}
+
+// Procedural variables for the cutil entry points. Each one is assigned in
+// InitCUTILFromLibrary from the export of the same name and holds whatever
+// CUTILGetProcAddress returned for it; all of them use the stdcall convention.
+// NOTE(review): results are declared as Delphi Boolean - confirm that this
+// matches the size of the boolean type the DLL actually returns.
+var
+  // File lookup.
+  cutFindFilePath: function(const filename: PAnsiChar; const executablePath: PAnsiChar): PAnsiChar;stdcall;
+  // Image load/save routines (presumably PGM/PPM files, judging by the names);
+  // the loaders hand back the data pointer and the dimensions through var parameters.
+  cutLoadPGMf: function(const filename: PAnsiChar; var data: System.PSingle; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutSavePGMf: function(const filename: PAnsiChar; data: System.PSingle; w: Integer; h: Integer): Boolean;stdcall;
+  cutLoadPGMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPPMub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPPM4ub: function(const filename: PAnsiChar; var data: PByte; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPGMi: function(const filename: PAnsiChar; var data: PInteger; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutLoadPGMs: function(const filename: PAnsiChar; var data: PWord; var w: Integer; var h: Integer): Boolean;stdcall;
+  cutSavePGMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePPMub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePPM4ub: function(const filename: PAnsiChar; data: PByte; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePGMi: function(const filename: PAnsiChar; data: PInteger; w: Integer; h: Integer): Boolean;stdcall;
+  cutSavePGMs: function(const filename: PAnsiChar; data: PWord; w: Integer; h: Integer): Boolean;stdcall;
+  // Buffer comparison routines: reference vs. data over len elements, some
+  // variants taking an epsilon and/or a threshold.
+  cutComparef: function(const reference: PSingle; const data: PSingle; const len: Cardinal): Boolean;stdcall;
+  cutComparei: function(const reference: PInteger; const data: PInteger; const len: Cardinal): Boolean;stdcall;
+  cutCompareuit: function(const reference: PInteger; const data: PInteger; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareub: function(const reference: PByte; const data: PByte; const len: Cardinal): Boolean;stdcall;
+  cutCompareubt: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareube: function(const reference: PByte; const data: PByte; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  cutComparefe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  cutComparefet: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single;
+    const threshold: Single): Boolean;stdcall;
+  cutCompareL2fe: function(const reference: PSingle; const data: PSingle; const len: Cardinal; const epsilon: Single): Boolean;stdcall;
+  // Timer routines; a timer is identified by the Cardinal that cutCreateTimer
+  // writes into its var parameter.
+  cutCreateTimer: function(var name: Cardinal): Boolean;stdcall;
+  cutStartTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutStopTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutResetTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutDeleteTimer: function(const name: Cardinal): Boolean;stdcall;
+  cutGetTimerValue: function(const name: Cardinal): Single;stdcall;
+  cutGetAverageTimerValue: function(const name: Cardinal): Single;stdcall;
+  // Memory release (presumably for buffers handed out by the loaders - confirm).
+  cutFree: procedure(ptr: Pointer);stdcall;
+
+// Loads the default library (CUTILDLL) unless a library is already loaded;
+// returns True when a library handle is held afterwards.
+function InitCUTIL: Boolean;
+// Frees the loaded library, if any, and resets the handle.
+procedure CloseCUTIL;
+// Unloads any current library, loads LibName and binds the cut* variables;
+// returns False only when LibName cannot be loaded.
+function InitCUTILFromLibrary(const LibName: WideString): Boolean;
+// True while a library handle is held.
+function IsCUTILInitialized: Boolean;
+
+// ------------------------------------------------------
+implementation
+// ------------------------------------------------------
+
+const
+  // Handle value meaning "no library loaded".
+  INVALID_MODULEHANDLE = 0;
+
+{$IFDEF MSWINDOWS}
+// ************** Windows specific ********************
+var
+  // Handle of the currently loaded cutil library; INVALID_MODULEHANDLE when none.
+  CUTILHandle: HINST = INVALID_MODULEHANDLE;
+{$ELSE}
+// ************** UNIX specific ********************
+// NOTE(review): TLibHandle is not declared in this unit and the only unit in the
+// visible uses clause is Winapi.Windows - confirm that this branch can compile.
+var
+  CUTILHandle: TLibHandle = INVALID_MODULEHANDLE;
+{$ENDIF}
+
+// Resolves the exported symbol ProcName in the currently loaded cutil library.
+// Returns whatever GetProcAddress reports for CUTILHandle, i.e. nil when the
+// export cannot be found.
+function CUTILGetProcAddress(ProcName: PAnsiChar): Pointer;
+begin
+  // Pass the handle at its native width: casting it to Cardinal cuts a 64-bit
+  // module handle down to 32 bits in WIN64 builds, so the lookup would be made
+  // against a wrong handle.
+  Result := GetProcAddress(CUTILHandle, ProcName);
+end;
+
+// Makes sure a cutil library is loaded. When a handle is already held nothing
+// is reloaded and True is returned; otherwise the default library CUTILDLL is
+// loaded and the outcome of that load is returned.
+function InitCUTIL: Boolean;
+begin
+  Result := True;
+  if CUTILHandle = INVALID_MODULEHANDLE then
+    Result := InitCUTILFromLibrary(CUTILDLL);
+end;
+
+// Unloads the cutil library if one is loaded and marks the handle as invalid.
+// Does nothing when no library is loaded. The cut* procedural variables are
+// not reset here, so they must not be called after this procedure has run.
+procedure CloseCUTIL;
+begin
+  if CUTILHandle <> INVALID_MODULEHANDLE then
+  begin
+    // Pass the handle at its native width: a Cardinal cast would truncate a
+    // 64-bit module handle in WIN64 builds and FreeLibrary would be given a
+    // wrong handle.
+    FreeLibrary(CUTILHandle);
+    CUTILHandle := INVALID_MODULEHANDLE;
+  end;
+end;
+
+// Loads the cutil library LibName and binds every cut* procedural variable to
+// the export of the same name.
+// Any library that is already loaded is released first. Returns False when
+// LibName cannot be loaded; returns True otherwise, without checking whether
+// the individual exports were found.
+function InitCUTILFromLibrary(const LibName: WideString): Boolean;
+begin
+  CloseCUTIL;
+  CUTILHandle := LoadLibraryW(PWideChar(LibName));
+  Result := CUTILHandle <> INVALID_MODULEHANDLE;
+  if not Result then
+    Exit;
+
+  // File lookup
+  cutFindFilePath := CUTILGetProcAddress('cutFindFilePath');
+
+  // Image load/save
+  cutLoadPGMf := CUTILGetProcAddress('cutLoadPGMf');
+  cutSavePGMf := CUTILGetProcAddress('cutSavePGMf');
+  cutLoadPGMub := CUTILGetProcAddress('cutLoadPGMub');
+  cutLoadPPMub := CUTILGetProcAddress('cutLoadPPMub');
+  cutLoadPPM4ub := CUTILGetProcAddress('cutLoadPPM4ub');
+  cutLoadPGMi := CUTILGetProcAddress('cutLoadPGMi');
+  cutLoadPGMs := CUTILGetProcAddress('cutLoadPGMs');
+  cutSavePGMub := CUTILGetProcAddress('cutSavePGMub');
+  cutSavePPMub := CUTILGetProcAddress('cutSavePPMub');
+  cutSavePPM4ub := CUTILGetProcAddress('cutSavePPM4ub');
+  cutSavePGMi := CUTILGetProcAddress('cutSavePGMi');
+  cutSavePGMs := CUTILGetProcAddress('cutSavePGMs');
+
+  // Buffer comparison
+  cutComparef := CUTILGetProcAddress('cutComparef');
+  cutComparei := CUTILGetProcAddress('cutComparei');
+  cutCompareuit := CUTILGetProcAddress('cutCompareuit');
+  cutCompareub := CUTILGetProcAddress('cutCompareub');
+  cutCompareubt := CUTILGetProcAddress('cutCompareubt');
+  cutCompareube := CUTILGetProcAddress('cutCompareube');
+  cutComparefe := CUTILGetProcAddress('cutComparefe');
+  cutComparefet := CUTILGetProcAddress('cutComparefet');
+  cutCompareL2fe := CUTILGetProcAddress('cutCompareL2fe');
+
+  // Timers
+  cutCreateTimer := CUTILGetProcAddress('cutCreateTimer');
+  cutStartTimer := CUTILGetProcAddress('cutStartTimer');
+  cutStopTimer := CUTILGetProcAddress('cutStopTimer');
+  cutResetTimer := CUTILGetProcAddress('cutResetTimer');
+  cutDeleteTimer := CUTILGetProcAddress('cutDeleteTimer');
+  cutGetTimerValue := CUTILGetProcAddress('cutGetTimerValue');
+  cutGetAverageTimerValue := CUTILGetProcAddress('cutGetAverageTimerValue');
+
+  // Memory release
+  cutFree := CUTILGetProcAddress('cutFree');
+end;
+
+// Reports whether a cutil library handle is currently held, i.e. whether
+// CUTILHandle differs from INVALID_MODULEHANDLE.
+function IsCUTILInitialized: Boolean;
+begin
+  Result := not (CUTILHandle = INVALID_MODULEHANDLE);
+end;
+
+//-----------------------------------------------
+initialization
+//-----------------------------------------------
+
+// Empty: the library is not loaded when the unit is initialized; loading
+// happens in InitCUTIL / InitCUTILFromLibrary.
+
+finalization
+
+  // Release the library handle, if one is still held, when the unit is finalized.
+  CloseCUTIL;
+
+end.

Vissa filer visades inte eftersom för många filer har ändrats