Переглянути джерело

Updated kernel.cu for scalarproduct demo license

Pavel Vassiliev 2 роки тому
батько
коміт
bba47f67ae

+ 0 - 1
Examples/Demos/bench/fBenchD.dfm

@@ -48,7 +48,6 @@ object frmBench: TfrmBench
     Top = 0
     Width = 761
     Height = 566
-    ActivePage = tsCanvas
     Align = alClient
     TabOrder = 1
     object tsCanvas: TTabSheet

+ 20 - 0
Examples/Demos/computing/CudaD.dpr

@@ -0,0 +1,20 @@
+program CudaD; // Launcher project for the CUDA demos: a VCL application whose main form is FormCudaD.
+
+uses
+  Vcl.Forms,
+  fCudaD in 'fCudaD.pas' {FormCudaD},
+  fFastFourierD in 'FastFourierTransformation\fFastFourierD.pas' {FormFFT},
+  fPostProcessingD in 'PostProcessing\fPostProcessingD.pas',
+  fScalarProductD in 'ScalarProduct\fScalarProductD.pas',
+  fSimpleTexD in 'SimpleCUDATexture\fSimpleTexD.pas',
+  fFluidsD in 'StableFluids\fFluidsD.pas',
+  fVertexGenD in 'VertexDataGeneration\fVertexGenD.pas';
+
+{$R *.res} // Link the resource file that shares the project's base name.
+
+begin
+  Application.Initialize;
+  Application.MainFormOnTaskbar := True; // The taskbar button belongs to the main form, not the hidden application window.
+  Application.CreateForm(TFormCudaD, FormCudaD); // Only FormCudaD is auto-created here; the other demo form units are not instantiated in this file.
+  Application.Run; // Enter the message loop.
+end.

+ 142 - 0
Examples/Demos/computing/CudaD.dproj

@@ -0,0 +1,142 @@
+<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+    <PropertyGroup>
+        <ProjectGuid>{F53BCDB0-AD6D-4859-915C-735290656A01}</ProjectGuid>
+        <MainSource>CudaD.dpr</MainSource>
+        <Base>True</Base>
+        <Config Condition="'$(Config)'==''">Debug</Config> <!-- Debug is the default configuration when none is passed in. -->
+        <TargetedPlatforms>129</TargetedPlatforms> <!-- NOTE(review): 129 looks like the Win32 + Linux64 flag combination, but FrameworkType below is VCL; confirm whether Win32 only (1) was intended. -->
+        <AppType>Application</AppType>
+        <FrameworkType>VCL</FrameworkType>
+        <ProjectVersion>19.4</ProjectVersion>
+        <Platform Condition="'$(Platform)'==''">Win32</Platform> <!-- Win32 is the default platform when none is passed in. -->
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Config)'=='Base' or '$(Base)'!=''">
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="('$(Platform)'=='Win32' and '$(Base)'=='true') or '$(Base_Win32)'!=''">
+        <Base_Win32>true</Base_Win32>
+        <CfgParent>Base</CfgParent>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="('$(Platform)'=='Win64' and '$(Base)'=='true') or '$(Base_Win64)'!=''">
+        <Base_Win64>true</Base_Win64>
+        <CfgParent>Base</CfgParent>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Config)'=='Release' or '$(Cfg_1)'!=''">
+        <Cfg_1>true</Cfg_1>
+        <CfgParent>Base</CfgParent>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="('$(Platform)'=='Win32' and '$(Cfg_1)'=='true') or '$(Cfg_1_Win32)'!=''">
+        <Cfg_1_Win32>true</Cfg_1_Win32>
+        <CfgParent>Cfg_1</CfgParent>
+        <Cfg_1>true</Cfg_1>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Config)'=='Debug' or '$(Cfg_2)'!=''">
+        <Cfg_2>true</Cfg_2>
+        <CfgParent>Base</CfgParent>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="('$(Platform)'=='Win32' and '$(Cfg_2)'=='true') or '$(Cfg_2_Win32)'!=''">
+        <Cfg_2_Win32>true</Cfg_2_Win32>
+        <CfgParent>Cfg_2</CfgParent>
+        <Cfg_2>true</Cfg_2>
+        <Base>true</Base>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Base)'!=''">
+        <DCC_E>false</DCC_E>
+        <DCC_F>false</DCC_F>
+        <DCC_K>false</DCC_K>
+        <DCC_N>false</DCC_N>
+        <DCC_S>false</DCC_S>
+        <DCC_ImageBase>00400000</DCC_ImageBase>
+        <SanitizedProjectName>CudaD</SanitizedProjectName>
+        <DCC_Namespace>Vcl;Vcl.Imaging;Vcl.Touch;Vcl.Samples;Vcl.Shell;System;Xml;Data;Datasnap;Web;Soap;$(DCC_Namespace)</DCC_Namespace>
+        <VerInfo_Locale>1049</VerInfo_Locale>
+        <VerInfo_Keys>CompanyName=;FileDescription=;FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=;ProductVersion=1.0.0.0;Comments=;CFBundleName=</VerInfo_Keys>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Base_Win32)'!=''">
+        <DCC_Namespace>Winapi;System.Win;Data.Win;Datasnap.Win;Web.Win;Soap.Win;Xml.Win;Bde;$(DCC_Namespace)</DCC_Namespace>
+        <BT_BuildType>Debug</BT_BuildType>
+        <VerInfo_IncludeVerInfo>true</VerInfo_IncludeVerInfo>
+        <VerInfo_Keys>CompanyName=;FileDescription=$(MSBuildProjectName);FileVersion=1.0.0.0;InternalName=;LegalCopyright=;LegalTrademarks=;OriginalFilename=;ProductName=$(MSBuildProjectName);ProductVersion=1.0.0.0;Comments=;ProgramID=com.embarcadero.$(MSBuildProjectName)</VerInfo_Keys>
+        <VerInfo_Locale>1033</VerInfo_Locale>
+        <Manifest_File>$(BDS)\bin\default_app.manifest</Manifest_File>
+        <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
+        <UWP_DelphiLogo44>$(BDS)\bin\Artwork\Windows\UWP\delphi_UwpDefault_44.png</UWP_DelphiLogo44>
+        <UWP_DelphiLogo150>$(BDS)\bin\Artwork\Windows\UWP\delphi_UwpDefault_150.png</UWP_DelphiLogo150>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Base_Win64)'!=''">
+        <UWP_DelphiLogo44>$(BDS)\bin\Artwork\Windows\UWP\delphi_UwpDefault_44.png</UWP_DelphiLogo44>
+        <UWP_DelphiLogo150>$(BDS)\bin\Artwork\Windows\UWP\delphi_UwpDefault_150.png</UWP_DelphiLogo150>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Cfg_1)'!=''">
+        <DCC_Define>RELEASE;$(DCC_Define)</DCC_Define>
+        <DCC_DebugInformation>0</DCC_DebugInformation>
+        <DCC_LocalDebugSymbols>false</DCC_LocalDebugSymbols>
+        <DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Cfg_1_Win32)'!=''">
+        <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
+        <AppDPIAwarenessMode>PerMonitorV2</AppDPIAwarenessMode>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Cfg_2)'!=''">
+        <DCC_Define>DEBUG;$(DCC_Define)</DCC_Define>
+        <DCC_Optimize>false</DCC_Optimize>
+        <DCC_GenerateStackFrames>true</DCC_GenerateStackFrames>
+        <DCC_RangeChecking>true</DCC_RangeChecking>
+        <DCC_IntegerOverflowCheck>true</DCC_IntegerOverflowCheck>
+    </PropertyGroup>
+    <PropertyGroup Condition="'$(Cfg_2_Win32)'!=''">
+        <AppEnableRuntimeThemes>true</AppEnableRuntimeThemes>
+        <AppDPIAwarenessMode>PerMonitorV2</AppDPIAwarenessMode>
+    </PropertyGroup>
+    <ItemGroup>
+        <DelphiCompile Include="$(MainSource)">
+            <MainSource>MainSource</MainSource>
+        </DelphiCompile>
+        <DCCReference Include="fCudaD.pas">
+            <Form>FormCudaD</Form>
+        </DCCReference>
+        <DCCReference Include="FastFourierTransformation\fFastFourierD.pas">
+            <Form>FormFFT</Form>
+        </DCCReference>
+        <DCCReference Include="PostProcessing\fPostProcessingD.pas"/>
+        <DCCReference Include="ScalarProduct\fScalarProductD.pas"/>
+        <DCCReference Include="SimpleCUDATexture\fSimpleTexD.pas"/>
+        <DCCReference Include="StableFluids\fFluidsD.pas"/>
+        <DCCReference Include="VertexDataGeneration\fVertexGenD.pas"/>
+        <BuildConfiguration Include="Base">
+            <Key>Base</Key>
+        </BuildConfiguration>
+        <BuildConfiguration Include="Release">
+            <Key>Cfg_1</Key>
+            <CfgParent>Base</CfgParent>
+        </BuildConfiguration>
+        <BuildConfiguration Include="Debug">
+            <Key>Cfg_2</Key>
+            <CfgParent>Base</CfgParent>
+        </BuildConfiguration>
+    </ItemGroup>
+    <ProjectExtensions>
+        <Borland.Personality>Delphi.Personality.12</Borland.Personality>
+        <Borland.ProjectType/>
+        <BorlandProject>
+            <Delphi.Personality>
+                <Source>
+                    <Source Name="MainSource">CudaD.dpr</Source>
+                </Source>
+            </Delphi.Personality>
+            <Platforms>
+                <Platform value="Linux64">True</Platform> <!-- NOTE(review): Linux64 is enabled for a project whose FrameworkType is VCL; verify this is intended. -->
+                <Platform value="Win32">True</Platform>
+                <Platform value="Win64">False</Platform> <!-- NOTE(review): Win64 is disabled although Base_Win64 property groups are defined in this file; confirm. -->
+            </Platforms>
+        </BorlandProject>
+        <ProjectFileVersion>12</ProjectFileVersion>
+    </ProjectExtensions>
+    <Import Project="$(BDS)\Bin\CodeGear.Delphi.Targets" Condition="Exists('$(BDS)\Bin\CodeGear.Delphi.Targets')"/>
+    <Import Project="$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj" Condition="Exists('$(APPDATA)\Embarcadero\$(BDSAPPDATABASEDIR)\$(PRODUCTVERSION)\UserTools.proj')"/>
+</Project>

+ 2 - 1
Examples/Demos/computing/FastFourierTransformation/FastFourierD.dpr

@@ -3,12 +3,13 @@ program FastFourierD;
 uses
   Forms,
   uCPUFFT in 'uCPUFFT.pas',
-  fFourier_D in 'fFourier_D.pas';
+  fFastFourierD in 'fFastFourierD.pas' {Form1};
 
 {$R *.res}
 
 begin
   ReportMemoryLeaksOnShutdown := True; // Ask the memory manager to report leaked allocations when the program exits.
   Application.Initialize;
+  Application.CreateForm(TForm1, Form1); // NOTE(review): CudaD.dpr and CudaD.dproj bind this same unit (fFastFourierD.pas) to FormFFT, not Form1; confirm which class the unit actually declares.
   Application.Run;
 end.

+ 4 - 1
Examples/Demos/computing/FastFourierTransformation/FastFourierD.dproj

@@ -128,7 +128,10 @@
             <MainSource>MainSource</MainSource>
         </DelphiCompile>
         <DCCReference Include="uCPUFFT.pas"/>
-        <DCCReference Include="fFourier_D.pas"/>
+        <DCCReference Include="fFastFourierD.pas">
+            <Form>Form1</Form> <!-- NOTE(review): CudaD.dproj registers the same unit with Form FormFFT; confirm the form name declared in fFastFourierD.pas. -->
+            <FormType>dfm</FormType>
+        </DCCReference>
         <BuildConfiguration Include="Base">
             <Key>Base</Key>
         </BuildConfiguration>

+ 4 - 25
Examples/Demos/computing/FastFourierTransformation/fFourierD.dfm

@@ -13,37 +13,16 @@ object Form4: TForm4
   Position = poScreenCenter
   TextHeight = 15
   object Panel1: TPanel
-    Left = 668
-    Top = 56
+    Left = 44
+    Top = 35
     Width = 221
     Height = 409
     Caption = 'Panel1'
     TabOrder = 0
-    object Label4: TLabel
-      Left = 64
-      Top = 64
-      Width = 34
-      Height = 15
-      Caption = 'Label4'
-    end
-    object Label3: TLabel
-      Left = 56
-      Top = 32
-      Width = 34
-      Height = 15
-      Caption = 'Label3'
-    end
-    object Label5: TLabel
-      Left = 72
-      Top = 88
-      Width = 34
-      Height = 15
-      Caption = 'Label5'
-    end
   end
   object Panel2: TPanel
-    Left = 32
-    Top = 65
+    Left = 640
+    Top = 53
     Width = 209
     Height = 391
     Caption = 'Panel2'

+ 34 - 14
Examples/Demos/computing/ScalarProduct/ScalarProduct_kernel.cu

@@ -1,17 +1,37 @@
-/*
- * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
- *
- * NVIDIA Corporation and its licensors retain all intellectual property and 
- * proprietary rights in and to this software and related documentation. 
- * Any use, reproduction, disclosure, or distribution of this software 
- * and related documentation without an express license agreement from
- * NVIDIA Corporation is strictly prohibited.
- *
- * Please refer to the applicable NVIDIA end user license agreement (EULA) 
- * associated with this source code for terms and conditions that govern 
- * your use of this NVIDIA software.
- * 
- */
+/// *
+// * Copyright 1993-2009 NVIDIA Corporation.  All rights reserved.
+// *
+// * NOTICE TO USER:
+// *
+// * This source code is subject to NVIDIA ownership rights under U.S. and
+// * international Copyright laws.  Users and possessors of this source code
+// * are hereby granted a nonexclusive, royalty-free license to use this code
+// * in individual and commercial software.
+// *
+// * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+// * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+// * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
+// * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+// * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+// * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+// * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+// * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+// * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
+// * OR PERFORMANCE OF THIS SOURCE CODE.
+// *
+// * U.S. Government End Users.   This source code is a "commercial item" as
+// * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
+// * "commercial computer  software"  and "commercial computer software
+// * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
+// * and is provided to the U.S. Government only as a commercial end item.
+// * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+// * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+// * source code with only those rights set forth herein.
+// *
+// * Any use of this source code in individual and commercial software must
+// * include, in the user documentation and internal comments to the code,
+// * the above Disclaimer and U.S. Government End Users Notice.
+// */
 
 ///////////////////////////////////////////////////////////////////////////////
 // On G80-class hardware 24-bit multiplication takes 4 clocks per warp

+ 4 - 262
Examples/Demos/computing/ScalarProduct/fScalarProductD.dfm

@@ -48,273 +48,15 @@ object FormSP: TFormSP
       Code.Strings = (
         #9'.version 1.4'
         #9'.target sm_13'
-        
+		
           #9'// compiled with C:\Program Files\NVIDIA GPU Computing Toolkit\' +
-          'CUDA\v3.2\\bin/../open64/lib//be.exe'
+            'CUDA\v3.2\\bin/../open64/lib//be.exe'
         #9'// nvopencc 3.2 built on 2010-11-06'
         ''
         #9'//-----------------------------------------------------------'
-        
-          #9'// Compiling C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_000009' +
-          '88_00000000-11_temp.cpp3.i (C:/Users/YARUNA~1/AppData/Local/Temp' +
-          '/ccBI#.a01408)'
+          #9'// Compiling C:/Users/VPV~1/AppData/Local/Temp/...)'
         #9'//-----------------------------------------------------------'
         ''
-        #9'//-----------------------------------------------------------'
-        #9'// Options:'
-        #9'//-----------------------------------------------------------'
-        #9'//  Target:ptx, ISA:sm_13, Endian:little, Pointer Size:32'
-        #9'//  -O3'#9'(Optimization level)'
-        #9'//  -g0'#9'(Debug level)'
-        #9'//  -m2'#9'(Report advisories)'
-        #9'//-----------------------------------------------------------'
-        ''
-        
-          #9'.file'#9'1'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_00000988_0' +
-          '0000000-10_temp.cudafe2.gpu"'
-        
-          #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
-          'E\crtdefs.h"'
-        
-          #9'.file'#9'3'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\crt/device_runtime.h"'
-        
-          #9'.file'#9'4'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\host_defines.h"'
-        
-          #9'.file'#9'5'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\builtin_types.h"'
-        
-          #9'.file'#9'6'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\device_types.h"'
-        
-          #9'.file'#9'7'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\driver_types.h"'
-        
-          #9'.file'#9'8'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\surface_types.h"'
-        
-          #9'.file'#9'9'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\texture_types.h"'
-        
-          #9'.file'#9'10'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\vector_types.h"'
-        
-          #9'.file'#9'11'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\builtin_types.h"'
-        
-          #9'.file'#9'12'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\host_defines.h"'
-        
-          #9'.file'#9'13'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
-          '.2\include\device_launch_parameters.h"'
-        
-          #9'.file'#9'14'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\crt\storage_class.h"'
-        
-          #9'.file'#9'15'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
-          'DE\time.h"'
-        
-          #9'.file'#9'16'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\texture_fetch_functions.h"'
-        
-          #9'.file'#9'17'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
-          '.2\include\common_functions.h"'
-        
-          #9'.file'#9'18'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_functions.h"'
-        
-          #9'.file'#9'19'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_constants.h"'
-        
-          #9'.file'#9'20'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\device_functions.h"'
-        
-          #9'.file'#9'21'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_11_atomic_functions.h"'
-        
-          #9'.file'#9'22'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_12_atomic_functions.h"'
-        
-          #9'.file'#9'23'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_13_double_functions.h"'
-        
-          #9'.file'#9'24'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_20_atomic_functions.h"'
-        
-          #9'.file'#9'25'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_20_intrinsics.h"'
-        
-          #9'.file'#9'26'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\surface_functions.h"'
-        
-          #9'.file'#9'27'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_functions_dbl_ptx3.h"'
-        #9'.file'#9'28'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/temp.cu"'
-        ''
-        ''
-        #9'.entry _Z13scalarProdGPUPfS_S_ii ('
-        #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_C,'
-        #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_A,'
-        #9#9'.param .u32 __cudaparm__Z13scalarProdGPUPfS_S_ii_d_B,'
-        #9#9'.param .s32 __cudaparm__Z13scalarProdGPUPfS_S_ii_vectorN,'
-        #9#9'.param .s32 __cudaparm__Z13scalarProdGPUPfS_S_ii_elementN)'
-        #9'{'
-        #9'.reg .u16 %rh<3>;'
-        #9'.reg .u32 %r<46>;'
-        #9'.reg .f32 %f<9>;'
-        #9'.reg .pred %p<12>;'
-        
-          #9'.shared .align 4 .b8 __cuda___cuda_local_var_83325_34_non_const' +
-          '_accumResult20[4096];'
-        #9'.loc'#9'28'#9'41'#9'0'
-        '$LDWbegin__Z13scalarProdGPUPfS_S_ii:'
-        #9'.loc'#9'28'#9'51'#9'0'
-        #9'cvt.s32.u16 '#9'%r1, %ctaid.x;'
-        #9'mov.s32 '#9'%r2, %r1;'
-        
-          #9'ld.param.s32 '#9'%r3, [__cudaparm__Z13scalarProdGPUPfS_S_ii_vector' +
-          'N];'
-        #9'setp.le.s32 '#9'%p1, %r3, %r1;'
-        #9'@%p1 bra '#9'$Lt_0_6146;'
-        #9'cvt.s32.u16 '#9'%r4, %tid.x;'
-        #9'mov.s32 '#9'%r5, 1023;'
-        #9'setp.le.s32 '#9'%p2, %r4, %r5;'
-        #9'mov.u32 '#9'%r6, 0;'
-        #9'setp.eq.u32 '#9'%p3, %r4, %r6;'
-        #9'cvt.u32.u16 '#9'%r7, %nctaid.x;'
-        
-          #9'mov.u32 '#9'%r8, __cuda___cuda_local_var_83325_34_non_const_accumR' +
-          'esult20;'
-        '$Lt_0_6658:'
-        
-          ' //<loop> Loop body line 51, nesting depth: 1, estimated iterati' +
-          'ons: unknown'
-        #9'@!%p2 bra '#9'$Lt_0_6914;'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
-        
-          #9'ld.param.s32 '#9'%r9, [__cudaparm__Z13scalarProdGPUPfS_S_ii_elemen' +
-          'tN];'
-        #9'mul24.lo.s32 '#9'%r10, %r9, %r2;'
-        #9'add.s32 '#9'%r11, %r10, %r9;'
-        #9'mul24.lo.u32 '#9'%r12, %r4, 4;'
-        #9'cvt.u32.u16 '#9'%r13, %ntid.x;'
-        #9'mul24.lo.u32 '#9'%r14, %r13, 4;'
-        #9'add.s32 '#9'%r15, %r10, %r4;'
-        #9'add.u32 '#9'%r16, %r12, %r8;'
-        #9'add.u32 '#9'%r17, %r8, 4092;'
-        '$Lt_0_7426:'
-        
-          ' //<loop> Loop body line 51, nesting depth: 2, estimated iterati' +
-          'ons: unknown'
-        #9'.loc'#9'28'#9'64'#9'0'
-        #9'mov.s32 '#9'%r18, %r15;'
-        #9'setp.le.s32 '#9'%p4, %r11, %r18;'
-        #9'@%p4 bra '#9'$Lt_0_12290;'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
-        #9'sub.s32 '#9'%r19, %r11, %r18;'
-        #9'add.s32 '#9'%r20, %r19, 1023;'
-        #9'shr.s32 '#9'%r21, %r20, 31;'
-        #9'mov.s32 '#9'%r22, 1023;'
-        #9'and.b32 '#9'%r23, %r21, %r22;'
-        #9'add.s32 '#9'%r24, %r23, %r20;'
-        #9'shr.s32 '#9'%r25, %r24, 10;'
-        #9'mul.lo.u32 '#9'%r26, %r18, 4;'
-        #9'ld.param.u32 '#9'%r27, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_A];'
-        #9'add.u32 '#9'%r28, %r26, %r27;'
-        #9'ld.param.u32 '#9'%r29, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_B];'
-        #9'add.u32 '#9'%r30, %r29, %r26;'
-        #9'mul.lo.u32 '#9'%r31, %r11, 4;'
-        #9'add.u32 '#9'%r32, %r31, %r27;'
-        #9'mov.f32 '#9'%f1, 0f00000000;     '#9'// 0'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
-        #9'mov.s32 '#9'%r33, %r25;'
-        '$Lt_0_8194:'
-        
-          ' //<loop> Loop body line 64, nesting depth: 3, estimated iterati' +
-          'ons: unknown'
-        #9'.loc'#9'28'#9'65'#9'0'
-        #9'ld.global.f32 '#9'%f2, [%r28+0];'
-        #9'ld.global.f32 '#9'%f3, [%r30+0];'
-        #9'mad.f32 '#9'%f1, %f2, %f3, %f1;'
-        #9'add.u32 '#9'%r30, %r30, 4096;'
-        #9'add.u32 '#9'%r28, %r28, 4096;'
-        #9'setp.lt.u32 '#9'%p5, %r28, %r32;'
-        #9'@%p5 bra '#9'$Lt_0_8194;'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
-        #9'bra.uni '#9'$Lt_0_7682;'
-        '$Lt_0_12290:'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
-        #9'mov.f32 '#9'%f1, 0f00000000;     '#9'// 0'
-        '$Lt_0_7682:'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_7426'
-        #9'.loc'#9'28'#9'67'#9'0'
-        #9'st.shared.f32 '#9'[%r16+0], %f1;'
-        #9'add.s32 '#9'%r15, %r18, %r13;'
-        #9'add.u32 '#9'%r16, %r16, %r14;'
-        #9'setp.le.u32 '#9'%p6, %r16, %r17;'
-        #9'@%p6 bra '#9'$Lt_0_7426;'
-        '$Lt_0_6914:'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
-        #9'mov.s32 '#9'%r34, 512;'
-        '$Lt_0_9474:'
-        
-          ' //<loop> Loop body line 67, nesting depth: 2, estimated iterati' +
-          'ons: unknown'
-        #9'.loc'#9'28'#9'75'#9'0'
-        #9'bar.sync '#9'0;'
-        #9'setp.ge.s32 '#9'%p7, %r4, %r34;'
-        #9'@%p7 bra '#9'$Lt_0_9730;'
-        ' //<loop> Part of loop body line 67, head labeled $Lt_0_9474'
-        #9'mul24.lo.u32 '#9'%r35, %r4, 4;'
-        #9'mov.u16 '#9'%rh1, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r14, %rh1, 4;'
-        #9'mul.lo.u32 '#9'%r36, %r34, 4;'
-        #9'add.u32 '#9'%r37, %r35, %r8;'
-        #9'add.u32 '#9'%r38, %r36, %r8;'
-        #9'add.s32 '#9'%r39, %r36, %r35;'
-        #9'add.u32 '#9'%r40, %r39, %r8;'
-        '$Lt_0_10242:'
-        
-          ' //<loop> Loop body line 75, nesting depth: 2, estimated iterati' +
-          'ons: unknown'
-        #9'.loc'#9'28'#9'77'#9'0'
-        #9'ld.shared.f32 '#9'%f4, [%r37+0];'
-        #9'ld.shared.f32 '#9'%f5, [%r40+0];'
-        #9'add.f32 '#9'%f6, %f4, %f5;'
-        #9'st.shared.f32 '#9'[%r37+0], %f6;'
-        #9'add.u32 '#9'%r40, %r40, %r14;'
-        #9'add.u32 '#9'%r37, %r37, %r14;'
-        #9'setp.lt.u32 '#9'%p8, %r37, %r38;'
-        #9'@%p8 bra '#9'$Lt_0_10242;'
-        '$Lt_0_9730:'
-        ' //<loop> Part of loop body line 67, head labeled $Lt_0_9474'
-        #9'.loc'#9'28'#9'74'#9'0'
-        #9'shr.s32 '#9'%r34, %r34, 1;'
-        #9'mov.u32 '#9'%r41, 0;'
-        #9'setp.gt.s32 '#9'%p9, %r34, %r41;'
-        #9'@%p9 bra '#9'$Lt_0_9474;'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
-        #9'@!%p3 bra '#9'$Lt_0_11010;'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
-        #9'.loc'#9'28'#9'80'#9'0'
-        
-          #9'ld.shared.f32 '#9'%f7, [__cuda___cuda_local_var_83325_34_non_const' +
-          '_accumResult20+0];'
-        #9'ld.param.u32 '#9'%r42, [__cudaparm__Z13scalarProdGPUPfS_S_ii_d_C];'
-        #9'mul.lo.u32 '#9'%r43, %r2, 4;'
-        #9'add.u32 '#9'%r44, %r42, %r43;'
-        #9'st.global.f32 '#9'[%r44+0], %f7;'
-        '$Lt_0_11010:'
-        ' //<loop> Part of loop body line 51, head labeled $Lt_0_6658'
-        #9'add.u32 '#9'%r2, %r2, %r7;'
-        #9'setp.lt.s32 '#9'%p10, %r2, %r3;'
-        #9'@%p10 bra '#9'$Lt_0_6658;'
-        '$Lt_0_6146:'
-        #9'.loc'#9'28'#9'82'#9'0'
-        #9'exit;'
-        '$LDWend__Z13scalarProdGPUPfS_S_ii:'
-        #9'} // _Z13scalarProdGPUPfS_S_ii'
         '')
       Compiler = GLCUDACompiler1
       object scalarProdGPU: TCUDAFunction
@@ -389,4 +131,4 @@ object FormSP: TFormSP
     Left = 240
     Top = 128
   end
-end
+end

+ 4 - 0
Examples/Demos/computing/ScalarProduct/fScalarProductD.pas

@@ -12,6 +12,9 @@ uses
   Vcl.Dialogs,
   Vcl.StdCtrls,
 
+  GLS.Utils,
+  GLS.FilePGM,
+
   CUDA.Compiler,
   CUDA.Context,
   CUDA.APIComps,
@@ -50,6 +53,7 @@ type
 
 var
   FormSP: TFormSP;
+  pgm: TGLPGMImage;
 
 implementation
 

+ 2 - 1053
Examples/Demos/computing/SimpleCUDATexture/fSimpleTexD.dfm

@@ -1,4 +1,4 @@
-object Form1: TForm1
+object FormST: TFormST
   Left = 0
   Top = 0
   BorderStyle = bsDialog
@@ -55,9 +55,7 @@ object Form1: TForm1
         ''
         #9'//-----------------------------------------------------------'
         
-          #9'// Compiling C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a' +
-          '14_00000001-9_temp.cpp3.i (C:/Users/YARUND~1/AppData/Local/Temp/' +
-          'ccBI#.a03752)'
+          #9'// Compiling C:/Users/VPV~1/AppData/Local/Temp/...)'
         #9'//-----------------------------------------------------------'
         ''
         #9'//-----------------------------------------------------------'
@@ -69,1055 +67,6 @@ object Form1: TForm1
         #9'//  -m2'#9'(Report advisories)'
         #9'//-----------------------------------------------------------'
         ''
-        
-          #9'.file'#9'1'#9'"C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a14_0' +
-          '0000001-8_temp.cudafe2.gpu"'
-        
-          #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
-          'E\crtdefs.h"'
-        #9'.file'#9'3'#9'"C:\CUDA\include\crt/device_runtime.h"'
-        #9'.file'#9'4'#9'"C:\CUDA\include\host_defines.h"'
-        #9'.file'#9'5'#9'"C:\CUDA\include\builtin_types.h"'
-        #9'.file'#9'6'#9'"c:\cuda\include\device_types.h"'
-        #9'.file'#9'7'#9'"c:\cuda\include\driver_types.h"'
-        #9'.file'#9'8'#9'"c:\cuda\include\surface_types.h"'
-        #9'.file'#9'9'#9'"c:\cuda\include\texture_types.h"'
-        #9'.file'#9'10'#9'"c:\cuda\include\vector_types.h"'
-        #9'.file'#9'11'#9'"c:\cuda\include\host_defines.h"'
-        #9'.file'#9'12'#9'"C:\CUDA\include\device_launch_parameters.h"'
-        #9'.file'#9'13'#9'"c:\cuda\include\crt\storage_class.h"'
-        
-          #9'.file'#9'14'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
-          'DE\time.h"'
-        #9'.file'#9'15'#9'"C:/Users/YARUND~1/AppData/Local/Temp/temp.cu"'
-        #9'.file'#9'16'#9'"C:\CUDA\include\common_functions.h"'
-        #9'.file'#9'17'#9'"c:\cuda\include\crt/func_macro.h"'
-        #9'.file'#9'18'#9'"c:\cuda\include\math_functions.h"'
-        #9'.file'#9'19'#9'"c:\cuda\include\device_functions.h"'
-        #9'.file'#9'20'#9'"c:\cuda\include\math_constants.h"'
-        #9'.file'#9'21'#9'"c:\cuda\include\sm_11_atomic_functions.h"'
-        #9'.file'#9'22'#9'"c:\cuda\include\sm_12_atomic_functions.h"'
-        #9'.file'#9'23'#9'"c:\cuda\include\sm_13_double_functions.h"'
-        #9'.file'#9'24'#9'"c:\cuda\include\common_types.h"'
-        #9'.file'#9'25'#9'"c:\cuda\include\sm_20_atomic_functions.h"'
-        #9'.file'#9'26'#9'"c:\cuda\include\sm_20_intrinsics.h"'
-        #9'.file'#9'27'#9'"c:\cuda\include\surface_functions.h"'
-        #9'.file'#9'28'#9'"c:\cuda\include\texture_fetch_functions.h"'
-        #9'.file'#9'29'#9'"c:\cuda\include\math_functions_dbl_ptx1.h"'
-        ''
-        #9'.tex .u32 tex;'
-        
-          #9'.const .align 4 .b8 __cudart_i2opi_f[24] = {65,144,67,60,153,14' +
-          '9,98,219,192,221,52,245,209,87,39,252,41,21,68,78,110,131,249,16' +
-          '2};'
-        ''
-        #9'.entry transformKernel ('
-        #9#9'.param .u32 __cudaparm_transformKernel_g_odata,'
-        #9#9'.param .s32 __cudaparm_transformKernel_width,'
-        #9#9'.param .s32 __cudaparm_transformKernel_height,'
-        #9#9'.param .f32 __cudaparm_transformKernel_theta)'
-        #9'{'
-        #9'.reg .u16 %rh<6>;'
-        #9'.reg .u32 %r<279>;'
-        #9'.reg .f32 %f<145>;'
-        #9'.reg .pred %p<50>;'
-        #9'.local .align 4 .b8 __cuda_result_16[28];'
-        #9'.local .align 4 .b8 __cuda_result_44[28];'
-        #9'.loc'#9'15'#9'10'#9'0'
-        '$LBB1_transformKernel:'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'abs.f32 '#9'%f2, %f1;'
-        #9'mov.f32 '#9'%f3, 0f7f800000;     '#9'// 1.#INF'
-        #9'setp.eq.f32 '#9'%p1, %f2, %f3;'
-        #9'@!%p1 bra '#9'$Lt_0_46850;'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'neg.f32 '#9'%f4, %f1;'
-        #9'add.rn.f32 '#9'%f5, %f1, %f4;'
-        #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
-        #9'mov.u32 '#9'%r2, __cuda_result_16;'
-        #9'bra.uni '#9'$Lt_0_3330;'
-        '$Lt_0_46850:'
-        #9'.loc'#9'18'#9'1622'#9'0'
-        #9'mov.f32 '#9'%f6, 0f473ba700;     '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p2, %f2, %f6;'
-        #9'.loc'#9'18'#9'1625'#9'0'
-        #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
-        #9'.loc'#9'18'#9'1622'#9'0'
-        #9'@!%p2 bra '#9'$Lt_0_47362;'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1625'#9'0'
-        #9'mov.b32 '#9'%r3, %f1;'
-        #9'and.b32 '#9'%r4, %r3, -2147483648;'
-        #9'mov.s32 '#9'%r5, %r4;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'shl.b32 '#9'%r6, %r3, 8;'
-        #9'mov.s32 '#9'%r7, %r1;'
-        #9'add.u32 '#9'%r8, %r1, 24;'
-        #9'mov.u32 '#9'%r9, __cuda_result_16;'
-        #9'or.b32 '#9'%r10, %r6, -2147483648;'
-        #9'mov.u32 '#9'%r11, 0;'
-        '$Lt_0_48386:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1642'#9'0'
-        #9'ld.const.u32 '#9'%r12, [%r7+0];'
-        #9'mul.lo.u32 '#9'%r13, %r12, %r10;'
-        #9'add.u32 '#9'%r14, %r13, %r11;'
-        #9'.loc'#9'18'#9'1643'#9'0'
-        #9'set.gt.u32.u32 '#9'%r15, %r13, %r14;'
-        #9'neg.s32 '#9'%r16, %r15;'
-        #9'mul.hi.u32 '#9'%r17, %r12, %r10;'
-        #9'add.u32 '#9'%r11, %r16, %r17;'
-        #9'.loc'#9'18'#9'1644'#9'0'
-        #9'st.local.u32 '#9'[%r9+0], %r14;'
-        #9'add.u32 '#9'%r9, %r9, 4;'
-        #9'add.u32 '#9'%r7, %r7, 4;'
-        #9'setp.ne.u32 '#9'%p3, %r7, %r8;'
-        #9'@%p3 bra '#9'$Lt_0_48386;'
-        #9'.loc'#9'18'#9'1646'#9'0'
-        #9'mov.u32 '#9'%r2, __cuda_result_16;'
-        #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
-        #9'.loc'#9'18'#9'1651'#9'0'
-        #9'shl.b32 '#9'%r18, %r3, 1;'
-        #9'shr.u32 '#9'%r19, %r18, 24;'
-        #9'sub.u32 '#9'%r20, %r19, 128;'
-        #9'shr.u32 '#9'%r21, %r20, 5;'
-        #9'mov.s32 '#9'%r22, 4;'
-        #9'sub.s32 '#9'%r23, %r22, %r21;'
-        #9'mul.lo.u32 '#9'%r24, %r23, 4;'
-        #9'add.u32 '#9'%r25, %r24, %r2;'
-        #9'ld.local.u32 '#9'%r11, [%r25+8];'
-        #9'.loc'#9'18'#9'1652'#9'0'
-        #9'ld.local.u32 '#9'%r26, [%r25+4];'
-        #9'and.b32 '#9'%r27, %r20, 31;'
-        #9'mov.u32 '#9'%r28, 0;'
-        #9'setp.eq.u32 '#9'%p4, %r27, %r28;'
-        #9'@%p4 bra '#9'$Lt_0_48898;'
-        #9'.loc'#9'18'#9'1655'#9'0'
-        #9'mov.s32 '#9'%r29, 32;'
-        #9'sub.s32 '#9'%r30, %r29, %r27;'
-        #9'shr.u32 '#9'%r31, %r26, %r30;'
-        #9'shl.b32 '#9'%r32, %r11, %r27;'
-        #9'add.u32 '#9'%r11, %r31, %r32;'
-        #9'.loc'#9'18'#9'1656'#9'0'
-        #9'ld.local.u32 '#9'%r33, [%r25+0];'
-        #9'shr.u32 '#9'%r34, %r33, %r30;'
-        #9'shl.b32 '#9'%r35, %r26, %r27;'
-        #9'add.u32 '#9'%r26, %r34, %r35;'
-        '$Lt_0_48898:'
-        #9'.loc'#9'18'#9'1658'#9'0'
-        #9'shr.u32 '#9'%r36, %r11, 30;'
-        #9'.loc'#9'18'#9'1660'#9'0'
-        #9'shr.u32 '#9'%r37, %r26, 30;'
-        #9'shl.b32 '#9'%r38, %r11, 2;'
-        #9'add.u32 '#9'%r11, %r37, %r38;'
-        #9'.loc'#9'18'#9'1661'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 2;'
-        #9'mov.u32 '#9'%r39, 0;'
-        #9'setp.eq.u32 '#9'%p5, %r26, %r39;'
-        #9'@%p5 bra '#9'$Lt_0_49666;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'add.u32 '#9'%r40, %r11, 1;'
-        #9'mov.u32 '#9'%r41, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r42, %r40, %r41;'
-        #9'neg.s32 '#9'%r43, %r42;'
-        #9'bra.uni '#9'$Lt_0_49410;'
-        '$Lt_0_49666:'
-        #9'mov.u32 '#9'%r44, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r45, %r11, %r44;'
-        #9'neg.s32 '#9'%r43, %r45;'
-        '$Lt_0_49410:'
-        #9'.loc'#9'18'#9'1663'#9'0'
-        #9'add.u32 '#9'%r36, %r36, %r43;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'neg.s32 '#9'%r46, %r36;'
-        #9'mov.u32 '#9'%r47, 0;'
-        #9'setp.ne.u32 '#9'%p6, %r4, %r47;'
-        #9'selp.s32 '#9'%r36, %r46, %r36, %p6;'
-        #9'mov.u32 '#9'%r48, 0;'
-        #9'setp.eq.u32 '#9'%p7, %r43, %r48;'
-        #9'@%p7 bra '#9'$Lt_0_49922;'
-        #9'.loc'#9'18'#9'1668'#9'0'
-        #9'neg.s32 '#9'%r26, %r26;'
-        #9'.loc'#9'18'#9'1670'#9'0'
-        #9'mov.u32 '#9'%r49, 0;'
-        #9'set.eq.u32.u32 '#9'%r50, %r26, %r49;'
-        #9'neg.s32 '#9'%r51, %r50;'
-        #9'not.b32 '#9'%r52, %r11;'
-        #9'add.u32 '#9'%r11, %r51, %r52;'
-        #9'.loc'#9'18'#9'1671'#9'0'
-        #9'xor.b32 '#9'%r5, %r4, -2147483648;'
-        '$Lt_0_49922:'
-        #9'.loc'#9'18'#9'1673'#9'0'
-        #9'mov.s32 '#9'%r53, %r36;'
-        #9'mov.u32 '#9'%r54, 0;'
-        #9'setp.le.s32 '#9'%p8, %r11, %r54;'
-        #9'mov.u32 '#9'%r55, 0;'
-        #9'@%p8 bra '#9'$Lt_0_69378;'
-        '$Lt_0_50946:'
-        
-          ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1677'#9'0'
-        #9'shr.u32 '#9'%r56, %r26, 31;'
-        #9'shl.b32 '#9'%r57, %r11, 1;'
-        #9'add.u32 '#9'%r11, %r56, %r57;'
-        #9'.loc'#9'18'#9'1678'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 1;'
-        #9'.loc'#9'18'#9'1679'#9'0'
-        #9'sub.u32 '#9'%r55, %r55, 1;'
-        #9'mov.u32 '#9'%r58, 0;'
-        #9'setp.gt.s32 '#9'%p9, %r11, %r58;'
-        #9'@%p9 bra '#9'$Lt_0_50946;'
-        #9'bra.uni '#9'$Lt_0_50434;'
-        '$Lt_0_69378:'
-        '$Lt_0_50434:'
-        #9'.loc'#9'18'#9'1681'#9'0'
-        #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
-        #9'.loc'#9'18'#9'1682'#9'0'
-        #9'mov.u32 '#9'%r59, -921707870;'
-        #9'mul.hi.u32 '#9'%r11, %r11, %r59;'
-        #9'mov.u32 '#9'%r60, 0;'
-        #9'setp.le.s32 '#9'%p10, %r11, %r60;'
-        #9'@%p10 bra '#9'$Lt_0_51458;'
-        #9'.loc'#9'18'#9'1684'#9'0'
-        #9'shr.u32 '#9'%r61, %r26, 31;'
-        #9'shl.b32 '#9'%r62, %r11, 1;'
-        #9'add.u32 '#9'%r11, %r61, %r62;'
-        #9'.loc'#9'18'#9'1685'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 1;'
-        #9'.loc'#9'18'#9'1686'#9'0'
-        #9'sub.u32 '#9'%r55, %r55, 1;'
-        '$Lt_0_51458:'
-        #9'.loc'#9'18'#9'1688'#9'0'
-        #9'mov.u32 '#9'%r63, 0;'
-        #9'set.ne.u32.u32 '#9'%r64, %r26, %r63;'
-        #9'neg.s32 '#9'%r65, %r64;'
-        #9'add.u32 '#9'%r11, %r65, %r11;'
-        #9'shl.b32 '#9'%r66, %r11, 24;'
-        #9'mov.s32 '#9'%r67, 0;'
-        #9'set.lt.u32.s32 '#9'%r68, %r66, %r67;'
-        #9'neg.s32 '#9'%r69, %r68;'
-        #9'shr.u32 '#9'%r70, %r11, 8;'
-        #9'add.u32 '#9'%r71, %r55, 126;'
-        #9'shl.b32 '#9'%r72, %r71, 23;'
-        #9'add.u32 '#9'%r73, %r70, %r72;'
-        #9'add.u32 '#9'%r74, %r69, %r73;'
-        #9'or.b32 '#9'%r75, %r5, %r74;'
-        #9'mov.b32 '#9'%f7, %r75;'
-        #9'bra.uni '#9'$Lt_0_3586;'
-        '$Lt_0_47362:'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mov.f32 '#9'%f8, 0f3f22f983;     '#9'// 0.63662'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mul.f32 '#9'%f9, %f1, %f8;'
-        #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
-        #9'mov.s32 '#9'%r53, %r76;'
-        #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
-        #9'neg.f32 '#9'%f11, %f10;'
-        #9'mov.f32 '#9'%f12, 0f3fc90000;    '#9'// 1.57031'
-        #9'mad.f32 '#9'%f13, %f12, %f11, %f1;'
-        #9'mov.f32 '#9'%f14, 0f39fd8000;    '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f15, %f14, %f11, %f13;'
-        #9'mov.f32 '#9'%f16, 0f34a88000;    '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f17, %f16, %f11, %f15;'
-        #9'mov.f32 '#9'%f18, 0f2e85a309;    '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f7, %f18, %f11, %f17;'
-        #9'mov.u32 '#9'%r2, __cuda_result_16;'
-        '$Lt_0_3586:'
-        #9'.loc'#9'18'#9'1949'#9'0'
-        #9'add.s32 '#9'%r77, %r53, 1;'
-        #9'mul.f32 '#9'%f19, %f7, %f7;'
-        #9'and.b32 '#9'%r78, %r77, 1;'
-        #9'mov.u32 '#9'%r79, 0;'
-        #9'setp.eq.s32 '#9'%p11, %r78, %r79;'
-        #9'@%p11 bra '#9'$Lt_0_52226;'
-        #9'.loc'#9'18'#9'1953'#9'0'
-        #9'mov.f32 '#9'%f20, 0f3f800000;    '#9'// 1'
-        #9'mov.f32 '#9'%f21, 0fbf000000;    '#9'// -0.5'
-        #9'mov.f32 '#9'%f22, 0f3d2aaaa5;    '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f23, 0fbab6061a;    '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f24, 0f37ccf5ce;    '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f25, %f24, %f19, %f23;'
-        #9'mad.f32 '#9'%f26, %f19, %f25, %f22;'
-        #9'mad.f32 '#9'%f27, %f19, %f26, %f21;'
-        #9'mad.f32 '#9'%f28, %f19, %f27, %f20;'
-        #9'bra.uni '#9'$Lt_0_51970;'
-        '$Lt_0_52226:'
-        #9'.loc'#9'18'#9'1955'#9'0'
-        #9'mov.f32 '#9'%f29, 0fbe2aaaa3;    '#9'// -0.166667'
-        #9'mov.f32 '#9'%f30, 0f3c08839e;    '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f31, 0fb94ca1f9;    '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f32, %f31, %f19, %f30;'
-        #9'mad.f32 '#9'%f33, %f19, %f32, %f29;'
-        #9'mul.f32 '#9'%f34, %f19, %f33;'
-        #9'mad.f32 '#9'%f28, %f34, %f7, %f7;'
-        '$Lt_0_51970:'
-        #9'.loc'#9'18'#9'1957'#9'0'
-        #9'neg.f32 '#9'%f35, %f28;'
-        #9'and.b32 '#9'%r80, %r77, 2;'
-        #9'mov.s32 '#9'%r81, 0;'
-        #9'setp.ne.s32 '#9'%p12, %r80, %r81;'
-        #9'selp.f32 '#9'%f28, %f35, %f28, %p12;'
-        #9'mov.f32 '#9'%f5, %f28;'
-        '$Lt_0_3330:'
-        #9'.loc'#9'18'#9'1869'#9'0'
-        #9'mov.f32 '#9'%f36, 0f00000000;    '#9'// 0'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1869'#9'0'
-        #9'setp.eq.f32 '#9'%p13, %f1, %f36;'
-        #9'selp.s32 '#9'%r82, 1, 0, %p13;'
-        #9'selp.s32 '#9'%r83, 1, 0, %p1;'
-        #9'or.b32 '#9'%r84, %r82, %r83;'
-        #9'mov.u32 '#9'%r85, 0;'
-        #9'setp.eq.s32 '#9'%p14, %r84, %r85;'
-        #9'@%p14 bra '#9'$Lt_0_52482;'
-        #9'mov.f32 '#9'%f37, 0f00000000;    '#9'// 0'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1869'#9'0'
-        #9'mul.rn.f32 '#9'%f38, %f1, %f37;'
-        #9'mov.u32 '#9'%r86, __cuda_result_44;'
-        #9'bra.uni '#9'$Lt_0_2306;'
-        '$Lt_0_52482:'
-        #9'.loc'#9'18'#9'1622'#9'0'
-        #9'mov.f32 '#9'%f39, 0f473ba700;    '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p15, %f2, %f39;'
-        #9'@!%p15 bra '#9'$Lt_0_52994;'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1625'#9'0'
-        #9'mov.b32 '#9'%r3, %f1;'
-        #9'and.b32 '#9'%r4, %r3, -2147483648;'
-        #9'mov.s32 '#9'%r87, %r4;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'shl.b32 '#9'%r6, %r3, 8;'
-        #9'mov.s32 '#9'%r88, %r1;'
-        #9'add.u32 '#9'%r8, %r1, 24;'
-        #9'mov.u32 '#9'%r89, __cuda_result_44;'
-        #9'or.b32 '#9'%r10, %r6, -2147483648;'
-        #9'mov.u32 '#9'%r90, 0;'
-        '$Lt_0_54018:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1642'#9'0'
-        #9'ld.const.u32 '#9'%r91, [%r88+0];'
-        #9'mul.lo.u32 '#9'%r92, %r91, %r10;'
-        #9'add.u32 '#9'%r93, %r92, %r90;'
-        #9'.loc'#9'18'#9'1643'#9'0'
-        #9'set.gt.u32.u32 '#9'%r94, %r92, %r93;'
-        #9'neg.s32 '#9'%r95, %r94;'
-        #9'mul.hi.u32 '#9'%r96, %r91, %r10;'
-        #9'add.u32 '#9'%r90, %r95, %r96;'
-        #9'.loc'#9'18'#9'1644'#9'0'
-        #9'st.local.u32 '#9'[%r89+0], %r93;'
-        #9'add.u32 '#9'%r89, %r89, 4;'
-        #9'add.u32 '#9'%r88, %r88, 4;'
-        #9'setp.ne.u32 '#9'%p16, %r88, %r8;'
-        #9'@%p16 bra '#9'$Lt_0_54018;'
-        #9'.loc'#9'18'#9'1646'#9'0'
-        #9'mov.u32 '#9'%r86, __cuda_result_44;'
-        #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
-        #9'.loc'#9'18'#9'1651'#9'0'
-        #9'shl.b32 '#9'%r97, %r3, 1;'
-        #9'shr.u32 '#9'%r19, %r97, 24;'
-        #9'sub.u32 '#9'%r20, %r19, 128;'
-        #9'shr.u32 '#9'%r21, %r20, 5;'
-        #9'mov.s32 '#9'%r98, 4;'
-        #9'sub.s32 '#9'%r23, %r98, %r21;'
-        #9'mul.lo.u32 '#9'%r24, %r23, 4;'
-        #9'add.u32 '#9'%r99, %r24, %r86;'
-        #9'ld.local.u32 '#9'%r90, [%r99+8];'
-        #9'.loc'#9'18'#9'1652'#9'0'
-        #9'ld.local.u32 '#9'%r100, [%r99+4];'
-        #9'and.b32 '#9'%r27, %r20, 31;'
-        #9'mov.u32 '#9'%r101, 0;'
-        #9'setp.eq.u32 '#9'%p17, %r27, %r101;'
-        #9'@%p17 bra '#9'$Lt_0_54530;'
-        #9'.loc'#9'18'#9'1655'#9'0'
-        #9'mov.s32 '#9'%r102, 32;'
-        #9'sub.s32 '#9'%r30, %r102, %r27;'
-        #9'shr.u32 '#9'%r103, %r100, %r30;'
-        #9'shl.b32 '#9'%r104, %r90, %r27;'
-        #9'add.u32 '#9'%r90, %r103, %r104;'
-        #9'.loc'#9'18'#9'1656'#9'0'
-        #9'ld.local.u32 '#9'%r105, [%r99+0];'
-        #9'shr.u32 '#9'%r106, %r105, %r30;'
-        #9'shl.b32 '#9'%r107, %r100, %r27;'
-        #9'add.u32 '#9'%r100, %r106, %r107;'
-        '$Lt_0_54530:'
-        #9'.loc'#9'18'#9'1658'#9'0'
-        #9'shr.u32 '#9'%r108, %r90, 30;'
-        #9'.loc'#9'18'#9'1660'#9'0'
-        #9'shr.u32 '#9'%r109, %r100, 30;'
-        #9'shl.b32 '#9'%r110, %r90, 2;'
-        #9'add.u32 '#9'%r90, %r109, %r110;'
-        #9'.loc'#9'18'#9'1661'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 2;'
-        #9'mov.u32 '#9'%r111, 0;'
-        #9'setp.eq.u32 '#9'%p18, %r100, %r111;'
-        #9'@%p18 bra '#9'$Lt_0_55298;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'add.u32 '#9'%r112, %r90, 1;'
-        #9'mov.u32 '#9'%r113, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r114, %r112, %r113;'
-        #9'neg.s32 '#9'%r115, %r114;'
-        #9'bra.uni '#9'$Lt_0_55042;'
-        '$Lt_0_55298:'
-        #9'mov.u32 '#9'%r116, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r117, %r90, %r116;'
-        #9'neg.s32 '#9'%r115, %r117;'
-        '$Lt_0_55042:'
-        #9'.loc'#9'18'#9'1663'#9'0'
-        #9'add.u32 '#9'%r108, %r108, %r115;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'neg.s32 '#9'%r118, %r108;'
-        #9'mov.u32 '#9'%r119, 0;'
-        #9'setp.ne.u32 '#9'%p19, %r4, %r119;'
-        #9'selp.s32 '#9'%r108, %r118, %r108, %p19;'
-        #9'mov.u32 '#9'%r120, 0;'
-        #9'setp.eq.u32 '#9'%p20, %r115, %r120;'
-        #9'@%p20 bra '#9'$Lt_0_55554;'
-        #9'.loc'#9'18'#9'1668'#9'0'
-        #9'neg.s32 '#9'%r100, %r100;'
-        #9'.loc'#9'18'#9'1670'#9'0'
-        #9'mov.u32 '#9'%r121, 0;'
-        #9'set.eq.u32.u32 '#9'%r122, %r100, %r121;'
-        #9'neg.s32 '#9'%r123, %r122;'
-        #9'not.b32 '#9'%r124, %r90;'
-        #9'add.u32 '#9'%r90, %r123, %r124;'
-        #9'.loc'#9'18'#9'1671'#9'0'
-        #9'xor.b32 '#9'%r87, %r4, -2147483648;'
-        '$Lt_0_55554:'
-        #9'.loc'#9'18'#9'1673'#9'0'
-        #9'mov.s32 '#9'%r125, %r108;'
-        #9'mov.u32 '#9'%r126, 0;'
-        #9'setp.le.s32 '#9'%p21, %r90, %r126;'
-        #9'mov.u32 '#9'%r127, 0;'
-        #9'@%p21 bra '#9'$Lt_0_69634;'
-        '$Lt_0_56578:'
-        
-          ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1677'#9'0'
-        #9'shr.u32 '#9'%r128, %r100, 31;'
-        #9'shl.b32 '#9'%r129, %r90, 1;'
-        #9'add.u32 '#9'%r90, %r128, %r129;'
-        #9'.loc'#9'18'#9'1678'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 1;'
-        #9'.loc'#9'18'#9'1679'#9'0'
-        #9'sub.u32 '#9'%r127, %r127, 1;'
-        #9'mov.u32 '#9'%r130, 0;'
-        #9'setp.gt.s32 '#9'%p22, %r90, %r130;'
-        #9'@%p22 bra '#9'$Lt_0_56578;'
-        #9'bra.uni '#9'$Lt_0_56066;'
-        '$Lt_0_69634:'
-        '$Lt_0_56066:'
-        #9'.loc'#9'18'#9'1681'#9'0'
-        #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
-        #9'.loc'#9'18'#9'1682'#9'0'
-        #9'mov.u32 '#9'%r131, -921707870;'
-        #9'mul.hi.u32 '#9'%r90, %r90, %r131;'
-        #9'mov.u32 '#9'%r132, 0;'
-        #9'setp.le.s32 '#9'%p23, %r90, %r132;'
-        #9'@%p23 bra '#9'$Lt_0_57090;'
-        #9'.loc'#9'18'#9'1684'#9'0'
-        #9'shr.u32 '#9'%r133, %r100, 31;'
-        #9'shl.b32 '#9'%r134, %r90, 1;'
-        #9'add.u32 '#9'%r90, %r133, %r134;'
-        #9'.loc'#9'18'#9'1685'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 1;'
-        #9'.loc'#9'18'#9'1686'#9'0'
-        #9'sub.u32 '#9'%r127, %r127, 1;'
-        '$Lt_0_57090:'
-        #9'.loc'#9'18'#9'1688'#9'0'
-        #9'mov.u32 '#9'%r135, 0;'
-        #9'set.ne.u32.u32 '#9'%r136, %r100, %r135;'
-        #9'neg.s32 '#9'%r137, %r136;'
-        #9'add.u32 '#9'%r90, %r137, %r90;'
-        #9'shl.b32 '#9'%r138, %r90, 24;'
-        #9'mov.s32 '#9'%r139, 0;'
-        #9'set.lt.u32.s32 '#9'%r140, %r138, %r139;'
-        #9'neg.s32 '#9'%r141, %r140;'
-        #9'shr.u32 '#9'%r142, %r90, 8;'
-        #9'add.u32 '#9'%r143, %r127, 126;'
-        #9'shl.b32 '#9'%r144, %r143, 23;'
-        #9'add.u32 '#9'%r145, %r142, %r144;'
-        #9'add.u32 '#9'%r146, %r141, %r145;'
-        #9'or.b32 '#9'%r147, %r87, %r146;'
-        #9'mov.b32 '#9'%f40, %r147;'
-        #9'bra.uni '#9'$Lt_0_2562;'
-        '$Lt_0_52994:'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mov.f32 '#9'%f41, 0f3f22f983;    '#9'// 0.63662'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mul.f32 '#9'%f9, %f1, %f41;'
-        #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
-        #9'mov.s32 '#9'%r125, %r76;'
-        #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
-        #9'neg.f32 '#9'%f11, %f10;'
-        #9'mov.f32 '#9'%f42, 0f3fc90000;    '#9'// 1.57031'
-        #9'mad.f32 '#9'%f43, %f42, %f11, %f1;'
-        #9'mov.f32 '#9'%f44, 0f39fd8000;    '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f45, %f44, %f11, %f43;'
-        #9'mov.f32 '#9'%f46, 0f34a88000;    '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f47, %f46, %f11, %f45;'
-        #9'mov.f32 '#9'%f48, 0f2e85a309;    '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f40, %f48, %f11, %f47;'
-        #9'mov.u32 '#9'%r86, __cuda_result_44;'
-        '$Lt_0_2562:'
-        #9'.loc'#9'18'#9'1872'#9'0'
-        #9'mul.f32 '#9'%f49, %f40, %f40;'
-        #9'and.b32 '#9'%r148, %r125, 1;'
-        #9'mov.u32 '#9'%r149, 0;'
-        #9'setp.eq.s32 '#9'%p24, %r148, %r149;'
-        #9'@%p24 bra '#9'$Lt_0_57858;'
-        #9'.loc'#9'18'#9'1875'#9'0'
-        #9'mov.f32 '#9'%f50, 0f3f800000;    '#9'// 1'
-        #9'mov.f32 '#9'%f51, 0fbf000000;    '#9'// -0.5'
-        #9'mov.f32 '#9'%f52, 0f3d2aaaa5;    '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f53, 0fbab6061a;    '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f54, 0f37ccf5ce;    '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f55, %f54, %f49, %f53;'
-        #9'mad.f32 '#9'%f56, %f49, %f55, %f52;'
-        #9'mad.f32 '#9'%f57, %f49, %f56, %f51;'
-        #9'mad.f32 '#9'%f58, %f49, %f57, %f50;'
-        #9'bra.uni '#9'$Lt_0_57602;'
-        '$Lt_0_57858:'
-        #9'.loc'#9'18'#9'1877'#9'0'
-        #9'mov.f32 '#9'%f59, 0fbe2aaaa3;    '#9'// -0.166667'
-        #9'mov.f32 '#9'%f60, 0f3c08839e;    '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f61, 0fb94ca1f9;    '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f62, %f61, %f49, %f60;'
-        #9'mad.f32 '#9'%f63, %f49, %f62, %f59;'
-        #9'mul.f32 '#9'%f64, %f49, %f63;'
-        #9'mad.f32 '#9'%f58, %f64, %f40, %f40;'
-        '$Lt_0_57602:'
-        #9'.loc'#9'18'#9'1879'#9'0'
-        #9'neg.f32 '#9'%f65, %f58;'
-        #9'and.b32 '#9'%r150, %r125, 2;'
-        #9'mov.s32 '#9'%r151, 0;'
-        #9'setp.ne.s32 '#9'%p25, %r150, %r151;'
-        #9'selp.f32 '#9'%f58, %f65, %f58, %p25;'
-        #9'mov.f32 '#9'%f38, %f58;'
-        '$Lt_0_2306:'
-        #9'.loc'#9'15'#9'22'#9'0'
-        #9'mov.u16 '#9'%rh1, %ctaid.y;'
-        #9'mov.u16 '#9'%rh2, %ntid.y;'
-        #9'mul.wide.u16 '#9'%r152, %rh1, %rh2;'
-        #9'ld.param.s32 '#9'%r153, [__cudaparm_transformKernel_height];'
-        #9'cvt.rn.f32.s32 '#9'%f66, %r153;'
-        #9'mov.u16 '#9'%rh3, %ctaid.x;'
-        #9'mov.u16 '#9'%rh4, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r154, %rh3, %rh4;'
-        #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
-        #9'cvt.rn.f32.s32 '#9'%f67, %r155;'
-        #9'cvt.u32.u16 '#9'%r156, %tid.y;'
-        #9'add.u32 '#9'%r157, %r156, %r152;'
-        #9'cvt.u32.u16 '#9'%r158, %tid.x;'
-        #9'add.u32 '#9'%r159, %r158, %r154;'
-        #9'cvt.rn.f32.u32 '#9'%f68, %r157;'
-        #9'cvt.rn.f32.u32 '#9'%f69, %r159;'
-        #9'div.full.f32 '#9'%f70, %f68, %f66;'
-        #9'div.full.f32 '#9'%f71, %f69, %f67;'
-        #9'mov.f32 '#9'%f72, 0fbf000000;    '#9'// -0.5'
-        #9'add.f32 '#9'%f73, %f70, %f72;'
-        #9'mov.f32 '#9'%f74, 0fbf000000;    '#9'// -0.5'
-        #9'add.f32 '#9'%f75, %f71, %f74;'
-        #9'mul.f32 '#9'%f76, %f38, %f73;'
-        #9'mul.f32 '#9'%f77, %f75, %f5;'
-        #9'sub.f32 '#9'%f78, %f77, %f76;'
-        #9'mov.f32 '#9'%f79, 0f3f000000;    '#9'// 0.5'
-        #9'add.f32 '#9'%f80, %f78, %f79;'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'@!%p1 bra '#9'$Lt_0_58114;'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'neg.f32 '#9'%f81, %f1;'
-        #9'add.rn.f32 '#9'%f82, %f1, %f81;'
-        #9'bra.uni '#9'$Lt_0_1282;'
-        '$Lt_0_58114:'
-        #9'.loc'#9'18'#9'1622'#9'0'
-        #9'mov.f32 '#9'%f83, 0f473ba700;    '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p26, %f2, %f83;'
-        #9'@!%p26 bra '#9'$Lt_0_58626;'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1625'#9'0'
-        #9'mov.b32 '#9'%r3, %f1;'
-        #9'and.b32 '#9'%r4, %r3, -2147483648;'
-        #9'mov.s32 '#9'%r5, %r4;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'shl.b32 '#9'%r6, %r3, 8;'
-        #9'mov.s32 '#9'%r7, %r1;'
-        #9'add.u32 '#9'%r8, %r1, 24;'
-        #9'mov.u32 '#9'%r9, __cuda_result_16;'
-        #9'or.b32 '#9'%r10, %r6, -2147483648;'
-        #9'mov.u32 '#9'%r11, 0;'
-        '$Lt_0_59650:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1642'#9'0'
-        #9'ld.const.u32 '#9'%r160, [%r7+0];'
-        #9'mul.lo.u32 '#9'%r161, %r160, %r10;'
-        #9'add.u32 '#9'%r162, %r161, %r11;'
-        #9'.loc'#9'18'#9'1643'#9'0'
-        #9'set.gt.u32.u32 '#9'%r163, %r161, %r162;'
-        #9'neg.s32 '#9'%r164, %r163;'
-        #9'mul.hi.u32 '#9'%r165, %r160, %r10;'
-        #9'add.u32 '#9'%r11, %r164, %r165;'
-        #9'.loc'#9'18'#9'1644'#9'0'
-        #9'st.local.u32 '#9'[%r9+0], %r162;'
-        #9'add.u32 '#9'%r9, %r9, 4;'
-        #9'add.u32 '#9'%r7, %r7, 4;'
-        #9'setp.ne.u32 '#9'%p27, %r7, %r8;'
-        #9'@%p27 bra '#9'$Lt_0_59650;'
-        #9'.loc'#9'18'#9'1646'#9'0'
-        #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
-        #9'.loc'#9'18'#9'1651'#9'0'
-        #9'shl.b32 '#9'%r166, %r3, 1;'
-        #9'shr.u32 '#9'%r19, %r166, 24;'
-        #9'sub.u32 '#9'%r20, %r19, 128;'
-        #9'shr.u32 '#9'%r21, %r20, 5;'
-        #9'mov.s32 '#9'%r167, 4;'
-        #9'sub.s32 '#9'%r23, %r167, %r21;'
-        #9'mul.lo.u32 '#9'%r24, %r23, 4;'
-        #9'add.u32 '#9'%r25, %r24, %r2;'
-        #9'ld.local.u32 '#9'%r11, [%r25+8];'
-        #9'.loc'#9'18'#9'1652'#9'0'
-        #9'ld.local.u32 '#9'%r26, [%r25+4];'
-        #9'and.b32 '#9'%r27, %r20, 31;'
-        #9'mov.u32 '#9'%r168, 0;'
-        #9'setp.eq.u32 '#9'%p28, %r27, %r168;'
-        #9'@%p28 bra '#9'$Lt_0_60162;'
-        #9'.loc'#9'18'#9'1655'#9'0'
-        #9'mov.s32 '#9'%r169, 32;'
-        #9'sub.s32 '#9'%r30, %r169, %r27;'
-        #9'shr.u32 '#9'%r170, %r26, %r30;'
-        #9'shl.b32 '#9'%r171, %r11, %r27;'
-        #9'add.u32 '#9'%r11, %r170, %r171;'
-        #9'.loc'#9'18'#9'1656'#9'0'
-        #9'ld.local.u32 '#9'%r172, [%r25+0];'
-        #9'shr.u32 '#9'%r173, %r172, %r30;'
-        #9'shl.b32 '#9'%r174, %r26, %r27;'
-        #9'add.u32 '#9'%r26, %r173, %r174;'
-        '$Lt_0_60162:'
-        #9'.loc'#9'18'#9'1658'#9'0'
-        #9'shr.u32 '#9'%r36, %r11, 30;'
-        #9'.loc'#9'18'#9'1660'#9'0'
-        #9'shr.u32 '#9'%r175, %r26, 30;'
-        #9'shl.b32 '#9'%r176, %r11, 2;'
-        #9'add.u32 '#9'%r11, %r175, %r176;'
-        #9'.loc'#9'18'#9'1661'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 2;'
-        #9'mov.u32 '#9'%r177, 0;'
-        #9'setp.eq.u32 '#9'%p29, %r26, %r177;'
-        #9'@%p29 bra '#9'$Lt_0_60930;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'add.u32 '#9'%r178, %r11, 1;'
-        #9'mov.u32 '#9'%r179, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r180, %r178, %r179;'
-        #9'neg.s32 '#9'%r181, %r180;'
-        #9'bra.uni '#9'$Lt_0_60674;'
-        '$Lt_0_60930:'
-        #9'mov.u32 '#9'%r182, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r183, %r11, %r182;'
-        #9'neg.s32 '#9'%r181, %r183;'
-        '$Lt_0_60674:'
-        #9'.loc'#9'18'#9'1663'#9'0'
-        #9'add.u32 '#9'%r36, %r36, %r181;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'neg.s32 '#9'%r184, %r36;'
-        #9'mov.u32 '#9'%r185, 0;'
-        #9'setp.ne.u32 '#9'%p30, %r4, %r185;'
-        #9'selp.s32 '#9'%r36, %r184, %r36, %p30;'
-        #9'mov.u32 '#9'%r186, 0;'
-        #9'setp.eq.u32 '#9'%p31, %r181, %r186;'
-        #9'@%p31 bra '#9'$Lt_0_61186;'
-        #9'.loc'#9'18'#9'1668'#9'0'
-        #9'neg.s32 '#9'%r26, %r26;'
-        #9'.loc'#9'18'#9'1670'#9'0'
-        #9'mov.u32 '#9'%r187, 0;'
-        #9'set.eq.u32.u32 '#9'%r188, %r26, %r187;'
-        #9'neg.s32 '#9'%r189, %r188;'
-        #9'not.b32 '#9'%r190, %r11;'
-        #9'add.u32 '#9'%r11, %r189, %r190;'
-        #9'.loc'#9'18'#9'1671'#9'0'
-        #9'xor.b32 '#9'%r5, %r4, -2147483648;'
-        '$Lt_0_61186:'
-        #9'.loc'#9'18'#9'1673'#9'0'
-        #9'mov.s32 '#9'%r53, %r36;'
-        #9'mov.u32 '#9'%r191, 0;'
-        #9'setp.le.s32 '#9'%p32, %r11, %r191;'
-        #9'@%p32 bra '#9'$Lt_0_69890;'
-        #9'mov.u32 '#9'%r55, 0;'
-        '$Lt_0_62210:'
-        
-          ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1677'#9'0'
-        #9'shr.u32 '#9'%r192, %r26, 31;'
-        #9'shl.b32 '#9'%r193, %r11, 1;'
-        #9'add.u32 '#9'%r11, %r192, %r193;'
-        #9'.loc'#9'18'#9'1678'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 1;'
-        #9'.loc'#9'18'#9'1679'#9'0'
-        #9'sub.u32 '#9'%r55, %r55, 1;'
-        #9'mov.u32 '#9'%r194, 0;'
-        #9'setp.gt.s32 '#9'%p33, %r11, %r194;'
-        #9'@%p33 bra '#9'$Lt_0_62210;'
-        #9'bra.uni '#9'$Lt_0_61698;'
-        '$Lt_0_69890:'
-        #9'mov.u32 '#9'%r55, 0;'
-        '$Lt_0_61698:'
-        #9'.loc'#9'18'#9'1681'#9'0'
-        #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
-        #9'.loc'#9'18'#9'1682'#9'0'
-        #9'mov.u32 '#9'%r195, -921707870;'
-        #9'mul.hi.u32 '#9'%r11, %r11, %r195;'
-        #9'mov.u32 '#9'%r196, 0;'
-        #9'setp.le.s32 '#9'%p34, %r11, %r196;'
-        #9'@%p34 bra '#9'$Lt_0_62722;'
-        #9'.loc'#9'18'#9'1684'#9'0'
-        #9'shr.u32 '#9'%r197, %r26, 31;'
-        #9'shl.b32 '#9'%r198, %r11, 1;'
-        #9'add.u32 '#9'%r11, %r197, %r198;'
-        #9'.loc'#9'18'#9'1685'#9'0'
-        #9'shl.b32 '#9'%r26, %r26, 1;'
-        #9'.loc'#9'18'#9'1686'#9'0'
-        #9'sub.u32 '#9'%r55, %r55, 1;'
-        '$Lt_0_62722:'
-        #9'.loc'#9'18'#9'1688'#9'0'
-        #9'mov.u32 '#9'%r199, 0;'
-        #9'set.ne.u32.u32 '#9'%r200, %r26, %r199;'
-        #9'neg.s32 '#9'%r201, %r200;'
-        #9'add.u32 '#9'%r11, %r201, %r11;'
-        #9'shl.b32 '#9'%r202, %r11, 24;'
-        #9'mov.s32 '#9'%r203, 0;'
-        #9'set.lt.u32.s32 '#9'%r204, %r202, %r203;'
-        #9'neg.s32 '#9'%r205, %r204;'
-        #9'shr.u32 '#9'%r206, %r11, 8;'
-        #9'add.u32 '#9'%r207, %r55, 126;'
-        #9'shl.b32 '#9'%r208, %r207, 23;'
-        #9'add.u32 '#9'%r209, %r206, %r208;'
-        #9'add.u32 '#9'%r210, %r205, %r209;'
-        #9'or.b32 '#9'%r211, %r5, %r210;'
-        #9'mov.b32 '#9'%f7, %r211;'
-        #9'bra.uni '#9'$Lt_0_1538;'
-        '$Lt_0_58626:'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mov.f32 '#9'%f84, 0f3f22f983;    '#9'// 0.63662'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mul.f32 '#9'%f9, %f1, %f84;'
-        #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
-        #9'mov.s32 '#9'%r53, %r76;'
-        #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
-        #9'neg.f32 '#9'%f11, %f10;'
-        #9'mov.f32 '#9'%f85, 0f3fc90000;    '#9'// 1.57031'
-        #9'mad.f32 '#9'%f86, %f85, %f11, %f1;'
-        #9'mov.f32 '#9'%f87, 0f39fd8000;    '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f88, %f87, %f11, %f86;'
-        #9'mov.f32 '#9'%f89, 0f34a88000;    '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f90, %f89, %f11, %f88;'
-        #9'mov.f32 '#9'%f91, 0f2e85a309;    '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f7, %f91, %f11, %f90;'
-        '$Lt_0_1538:'
-        #9'.loc'#9'18'#9'1949'#9'0'
-        #9'add.s32 '#9'%r77, %r53, 1;'
-        #9'mul.f32 '#9'%f19, %f7, %f7;'
-        #9'and.b32 '#9'%r212, %r77, 1;'
-        #9'mov.u32 '#9'%r213, 0;'
-        #9'setp.eq.s32 '#9'%p35, %r212, %r213;'
-        #9'@%p35 bra '#9'$Lt_0_63490;'
-        #9'.loc'#9'18'#9'1953'#9'0'
-        #9'mov.f32 '#9'%f92, 0f3f800000;    '#9'// 1'
-        #9'mov.f32 '#9'%f93, 0fbf000000;    '#9'// -0.5'
-        #9'mov.f32 '#9'%f94, 0f3d2aaaa5;    '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f95, 0fbab6061a;    '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f96, 0f37ccf5ce;    '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f97, %f96, %f19, %f95;'
-        #9'mad.f32 '#9'%f98, %f19, %f97, %f94;'
-        #9'mad.f32 '#9'%f99, %f19, %f98, %f93;'
-        #9'mad.f32 '#9'%f28, %f19, %f99, %f92;'
-        #9'bra.uni '#9'$Lt_0_63234;'
-        '$Lt_0_63490:'
-        #9'.loc'#9'18'#9'1955'#9'0'
-        #9'mov.f32 '#9'%f100, 0fbe2aaaa3;   '#9'// -0.166667'
-        #9'mov.f32 '#9'%f101, 0f3c08839e;   '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f102, 0fb94ca1f9;   '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f103, %f102, %f19, %f101;'
-        #9'mad.f32 '#9'%f104, %f19, %f103, %f100;'
-        #9'mul.f32 '#9'%f105, %f19, %f104;'
-        #9'mad.f32 '#9'%f28, %f105, %f7, %f7;'
-        '$Lt_0_63234:'
-        #9'.loc'#9'18'#9'1957'#9'0'
-        #9'neg.f32 '#9'%f106, %f28;'
-        #9'and.b32 '#9'%r214, %r77, 2;'
-        #9'mov.s32 '#9'%r215, 0;'
-        #9'setp.ne.s32 '#9'%p36, %r214, %r215;'
-        #9'selp.f32 '#9'%f28, %f106, %f28, %p36;'
-        #9'mov.f32 '#9'%f82, %f28;'
-        '$Lt_0_1282:'
-        #9'.loc'#9'18'#9'1869'#9'0'
-        #9'mov.u32 '#9'%r216, 0;'
-        #9'setp.eq.s32 '#9'%p37, %r84, %r216;'
-        #9'@%p37 bra '#9'$Lt_0_63746;'
-        #9'mov.f32 '#9'%f107, 0f00000000;   '#9'// 0'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1869'#9'0'
-        #9'mul.rn.f32 '#9'%f38, %f1, %f107;'
-        #9'bra.uni '#9'$Lt_0_258;'
-        '$Lt_0_63746:'
-        #9'.loc'#9'18'#9'1622'#9'0'
-        #9'mov.f32 '#9'%f108, 0f473ba700;   '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p38, %f2, %f108;'
-        #9'@!%p38 bra '#9'$Lt_0_64258;'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1625'#9'0'
-        #9'mov.b32 '#9'%r3, %f1;'
-        #9'and.b32 '#9'%r4, %r3, -2147483648;'
-        #9'mov.s32 '#9'%r87, %r4;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'shl.b32 '#9'%r6, %r3, 8;'
-        #9'mov.s32 '#9'%r88, %r1;'
-        #9'add.u32 '#9'%r8, %r1, 24;'
-        #9'mov.u32 '#9'%r89, __cuda_result_44;'
-        #9'or.b32 '#9'%r10, %r6, -2147483648;'
-        #9'mov.u32 '#9'%r90, 0;'
-        '$Lt_0_65282:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1642'#9'0'
-        #9'ld.const.u32 '#9'%r217, [%r88+0];'
-        #9'mul.lo.u32 '#9'%r218, %r217, %r10;'
-        #9'add.u32 '#9'%r219, %r218, %r90;'
-        #9'.loc'#9'18'#9'1643'#9'0'
-        #9'set.gt.u32.u32 '#9'%r220, %r218, %r219;'
-        #9'neg.s32 '#9'%r221, %r220;'
-        #9'mul.hi.u32 '#9'%r222, %r217, %r10;'
-        #9'add.u32 '#9'%r90, %r221, %r222;'
-        #9'.loc'#9'18'#9'1644'#9'0'
-        #9'st.local.u32 '#9'[%r89+0], %r219;'
-        #9'add.u32 '#9'%r89, %r89, 4;'
-        #9'add.u32 '#9'%r88, %r88, 4;'
-        #9'setp.ne.u32 '#9'%p39, %r88, %r8;'
-        #9'@%p39 bra '#9'$Lt_0_65282;'
-        #9'.loc'#9'18'#9'1646'#9'0'
-        #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
-        #9'.loc'#9'18'#9'1651'#9'0'
-        #9'shl.b32 '#9'%r223, %r3, 1;'
-        #9'shr.u32 '#9'%r19, %r223, 24;'
-        #9'sub.u32 '#9'%r20, %r19, 128;'
-        #9'shr.u32 '#9'%r21, %r20, 5;'
-        #9'mov.s32 '#9'%r224, 4;'
-        #9'sub.s32 '#9'%r23, %r224, %r21;'
-        #9'mul.lo.u32 '#9'%r24, %r23, 4;'
-        #9'add.u32 '#9'%r99, %r24, %r86;'
-        #9'ld.local.u32 '#9'%r90, [%r99+8];'
-        #9'.loc'#9'18'#9'1652'#9'0'
-        #9'ld.local.u32 '#9'%r100, [%r99+4];'
-        #9'and.b32 '#9'%r27, %r20, 31;'
-        #9'mov.u32 '#9'%r225, 0;'
-        #9'setp.eq.u32 '#9'%p40, %r27, %r225;'
-        #9'@%p40 bra '#9'$Lt_0_65794;'
-        #9'.loc'#9'18'#9'1655'#9'0'
-        #9'mov.s32 '#9'%r226, 32;'
-        #9'sub.s32 '#9'%r30, %r226, %r27;'
-        #9'shr.u32 '#9'%r227, %r100, %r30;'
-        #9'shl.b32 '#9'%r228, %r90, %r27;'
-        #9'add.u32 '#9'%r90, %r227, %r228;'
-        #9'.loc'#9'18'#9'1656'#9'0'
-        #9'ld.local.u32 '#9'%r229, [%r99+0];'
-        #9'shr.u32 '#9'%r230, %r229, %r30;'
-        #9'shl.b32 '#9'%r231, %r100, %r27;'
-        #9'add.u32 '#9'%r100, %r230, %r231;'
-        '$Lt_0_65794:'
-        #9'.loc'#9'18'#9'1658'#9'0'
-        #9'shr.u32 '#9'%r108, %r90, 30;'
-        #9'.loc'#9'18'#9'1660'#9'0'
-        #9'shr.u32 '#9'%r232, %r100, 30;'
-        #9'shl.b32 '#9'%r233, %r90, 2;'
-        #9'add.u32 '#9'%r90, %r232, %r233;'
-        #9'.loc'#9'18'#9'1661'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 2;'
-        #9'mov.u32 '#9'%r234, 0;'
-        #9'setp.eq.u32 '#9'%p41, %r100, %r234;'
-        #9'@%p41 bra '#9'$Lt_0_66562;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'add.u32 '#9'%r235, %r90, 1;'
-        #9'mov.u32 '#9'%r236, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r237, %r235, %r236;'
-        #9'neg.s32 '#9'%r238, %r237;'
-        #9'bra.uni '#9'$Lt_0_66306;'
-        '$Lt_0_66562:'
-        #9'mov.u32 '#9'%r239, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r240, %r90, %r239;'
-        #9'neg.s32 '#9'%r238, %r240;'
-        '$Lt_0_66306:'
-        #9'.loc'#9'18'#9'1663'#9'0'
-        #9'add.u32 '#9'%r108, %r108, %r238;'
-        #9'.loc'#9'18'#9'1662'#9'0'
-        #9'neg.s32 '#9'%r241, %r108;'
-        #9'mov.u32 '#9'%r242, 0;'
-        #9'setp.ne.u32 '#9'%p42, %r4, %r242;'
-        #9'selp.s32 '#9'%r108, %r241, %r108, %p42;'
-        #9'mov.u32 '#9'%r243, 0;'
-        #9'setp.eq.u32 '#9'%p43, %r238, %r243;'
-        #9'@%p43 bra '#9'$Lt_0_66818;'
-        #9'.loc'#9'18'#9'1668'#9'0'
-        #9'neg.s32 '#9'%r100, %r100;'
-        #9'.loc'#9'18'#9'1670'#9'0'
-        #9'mov.u32 '#9'%r244, 0;'
-        #9'set.eq.u32.u32 '#9'%r245, %r100, %r244;'
-        #9'neg.s32 '#9'%r246, %r245;'
-        #9'not.b32 '#9'%r247, %r90;'
-        #9'add.u32 '#9'%r90, %r246, %r247;'
-        #9'.loc'#9'18'#9'1671'#9'0'
-        #9'xor.b32 '#9'%r87, %r4, -2147483648;'
-        '$Lt_0_66818:'
-        #9'.loc'#9'18'#9'1673'#9'0'
-        #9'mov.s32 '#9'%r125, %r108;'
-        #9'mov.u32 '#9'%r248, 0;'
-        #9'setp.le.s32 '#9'%p44, %r90, %r248;'
-        #9'@%p44 bra '#9'$Lt_0_70146;'
-        #9'mov.u32 '#9'%r127, 0;'
-        '$Lt_0_67842:'
-        
-          ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1677'#9'0'
-        #9'shr.u32 '#9'%r249, %r100, 31;'
-        #9'shl.b32 '#9'%r250, %r90, 1;'
-        #9'add.u32 '#9'%r90, %r249, %r250;'
-        #9'.loc'#9'18'#9'1678'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 1;'
-        #9'.loc'#9'18'#9'1679'#9'0'
-        #9'sub.u32 '#9'%r127, %r127, 1;'
-        #9'mov.u32 '#9'%r251, 0;'
-        #9'setp.gt.s32 '#9'%p45, %r90, %r251;'
-        #9'@%p45 bra '#9'$Lt_0_67842;'
-        #9'bra.uni '#9'$Lt_0_67330;'
-        '$Lt_0_70146:'
-        #9'mov.u32 '#9'%r127, 0;'
-        '$Lt_0_67330:'
-        #9'.loc'#9'18'#9'1681'#9'0'
-        #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
-        #9'.loc'#9'18'#9'1682'#9'0'
-        #9'mov.u32 '#9'%r252, -921707870;'
-        #9'mul.hi.u32 '#9'%r90, %r90, %r252;'
-        #9'mov.u32 '#9'%r253, 0;'
-        #9'setp.le.s32 '#9'%p46, %r90, %r253;'
-        #9'@%p46 bra '#9'$Lt_0_68354;'
-        #9'.loc'#9'18'#9'1684'#9'0'
-        #9'shr.u32 '#9'%r254, %r100, 31;'
-        #9'shl.b32 '#9'%r255, %r90, 1;'
-        #9'add.u32 '#9'%r90, %r254, %r255;'
-        #9'.loc'#9'18'#9'1685'#9'0'
-        #9'shl.b32 '#9'%r100, %r100, 1;'
-        #9'.loc'#9'18'#9'1686'#9'0'
-        #9'sub.u32 '#9'%r127, %r127, 1;'
-        '$Lt_0_68354:'
-        #9'.loc'#9'18'#9'1688'#9'0'
-        #9'mov.u32 '#9'%r256, 0;'
-        #9'set.ne.u32.u32 '#9'%r257, %r100, %r256;'
-        #9'neg.s32 '#9'%r258, %r257;'
-        #9'add.u32 '#9'%r90, %r258, %r90;'
-        #9'shl.b32 '#9'%r259, %r90, 24;'
-        #9'mov.s32 '#9'%r260, 0;'
-        #9'set.lt.u32.s32 '#9'%r261, %r259, %r260;'
-        #9'neg.s32 '#9'%r262, %r261;'
-        #9'shr.u32 '#9'%r263, %r90, 8;'
-        #9'add.u32 '#9'%r264, %r127, 126;'
-        #9'shl.b32 '#9'%r265, %r264, 23;'
-        #9'add.u32 '#9'%r266, %r263, %r265;'
-        #9'add.u32 '#9'%r267, %r262, %r266;'
-        #9'or.b32 '#9'%r268, %r87, %r267;'
-        #9'mov.b32 '#9'%f40, %r268;'
-        #9'bra.uni '#9'$Lt_0_514;'
-        '$Lt_0_64258:'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mov.f32 '#9'%f109, 0f3f22f983;   '#9'// 0.63662'
-        #9'.loc'#9'18'#9'1946'#9'0'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'mul.f32 '#9'%f9, %f1, %f109;'
-        #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
-        #9'mov.s32 '#9'%r125, %r76;'
-        #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
-        #9'neg.f32 '#9'%f11, %f10;'
-        #9'mov.f32 '#9'%f110, 0f3fc90000;   '#9'// 1.57031'
-        #9'mad.f32 '#9'%f111, %f110, %f11, %f1;'
-        #9'mov.f32 '#9'%f112, 0f39fd8000;   '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f113, %f112, %f11, %f111;'
-        #9'mov.f32 '#9'%f114, 0f34a88000;   '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f115, %f114, %f11, %f113;'
-        #9'mov.f32 '#9'%f116, 0f2e85a309;   '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f40, %f116, %f11, %f115;'
-        '$Lt_0_514:'
-        #9'.loc'#9'18'#9'1872'#9'0'
-        #9'mul.f32 '#9'%f49, %f40, %f40;'
-        #9'and.b32 '#9'%r269, %r125, 1;'
-        #9'mov.u32 '#9'%r270, 0;'
-        #9'setp.eq.s32 '#9'%p47, %r269, %r270;'
-        #9'@%p47 bra '#9'$Lt_0_69122;'
-        #9'.loc'#9'18'#9'1875'#9'0'
-        #9'mov.f32 '#9'%f117, 0f3f800000;   '#9'// 1'
-        #9'mov.f32 '#9'%f118, 0fbf000000;   '#9'// -0.5'
-        #9'mov.f32 '#9'%f119, 0f3d2aaaa5;   '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f120, 0fbab6061a;   '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f121, 0f37ccf5ce;   '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f122, %f121, %f49, %f120;'
-        #9'mad.f32 '#9'%f123, %f49, %f122, %f119;'
-        #9'mad.f32 '#9'%f124, %f49, %f123, %f118;'
-        #9'mad.f32 '#9'%f58, %f49, %f124, %f117;'
-        #9'bra.uni '#9'$Lt_0_68866;'
-        '$Lt_0_69122:'
-        #9'.loc'#9'18'#9'1877'#9'0'
-        #9'mov.f32 '#9'%f125, 0fbe2aaaa3;   '#9'// -0.166667'
-        #9'mov.f32 '#9'%f126, 0f3c08839e;   '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f127, 0fb94ca1f9;   '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f128, %f127, %f49, %f126;'
-        #9'mad.f32 '#9'%f129, %f49, %f128, %f125;'
-        #9'mul.f32 '#9'%f130, %f49, %f129;'
-        #9'mad.f32 '#9'%f58, %f130, %f40, %f40;'
-        '$Lt_0_68866:'
-        #9'.loc'#9'18'#9'1879'#9'0'
-        #9'neg.f32 '#9'%f131, %f58;'
-        #9'and.b32 '#9'%r271, %r125, 2;'
-        #9'mov.s32 '#9'%r272, 0;'
-        #9'setp.ne.s32 '#9'%p48, %r271, %r272;'
-        #9'selp.f32 '#9'%f58, %f131, %f58, %p48;'
-        #9'mov.f32 '#9'%f38, %f58;'
-        '$Lt_0_258:'
-        #9'.loc'#9'15'#9'23'#9'0'
-        #9'mov.f32 '#9'%f132, %f80;'
-        #9'mul.f32 '#9'%f133, %f82, %f73;'
-        #9'mad.f32 '#9'%f134, %f75, %f38, %f133;'
-        #9'mov.f32 '#9'%f135, 0f3f000000;   '#9'// 0.5'
-        #9'add.f32 '#9'%f136, %f134, %f135;'
-        #9'mov.f32 '#9'%f137, 0f00000000;   '#9'// 0'
-        #9'mov.f32 '#9'%f138, 0f00000000;   '#9'// 0'
-        
-          #9'tex.2d.v4.f32.f32 {%f139,%f140,%f141,%f142},[tex,{%f132,%f136,%' +
-          'f137,%f138}];'
-        #9'.loc'#9'15'#9'26'#9'0'
-        #9'mov.f32 '#9'%f143, %f139;'
-        #9'ld.param.u32 '#9'%r273, [__cudaparm_transformKernel_g_odata];'
-        #9'.loc'#9'15'#9'22'#9'0'
-        #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
-        #9'.loc'#9'15'#9'26'#9'0'
-        #9'mul.lo.u32 '#9'%r274, %r155, %r157;'
-        #9'add.u32 '#9'%r275, %r159, %r274;'
-        #9'mul.lo.u32 '#9'%r276, %r275, 4;'
-        #9'add.u32 '#9'%r277, %r273, %r276;'
-        #9'st.global.f32 '#9'[%r277+0], %f143;'
-        #9'.loc'#9'15'#9'27'#9'0'
         #9'exit;'
         '$LDWend_transformKernel:'
         #9'} // transformKernel'

+ 6 - 6
Examples/Demos/computing/SimpleCUDATexture/fSimpleTexD.pas

@@ -26,7 +26,7 @@ uses
   GLS.TextureFormat;
 
 type
-  TForm1 = class(TForm)
+  TFormST = class(TForm)
     GLCUDA1: TGLCUDA;
     GLCUDADevice1: TGLCUDADevice;
     GLCUDACompiler1: TGLCUDACompiler;
@@ -50,7 +50,7 @@ type
   TGLBitmap32 = TGLImage;   // comment if supported Graphics32
 
 var
-  Form1: TForm1;
+  FormST: TFormST;
   Angle : Single = 0.5;    // angle to rotate image by (in radians)
 
 //-----------------------------------------
@@ -64,14 +64,14 @@ const
   OutFileName  = 'lena_bw_out.pgm';
 
 
-procedure TForm1.FormCreate(Sender: TObject);
+procedure TFormST.FormCreate(Sender: TObject);
 begin
   Path := GetCurrentAssetPath();
   SetCurrentDir(Path + '\texture');
   pgm := TGLPGMImage.Create;
 end;
 
-procedure TForm1.Button1Click(Sender: TObject);
+procedure TFormST.Button1Click(Sender: TObject);
 var
   timer: Cardinal;
   bmp32: TGLBitmap32;
@@ -106,12 +106,12 @@ begin
   bmp32.Free;
 end;
 
-procedure TForm1.FormDestroy(Sender: TObject);
+procedure TFormST.FormDestroy(Sender: TObject);
 begin
    pgm.Destroy;
 end;
 
-procedure TForm1.TurnPictureParameterSetup(Sender: TObject);
+procedure TFormST.TurnPictureParameterSetup(Sender: TObject);
 begin
   with TurnPicture do
   begin

+ 3 - 611
Examples/Demos/computing/StableFluids/fFluidsD.dfm

@@ -1,4 +1,4 @@
-object Form1: TForm1
+object FormSF: TFormSF
   Left = 0
   Top = 0
   Caption = 'GLScene CUDA Fluids'
@@ -173,9 +173,7 @@ object Form1: TForm1
         ''
         #9'//-----------------------------------------------------------'
         
-          #9'// Compiling C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_000009' +
-          'd4_00000000-11_temp.cpp3.i (C:/Users/YARUNA~1/AppData/Local/Temp' +
-          '/ccBI#.a02624)'
+          #9'// Compiling C:/Users/VPV~1/AppData/Local/Temp/...)'
         #9'//-----------------------------------------------------------'
         ''
         #9'//-----------------------------------------------------------'
@@ -187,613 +185,7 @@ object Form1: TForm1
         #9'//  -m2'#9'(Report advisories)'
         #9'//-----------------------------------------------------------'
         ''
-        
-          #9'.file'#9'1'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_000009d4_0' +
-          '0000000-10_temp.cudafe2.gpu"'
-        
-          #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
-          'E\crtdefs.h"'
-        
-          #9'.file'#9'3'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\crt/device_runtime.h"'
-        
-          #9'.file'#9'4'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\host_defines.h"'
-        
-          #9'.file'#9'5'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.' +
-          '2\include\builtin_types.h"'
-        
-          #9'.file'#9'6'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\device_types.h"'
-        
-          #9'.file'#9'7'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\driver_types.h"'
-        
-          #9'.file'#9'8'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\surface_types.h"'
-        
-          #9'.file'#9'9'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3.' +
-          '2\include\texture_types.h"'
-        
-          #9'.file'#9'10'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\vector_types.h"'
-        
-          #9'.file'#9'11'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\builtin_types.h"'
-        
-          #9'.file'#9'12'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\host_defines.h"'
-        
-          #9'.file'#9'13'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
-          '.2\include\device_launch_parameters.h"'
-        
-          #9'.file'#9'14'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\crt\storage_class.h"'
-        
-          #9'.file'#9'15'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
-          'DE\time.h"'
-        #9'.file'#9'16'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/temp.cu"'
-        
-          #9'.file'#9'17'#9'"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3' +
-          '.2\include\common_functions.h"'
-        
-          #9'.file'#9'18'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_functions.h"'
-        
-          #9'.file'#9'19'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_constants.h"'
-        
-          #9'.file'#9'20'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\device_functions.h"'
-        
-          #9'.file'#9'21'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_11_atomic_functions.h"'
-        
-          #9'.file'#9'22'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_12_atomic_functions.h"'
-        
-          #9'.file'#9'23'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_13_double_functions.h"'
-        
-          #9'.file'#9'24'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_20_atomic_functions.h"'
-        
-          #9'.file'#9'25'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\sm_20_intrinsics.h"'
-        
-          #9'.file'#9'26'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\surface_functions.h"'
-        
-          #9'.file'#9'27'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\texture_fetch_functions.h"'
-        
-          #9'.file'#9'28'#9'"c:\program files\nvidia gpu computing toolkit\cuda\v3' +
-          '.2\include\math_functions_dbl_ptx3.h"'
-        ''
-        ''
-        #9'.entry addForces_k ('
-        #9#9'.param .u32 __cudaparm_addForces_k_v,'
-        #9#9'.param .s32 __cudaparm_addForces_k_dx,'
-        #9#9'.param .s32 __cudaparm_addForces_k_dy,'
-        #9#9'.param .s32 __cudaparm_addForces_k_spx,'
-        #9#9'.param .s32 __cudaparm_addForces_k_spy,'
-        #9#9'.param .f32 __cudaparm_addForces_k_fx,'
-        #9#9'.param .f32 __cudaparm_addForces_k_fy,'
-        #9#9'.param .s32 __cudaparm_addForces_k_r,'
-        #9#9'.param .u32 __cudaparm_addForces_k_pitch)'
-        #9'{'
-        #9'.reg .u32 %r<24>;'
-        #9'.reg .f32 %f<14>;'
-        #9'.loc'#9'16'#9'28'#9'0'
-        '$LDWbegin_addForces_k:'
-        #9'.loc'#9'16'#9'38'#9'0'
-        #9'cvt.s32.u16 '#9'%r1, %tid.x;'
-        #9'mul24.lo.u32 '#9'%r2, %r1, 8;'
-        #9'cvt.s32.u16 '#9'%r3, %tid.y;'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_addForces_k_spy];'
-        #9'add.s32 '#9'%r5, %r4, %r3;'
-        #9'ld.param.u32 '#9'%r6, [__cudaparm_addForces_k_spx];'
-        #9'mul.lo.u32 '#9'%r7, %r6, 8;'
-        #9'ld.param.s32 '#9'%r8, [__cudaparm_addForces_k_r];'
-        #9'sub.s32 '#9'%r9, %r3, %r8;'
-        #9'sub.s32 '#9'%r10, %r1, %r8;'
-        #9'ld.param.u32 '#9'%r11, [__cudaparm_addForces_k_pitch];'
-        #9'mul.lo.u32 '#9'%r12, %r5, %r11;'
-        #9'mul.lo.s32 '#9'%r13, %r9, %r9;'
-        #9'mul.lo.s32 '#9'%r14, %r10, %r10;'
-        #9'add.u32 '#9'%r15, %r2, %r12;'
-        #9'mul.lo.s32 '#9'%r16, %r9, %r13;'
-        #9'mul.lo.s32 '#9'%r17, %r10, %r14;'
-        #9'ld.param.u32 '#9'%r18, [__cudaparm_addForces_k_v];'
-        #9'add.u32 '#9'%r19, %r18, %r15;'
-        #9'mul.lo.s32 '#9'%r20, %r9, %r16;'
-        #9'mul.lo.s32 '#9'%r21, %r10, %r17;'
-        #9'add.u32 '#9'%r22, %r7, %r19;'
-        #9'cvt.rn.f32.s32 '#9'%f1, %r20;'
-        #9'cvt.rn.f32.s32 '#9'%f2, %r21;'
-        #9'mov.f32 '#9'%f3, 0f3f800000;     '#9'// 1'
-        #9'add.f32 '#9'%f4, %f2, %f3;'
-        #9'add.f32 '#9'%f5, %f1, %f4;'
-        #9'rcp.approx.f32 '#9'%f6, %f5;'
-        #9'ld.global.v2.f32 '#9'{%f7,%f8}, [%r22+0];'
-        #9'ld.param.f32 '#9'%f9, [__cudaparm_addForces_k_fy];'
-        #9'mad.f32 '#9'%f10, %f9, %f6, %f8;'
-        #9'.loc'#9'16'#9'39'#9'0'
-        #9'ld.param.f32 '#9'%f11, [__cudaparm_addForces_k_fx];'
-        #9'mad.f32 '#9'%f12, %f11, %f6, %f7;'
-        #9'st.global.v2.f32 '#9'[%r22+0], {%f12,%f10};'
-        #9'.loc'#9'16'#9'40'#9'0'
-        #9'exit;'
-        '$LDWend_addForces_k:'
-        #9'} // addForces_k'
-        #9'.tex .u32 texref;'
-        ''
-        #9'.entry advectVelocity_k ('
-        #9#9'.param .u32 __cudaparm_advectVelocity_k_vx,'
-        #9#9'.param .u32 __cudaparm_advectVelocity_k_vy,'
-        #9#9'.param .s32 __cudaparm_advectVelocity_k_dx,'
-        #9#9'.param .s32 __cudaparm_advectVelocity_k_pdx,'
-        #9#9'.param .s32 __cudaparm_advectVelocity_k_dy,'
-        #9#9'.param .f32 __cudaparm_advectVelocity_k_dt,'
-        #9#9'.param .s32 __cudaparm_advectVelocity_k_lb)'
-        #9'{'
-        #9'.reg .u16 %rh<4>;'
-        #9'.reg .u32 %r<28>;'
-        #9'.reg .f32 %f<41>;'
-        #9'.reg .pred %p<6>;'
-        #9'.loc'#9'16'#9'48'#9'0'
-        '$LDWbegin_advectVelocity_k:'
-        #9'mov.u16 '#9'%rh1, %ctaid.x;'
-        #9'mov.u16 '#9'%rh2, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r1, %rh1, %rh2;'
-        #9'cvt.u32.u16 '#9'%r2, %tid.x;'
-        #9'add.u32 '#9'%r3, %r2, %r1;'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_advectVelocity_k_dx];'
-        #9'setp.le.s32 '#9'%p1, %r4, %r3;'
-        #9'@%p1 bra '#9'$Lt_1_2818;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectVelocity_k_lb];'
-        #9'mov.u32 '#9'%r6, 0;'
-        #9'setp.le.s32 '#9'%p2, %r5, %r6;'
-        #9'@%p2 bra '#9'$Lt_1_3330;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectVelocity_k_lb];'
-        #9'mov.s32 '#9'%r7, %r5;'
-        #9'cvt.u32.u16 '#9'%r8, %tid.y;'
-        #9'mul.lo.u32 '#9'%r9, %r8, %r5;'
-        #9'cvt.u32.u16 '#9'%r10, %ntid.y;'
-        #9'mul.lo.u32 '#9'%r11, %r10, %r5;'
-        #9'cvt.u32.u16 '#9'%r12, %ctaid.y;'
-        #9'mul.lo.u32 '#9'%r13, %r12, %r11;'
-        #9'add.s32 '#9'%r14, %r9, %r13;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectVelocity_k_dy];'
-        #9'mov.s32 '#9'%r16, 0;'
-        #9'mov.s32 '#9'%r17, %r7;'
-        '$Lt_1_3842:'
-        
-          ' //<loop> Loop body line 48, nesting depth: 1, estimated iterati' +
-          'ons: unknown'
-        #9'add.s32 '#9'%r18, %r14, %r16;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectVelocity_k_dy];'
-        #9'setp.ge.s32 '#9'%p3, %r18, %r15;'
-        #9'@%p3 bra '#9'$Lt_1_4098;'
-        ' //<loop> Part of loop body line 48, head labeled $Lt_1_3842'
-        #9'cvt.rn.f32.s32 '#9'%f1, %r3;'
-        #9'cvt.rn.f32.s32 '#9'%f2, %r18;'
-        #9'mov.f32 '#9'%f3, %f1;'
-        #9'mov.f32 '#9'%f4, %f2;'
-        #9'mov.f32 '#9'%f5, 0f00000000;     '#9'// 0'
-        #9'mov.f32 '#9'%f6, %f5;'
-        #9'mov.f32 '#9'%f7, 0f00000000;     '#9'// 0'
-        #9'mov.f32 '#9'%f8, %f7;'
-        
-          #9'tex.2d.v4.f32.f32 {%f9,%f10,%f11,%f12},[texref,{%f3,%f4,%f6,%f8' +
-          '}];'
-        ' //<loop> Part of loop body line 48, head labeled $Lt_1_3842'
-        #9'.loc'#9'16'#9'63'#9'0'
-        #9'mov.f32 '#9'%f13, %f9;'
-        #9'mov.f32 '#9'%f14, %f10;'
-        #9'ld.param.f32 '#9'%f15, [__cudaparm_advectVelocity_k_dt];'
-        #9'mov.f32 '#9'%f16, 0f3f000000;    '#9'// 0.5'
-        #9'add.f32 '#9'%f17, %f1, %f16;'
-        #9'.loc'#9'16'#9'48'#9'0'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_advectVelocity_k_dx];'
-        #9'.loc'#9'16'#9'63'#9'0'
-        #9'cvt.rn.f32.s32 '#9'%f18, %r4;'
-        #9'mul.f32 '#9'%f19, %f13, %f15;'
-        #9'mul.f32 '#9'%f20, %f18, %f19;'
-        #9'sub.f32 '#9'%f21, %f17, %f20;'
-        #9'mov.f32 '#9'%f22, %f21;'
-        #9'mov.f32 '#9'%f23, 0f3f000000;    '#9'// 0.5'
-        #9'add.f32 '#9'%f24, %f2, %f23;'
-        #9'.loc'#9'16'#9'48'#9'0'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectVelocity_k_dy];'
-        #9'.loc'#9'16'#9'63'#9'0'
-        #9'cvt.rn.f32.s32 '#9'%f25, %r15;'
-        #9'mul.f32 '#9'%f26, %f14, %f15;'
-        #9'mul.f32 '#9'%f27, %f25, %f26;'
-        #9'sub.f32 '#9'%f28, %f24, %f27;'
-        #9'mov.f32 '#9'%f29, %f28;'
-        #9'mov.f32 '#9'%f30, 0f00000000;    '#9'// 0'
-        #9'mov.f32 '#9'%f31, %f30;'
-        #9'mov.f32 '#9'%f32, 0f00000000;    '#9'// 0'
-        #9'mov.f32 '#9'%f33, %f32;'
-        
-          #9'tex.2d.v4.f32.f32 {%f34,%f35,%f36,%f37},[texref,{%f22,%f29,%f31' +
-          ',%f33}];'
-        ' //<loop> Part of loop body line 48, head labeled $Lt_1_3842'
-        #9'.loc'#9'16'#9'66'#9'0'
-        #9'mov.f32 '#9'%f38, %f34;'
-        #9'mov.f32 '#9'%f39, %f35;'
-        #9'.loc'#9'16'#9'68'#9'0'
-        #9'ld.param.s32 '#9'%r19, [__cudaparm_advectVelocity_k_pdx];'
-        #9'mul.lo.s32 '#9'%r20, %r19, %r18;'
-        #9'add.s32 '#9'%r21, %r20, %r3;'
-        #9'mul.lo.u32 '#9'%r22, %r21, 4;'
-        #9'ld.param.u32 '#9'%r23, [__cudaparm_advectVelocity_k_vx];'
-        #9'add.u32 '#9'%r24, %r23, %r22;'
-        #9'st.global.f32 '#9'[%r24+0], %f38;'
-        #9'.loc'#9'16'#9'69'#9'0'
-        #9'ld.param.u32 '#9'%r25, [__cudaparm_advectVelocity_k_vy];'
-        #9'add.u32 '#9'%r26, %r25, %r22;'
-        #9'st.global.f32 '#9'[%r26+0], %f39;'
-        '$Lt_1_4098:'
-        ' //<loop> Part of loop body line 48, head labeled $Lt_1_3842'
-        #9'add.s32 '#9'%r16, %r16, 1;'
-        #9'.loc'#9'16'#9'48'#9'0'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectVelocity_k_lb];'
-        #9'.loc'#9'16'#9'69'#9'0'
-        #9'setp.ne.s32 '#9'%p4, %r16, %r5;'
-        #9'@%p4 bra '#9'$Lt_1_3842;'
-        '$Lt_1_3330:'
-        '$Lt_1_2818:'
-        #9'.loc'#9'16'#9'73'#9'0'
-        #9'exit;'
-        '$LDWend_advectVelocity_k:'
-        #9'} // advectVelocity_k'
-        ''
-        #9'.entry diffuseProject_k ('
-        #9#9'.param .u32 __cudaparm_diffuseProject_k_vx,'
-        #9#9'.param .u32 __cudaparm_diffuseProject_k_vy,'
-        #9#9'.param .s32 __cudaparm_diffuseProject_k_dx,'
-        #9#9'.param .s32 __cudaparm_diffuseProject_k_dy,'
-        #9#9'.param .f32 __cudaparm_diffuseProject_k_dt,'
-        #9#9'.param .f32 __cudaparm_diffuseProject_k_visc,'
-        #9#9'.param .s32 __cudaparm_diffuseProject_k_lb)'
-        #9'{'
-        #9'.reg .u16 %rh<4>;'
-        #9'.reg .u32 %r<37>;'
-        #9'.reg .f32 %f<31>;'
-        #9'.reg .pred %p<8>;'
-        #9'.loc'#9'16'#9'91'#9'0'
-        '$LDWbegin_diffuseProject_k:'
-        #9'mov.u16 '#9'%rh1, %ctaid.x;'
-        #9'mov.u16 '#9'%rh2, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r1, %rh1, %rh2;'
-        #9'cvt.u32.u16 '#9'%r2, %tid.x;'
-        #9'add.u32 '#9'%r3, %r2, %r1;'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_diffuseProject_k_dx];'
-        #9'setp.le.s32 '#9'%p1, %r4, %r3;'
-        #9'@%p1 bra '#9'$Lt_2_4354;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_diffuseProject_k_lb];'
-        #9'mov.u32 '#9'%r6, 0;'
-        #9'setp.le.s32 '#9'%p2, %r5, %r6;'
-        #9'@%p2 bra '#9'$Lt_2_4866;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_diffuseProject_k_lb];'
-        #9'mov.s32 '#9'%r7, %r5;'
-        #9'cvt.u32.u16 '#9'%r8, %tid.y;'
-        #9'mul.lo.u32 '#9'%r9, %r8, %r5;'
-        #9'cvt.u32.u16 '#9'%r10, %ntid.y;'
-        #9'mul.lo.u32 '#9'%r11, %r10, %r5;'
-        #9'cvt.u32.u16 '#9'%r12, %ctaid.y;'
-        #9'mul.lo.u32 '#9'%r13, %r12, %r11;'
-        #9'add.s32 '#9'%r14, %r9, %r13;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_diffuseProject_k_dy];'
-        #9'mov.s32 '#9'%r16, 0;'
-        #9'mov.s32 '#9'%r17, %r7;'
-        '$Lt_2_5378:'
-        
-          ' //<loop> Loop body line 91, nesting depth: 1, estimated iterati' +
-          'ons: unknown'
-        #9'add.s32 '#9'%r18, %r14, %r16;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_diffuseProject_k_dy];'
-        #9'setp.ge.s32 '#9'%p3, %r18, %r15;'
-        #9'@%p3 bra '#9'$Lt_2_5634;'
-        ' //<loop> Part of loop body line 91, head labeled $Lt_2_5378'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_diffuseProject_k_dy];'
-        #9'.loc'#9'16'#9'116'#9'0'
-        #9'shr.s32 '#9'%r19, %r15, 31;'
-        #9'mov.s32 '#9'%r20, 1;'
-        #9'and.b32 '#9'%r21, %r19, %r20;'
-        #9'add.s32 '#9'%r22, %r21, %r15;'
-        #9'shr.s32 '#9'%r23, %r22, 1;'
-        #9'ld.param.f32 '#9'%f1, [__cudaparm_diffuseProject_k_dt];'
-        #9'ld.param.f32 '#9'%f2, [__cudaparm_diffuseProject_k_visc];'
-        #9'mul.f32 '#9'%f3, %f1, %f2;'
-        #9'mul.lo.s32 '#9'%r24, %r3, %r3;'
-        #9'.loc'#9'16'#9'91'#9'0'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_diffuseProject_k_dx];'
-        #9'.loc'#9'16'#9'116'#9'0'
-        #9'mul.lo.s32 '#9'%r25, %r18, %r4;'
-        #9'setp.lt.s32 '#9'%p4, %r23, %r18;'
-        #9'sub.s32 '#9'%r26, %r18, %r15;'
-        #9'add.s32 '#9'%r27, %r25, %r3;'
-        #9'selp.s32 '#9'%r28, %r26, %r18, %p4;'
-        #9'mul.lo.u32 '#9'%r29, %r27, 8;'
-        #9'mul.lo.s32 '#9'%r30, %r28, %r28;'
-        #9'ld.param.u32 '#9'%r31, [__cudaparm_diffuseProject_k_vx];'
-        #9'add.u32 '#9'%r32, %r31, %r29;'
-        #9'add.s32 '#9'%r33, %r24, %r30;'
-        #9'cvt.rn.f32.s32 '#9'%f4, %r33;'
-        #9'mov.f32 '#9'%f5, 0f3f800000;     '#9'// 1'
-        #9'mad.f32 '#9'%f6, %f4, %f3, %f5;'
-        #9'rcp.approx.f32 '#9'%f7, %f6;'
-        #9'ld.global.v2.f32 '#9'{%f8,%f9}, [%r32+0];'
-        #9'mul.f32 '#9'%f10, %f8, %f7;'
-        #9'mul.f32 '#9'%f11, %f9, %f7;'
-        #9'.loc'#9'16'#9'117'#9'0'
-        #9'ld.param.u32 '#9'%r34, [__cudaparm_diffuseProject_k_vy];'
-        #9'add.u32 '#9'%r35, %r34, %r29;'
-        #9'ld.global.v2.f32 '#9'{%f12,%f13}, [%r35+0];'
-        #9'mul.f32 '#9'%f14, %f12, %f7;'
-        #9'mul.f32 '#9'%f15, %f13, %f7;'
-        #9'mov.f32 '#9'%f16, 0f00000000;    '#9'// 0'
-        #9'setp.gt.f32 '#9'%p5, %f4, %f16;'
-        #9'@!%p5 bra '#9'$Lt_2_6146;'
-        ' //<loop> Part of loop body line 91, head labeled $Lt_2_5378'
-        #9'.loc'#9'16'#9'123'#9'0'
-        #9'cvt.rn.f32.s32 '#9'%f17, %r3;'
-        #9'cvt.rn.f32.s32 '#9'%f18, %r28;'
-        #9'mul.f32 '#9'%f19, %f18, %f14;'
-        #9'mad.f32 '#9'%f20, %f17, %f10, %f19;'
-        #9'.loc'#9'16'#9'125'#9'0'
-        #9'mul.f32 '#9'%f21, %f18, %f15;'
-        #9'mad.f32 '#9'%f22, %f17, %f11, %f21;'
-        #9'.loc'#9'16'#9'126'#9'0'
-        #9'rcp.approx.f32 '#9'%f23, %f4;'
-        #9'mul.f32 '#9'%f24, %f20, %f23;'
-        #9'mul.f32 '#9'%f25, %f17, %f24;'
-        #9'sub.f32 '#9'%f10, %f10, %f25;'
-        #9'.loc'#9'16'#9'127'#9'0'
-        #9'mul.f32 '#9'%f26, %f22, %f23;'
-        #9'mul.f32 '#9'%f27, %f17, %f26;'
-        #9'sub.f32 '#9'%f11, %f11, %f27;'
-        #9'.loc'#9'16'#9'128'#9'0'
-        #9'mul.f32 '#9'%f28, %f18, %f24;'
-        #9'sub.f32 '#9'%f14, %f14, %f28;'
-        #9'.loc'#9'16'#9'129'#9'0'
-        #9'mul.f32 '#9'%f29, %f18, %f26;'
-        #9'sub.f32 '#9'%f15, %f15, %f29;'
-        '$Lt_2_6146:'
-        ' //<loop> Part of loop body line 91, head labeled $Lt_2_5378'
-        #9'st.global.v2.f32 '#9'[%r32+0], {%f10,%f11};'
-        #9'st.global.v2.f32 '#9'[%r35+0], {%f14,%f15};'
-        '$Lt_2_5634:'
-        ' //<loop> Part of loop body line 91, head labeled $Lt_2_5378'
-        #9'.loc'#9'16'#9'133'#9'0'
-        #9'add.s32 '#9'%r16, %r16, 1;'
-        #9'.loc'#9'16'#9'91'#9'0'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_diffuseProject_k_lb];'
-        #9'.loc'#9'16'#9'133'#9'0'
-        #9'setp.ne.s32 '#9'%p6, %r16, %r5;'
-        #9'@%p6 bra '#9'$Lt_2_5378;'
-        '$Lt_2_4866:'
-        '$Lt_2_4354:'
-        #9'.loc'#9'16'#9'137'#9'0'
-        #9'exit;'
-        '$LDWend_diffuseProject_k:'
-        #9'} // diffuseProject_k'
-        ''
-        #9'.entry updateVelocity_k ('
-        #9#9'.param .u32 __cudaparm_updateVelocity_k_v,'
-        #9#9'.param .u32 __cudaparm_updateVelocity_k_vx,'
-        #9#9'.param .u32 __cudaparm_updateVelocity_k_vy,'
-        #9#9'.param .s32 __cudaparm_updateVelocity_k_dx,'
-        #9#9'.param .s32 __cudaparm_updateVelocity_k_pdx,'
-        #9#9'.param .s32 __cudaparm_updateVelocity_k_dy,'
-        #9#9'.param .s32 __cudaparm_updateVelocity_k_lb,'
-        #9#9'.param .u32 __cudaparm_updateVelocity_k_pitch,'
-        #9#9'.param .f32 __cudaparm_updateVelocity_k_scale)'
-        #9'{'
-        #9'.reg .u16 %rh<4>;'
-        #9'.reg .u32 %r<34>;'
-        #9'.reg .f32 %f<7>;'
-        #9'.reg .pred %p<6>;'
-        #9'.loc'#9'16'#9'152'#9'0'
-        '$LDWbegin_updateVelocity_k:'
-        #9'mov.u16 '#9'%rh1, %ctaid.x;'
-        #9'mov.u16 '#9'%rh2, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r1, %rh1, %rh2;'
-        #9'cvt.u32.u16 '#9'%r2, %tid.x;'
-        #9'add.u32 '#9'%r3, %r2, %r1;'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_updateVelocity_k_dx];'
-        #9'setp.le.s32 '#9'%p1, %r4, %r3;'
-        #9'@%p1 bra '#9'$Lt_3_2818;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_updateVelocity_k_lb];'
-        #9'mov.u32 '#9'%r6, 0;'
-        #9'setp.le.s32 '#9'%p2, %r5, %r6;'
-        #9'@%p2 bra '#9'$Lt_3_3330;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_updateVelocity_k_lb];'
-        #9'mov.s32 '#9'%r7, %r5;'
-        #9'cvt.u32.u16 '#9'%r8, %tid.y;'
-        #9'mul.lo.u32 '#9'%r9, %r8, %r5;'
-        #9'cvt.u32.u16 '#9'%r10, %ntid.y;'
-        #9'mul.lo.u32 '#9'%r11, %r10, %r5;'
-        #9'cvt.u32.u16 '#9'%r12, %ctaid.y;'
-        #9'mul.lo.u32 '#9'%r13, %r12, %r11;'
-        #9'add.s32 '#9'%r14, %r9, %r13;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_updateVelocity_k_dy];'
-        #9'mov.s32 '#9'%r16, 0;'
-        #9'mov.s32 '#9'%r17, %r7;'
-        '$Lt_3_3842:'
-        
-          ' //<loop> Loop body line 152, nesting depth: 1, estimated iterat' +
-          'ions: unknown'
-        #9'add.s32 '#9'%r18, %r14, %r16;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_updateVelocity_k_dy];'
-        #9'setp.ge.s32 '#9'%p3, %r18, %r15;'
-        #9'@%p3 bra '#9'$Lt_3_4098;'
-        ' //<loop> Part of loop body line 152, head labeled $Lt_3_3842'
-        #9'.loc'#9'16'#9'168'#9'0'
-        #9'ld.param.s32 '#9'%r19, [__cudaparm_updateVelocity_k_pdx];'
-        #9'mul.lo.s32 '#9'%r20, %r19, %r18;'
-        #9'add.s32 '#9'%r21, %r20, %r3;'
-        #9'mul.lo.u32 '#9'%r22, %r21, 4;'
-        #9'ld.param.u32 '#9'%r23, [__cudaparm_updateVelocity_k_vy];'
-        #9'add.u32 '#9'%r24, %r23, %r22;'
-        #9'ld.global.f32 '#9'%f1, [%r24+0];'
-        #9'.loc'#9'16'#9'175'#9'0'
-        #9'mul.lo.u32 '#9'%r25, %r3, 8;'
-        #9'ld.param.u32 '#9'%r26, [__cudaparm_updateVelocity_k_pitch];'
-        #9'mul.lo.u32 '#9'%r27, %r18, %r26;'
-        #9'add.u32 '#9'%r28, %r25, %r27;'
-        #9'ld.param.u32 '#9'%r29, [__cudaparm_updateVelocity_k_v];'
-        #9'add.u32 '#9'%r30, %r29, %r28;'
-        #9'ld.param.f32 '#9'%f2, [__cudaparm_updateVelocity_k_scale];'
-        #9'ld.param.u32 '#9'%r31, [__cudaparm_updateVelocity_k_vx];'
-        #9'add.u32 '#9'%r32, %r31, %r22;'
-        #9'ld.global.f32 '#9'%f3, [%r32+0];'
-        #9'mul.f32 '#9'%f4, %f3, %f2;'
-        #9'mul.f32 '#9'%f5, %f1, %f2;'
-        #9'st.global.v2.f32 '#9'[%r30+0], {%f4,%f5};'
-        '$Lt_3_4098:'
-        ' //<loop> Part of loop body line 152, head labeled $Lt_3_3842'
-        #9'add.s32 '#9'%r16, %r16, 1;'
-        #9'.loc'#9'16'#9'152'#9'0'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_updateVelocity_k_lb];'
-        #9'.loc'#9'16'#9'175'#9'0'
-        #9'setp.ne.s32 '#9'%p4, %r16, %r5;'
-        #9'@%p4 bra '#9'$Lt_3_3842;'
-        '$Lt_3_3330:'
-        '$Lt_3_2818:'
-        #9'.loc'#9'16'#9'179'#9'0'
-        #9'exit;'
-        '$LDWend_updateVelocity_k:'
-        #9'} // updateVelocity_k'
-        ''
-        #9'.entry advectParticles_k ('
-        #9#9'.param .u32 __cudaparm_advectParticles_k_part,'
-        #9#9'.param .u32 __cudaparm_advectParticles_k_v,'
-        #9#9'.param .s32 __cudaparm_advectParticles_k_dx,'
-        #9#9'.param .s32 __cudaparm_advectParticles_k_dy,'
-        #9#9'.param .f32 __cudaparm_advectParticles_k_dt,'
-        #9#9'.param .s32 __cudaparm_advectParticles_k_lb,'
-        #9#9'.param .u32 __cudaparm_advectParticles_k_pitch)'
-        #9'{'
-        #9'.reg .u16 %rh<4>;'
-        #9'.reg .u32 %r<40>;'
-        #9'.reg .f32 %f<25>;'
-        #9'.reg .pred %p<6>;'
-        
-          #9'.local .align 8 .b8 __cuda___cuda_local_var_86465_19_non_const_' +
-          'vterm_168[8];'
-        #9'.loc'#9'16'#9'192'#9'0'
-        '$LDWbegin_advectParticles_k:'
-        #9'mov.u16 '#9'%rh1, %ctaid.x;'
-        #9'mov.u16 '#9'%rh2, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r1, %rh1, %rh2;'
-        #9'cvt.u32.u16 '#9'%r2, %tid.x;'
-        #9'add.u32 '#9'%r3, %r2, %r1;'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_advectParticles_k_dx];'
-        #9'setp.le.s32 '#9'%p1, %r4, %r3;'
-        #9'@%p1 bra '#9'$Lt_4_2818;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectParticles_k_lb];'
-        #9'mov.u32 '#9'%r6, 0;'
-        #9'setp.le.s32 '#9'%p2, %r5, %r6;'
-        #9'@%p2 bra '#9'$Lt_4_3330;'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectParticles_k_lb];'
-        #9'mov.s32 '#9'%r7, %r5;'
-        #9'cvt.u32.u16 '#9'%r8, %tid.y;'
-        #9'mul.lo.u32 '#9'%r9, %r8, %r5;'
-        #9'cvt.u32.u16 '#9'%r10, %ntid.y;'
-        #9'mul.lo.u32 '#9'%r11, %r10, %r5;'
-        #9'cvt.u32.u16 '#9'%r12, %ctaid.y;'
-        #9'mul.lo.u32 '#9'%r13, %r12, %r11;'
-        #9'add.s32 '#9'%r14, %r9, %r13;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectParticles_k_dy];'
-        #9'mov.s32 '#9'%r16, 0;'
-        #9'mov.s32 '#9'%r17, %r7;'
-        '$Lt_4_3842:'
-        
-          ' //<loop> Loop body line 192, nesting depth: 1, estimated iterat' +
-          'ions: unknown'
-        #9'add.s32 '#9'%r18, %r14, %r16;'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectParticles_k_dy];'
-        #9'setp.ge.s32 '#9'%p3, %r18, %r15;'
-        #9'@%p3 bra '#9'$Lt_4_4098;'
-        ' //<loop> Part of loop body line 192, head labeled $Lt_4_3842'
-        #9'ld.param.s32 '#9'%r4, [__cudaparm_advectParticles_k_dx];'
-        #9'.loc'#9'16'#9'206'#9'0'
-        #9'mul.lo.s32 '#9'%r19, %r18, %r4;'
-        #9'add.s32 '#9'%r20, %r19, %r3;'
-        #9'mul.lo.u32 '#9'%r21, %r20, 8;'
-        #9'ld.param.u32 '#9'%r22, [__cudaparm_advectParticles_k_part];'
-        #9'add.u32 '#9'%r23, %r22, %r21;'
-        #9'ld.global.v2.f32 '#9'{%f1,%f2}, [%r23+0];'
-        #9'.loc'#9'16'#9'210'#9'0'
-        #9'ld.param.u32 '#9'%r24, [__cudaparm_advectParticles_k_v];'
-        #9'ld.param.u32 '#9'%r25, [__cudaparm_advectParticles_k_pitch];'
-        #9'.loc'#9'16'#9'192'#9'0'
-        #9'ld.param.s32 '#9'%r15, [__cudaparm_advectParticles_k_dy];'
-        #9'.loc'#9'16'#9'210'#9'0'
-        #9'cvt.rn.f32.s32 '#9'%f3, %r15;'
-        #9'mul.f32 '#9'%f4, %f3, %f2;'
-        #9'cvt.rzi.s32.f32 '#9'%r26, %f4;'
-        #9'mul.lo.u32 '#9'%r27, %r25, %r26;'
-        #9'cvt.rn.f32.s32 '#9'%f5, %r4;'
-        #9'mul.f32 '#9'%f6, %f5, %f1;'
-        #9'cvt.rzi.s32.f32 '#9'%r28, %f6;'
-        #9'mul.lo.u32 '#9'%r29, %r28, 8;'
-        #9'add.u32 '#9'%r30, %r27, %r29;'
-        #9'add.u32 '#9'%r31, %r24, %r30;'
-        
-          #9'mov.u32 '#9'%r32, __cuda___cuda_local_var_86465_19_non_const_vterm' +
-          '_168;'
-        #9'ld.global.v2.u32 '#9'{%r33,%r34}, [%r31+0];'
-        #9'st.local.u32 '#9'[%r32+0], %r33;'
-        #9'st.local.u32 '#9'[%r32+4], %r34;'
-        #9'.loc'#9'16'#9'212'#9'0'
-        #9'ld.param.f32 '#9'%f7, [__cudaparm_advectParticles_k_dt];'
-        
-          #9'ld.local.f32 '#9'%f8, [__cuda___cuda_local_var_86465_19_non_const_' +
-          'vterm_168+0];'
-        #9'mad.f32 '#9'%f9, %f7, %f8, %f1;'
-        #9'.loc'#9'16'#9'213'#9'0'
-        #9'cvt.rzi.s32.f32 '#9'%r35, %f9;'
-        #9'cvt.rn.f32.s32 '#9'%f10, %r35;'
-        #9'sub.f32 '#9'%f11, %f9, %f10;'
-        #9'.loc'#9'16'#9'215'#9'0'
-        #9'mov.f32 '#9'%f12, 0f3f800000;    '#9'// 1'
-        #9'add.f32 '#9'%f13, %f11, %f12;'
-        #9'cvt.rzi.s32.f32 '#9'%r36, %f13;'
-        #9'cvt.rn.f32.s32 '#9'%f14, %r36;'
-        #9'sub.f32 '#9'%f15, %f13, %f14;'
-        #9'.loc'#9'16'#9'216'#9'0'
-        
-          #9'ld.local.f32 '#9'%f16, [__cuda___cuda_local_var_86465_19_non_const' +
-          '_vterm_168+4];'
-        #9'mad.f32 '#9'%f17, %f7, %f16, %f2;'
-        #9'.loc'#9'16'#9'217'#9'0'
-        #9'cvt.rzi.s32.f32 '#9'%r37, %f17;'
-        #9'cvt.rn.f32.s32 '#9'%f18, %r37;'
-        #9'sub.f32 '#9'%f19, %f17, %f18;'
-        #9'.loc'#9'16'#9'219'#9'0'
-        #9'mov.f32 '#9'%f20, 0f3f800000;    '#9'// 1'
-        #9'add.f32 '#9'%f21, %f19, %f20;'
-        #9'cvt.rzi.s32.f32 '#9'%r38, %f21;'
-        #9'cvt.rn.f32.s32 '#9'%f22, %r38;'
-        #9'sub.f32 '#9'%f23, %f21, %f22;'
-        #9'st.global.v2.f32 '#9'[%r23+0], {%f15,%f23};'
-        '$Lt_4_4098:'
-        ' //<loop> Part of loop body line 192, head labeled $Lt_4_3842'
-        #9'.loc'#9'16'#9'221'#9'0'
-        #9'add.s32 '#9'%r16, %r16, 1;'
-        #9'.loc'#9'16'#9'192'#9'0'
-        #9'ld.param.u32 '#9'%r5, [__cudaparm_advectParticles_k_lb];'
-        #9'.loc'#9'16'#9'221'#9'0'
-        #9'setp.ne.s32 '#9'%p4, %r16, %r5;'
-        #9'@%p4 bra '#9'$Lt_4_3842;'
-        '$Lt_4_3330:'
-        '$Lt_4_2818:'
-        #9'.loc'#9'16'#9'225'#9'0'
+		
         #9'exit;'
         '$LDWend_advectParticles_k:'
         #9'} // advectParticles_k'

+ 17 - 17
Examples/Demos/computing/StableFluids/fFluidsD.pas

@@ -7,7 +7,7 @@ uses
   System.Classes,
   Vcl.Controls, 
   Vcl.Forms, 
-  Vcl.StdCtrls, 
+  Vcl.StdCtrls,
   Vcl.Graphics,
 
   GLS.Scene,
@@ -38,7 +38,7 @@ uses
   CUDA.DataAccess;
 
 type
-  TForm1 = class(TForm)
+  TFormSF = class(TForm)
     GLScene1: TGLScene;
     GLSceneViewer1: TGLSceneViewer;
     GLCadencer1: TGLCadencer;
@@ -149,7 +149,7 @@ type
   end;
 
 var
-  Form1: TForm1;
+  FormSF: TFormSF;
 
 implementation
 
@@ -158,7 +158,7 @@ implementation
 var
   InitPosition : Boolean = False;
 
-procedure TForm1.FormCreate(Sender: TObject);
+procedure TFormSF.FormCreate(Sender: TObject);
 var
   i, j: Integer;
   pos: FloatElement.TVector2;
@@ -211,7 +211,7 @@ begin
   clicked := false;
 end;
 
-procedure TForm1.GLSceneViewer1MouseDown(Sender: TObject; Button: TMouseButton;
+procedure TFormSF.GLSceneViewer1MouseDown(Sender: TObject; Button: TMouseButton;
   Shift: TShiftState; X, Y: Integer);
 begin
   lastX := X;
@@ -220,7 +220,7 @@ begin
   ResetButton.MouseDown(Sender, TMouseButton(Button), Shift, X, Y);
 end;
 
-procedure TForm1.GLSceneViewer1MouseMove(Sender: TObject; Shift: TShiftState; X,
+procedure TFormSF.GLSceneViewer1MouseMove(Sender: TObject; Shift: TShiftState; X,
   Y: Integer);
 var
   nx, ny: Integer;
@@ -251,19 +251,19 @@ begin
   ResetButton.MouseMove(Sender, Shift, X, Y);
 end;
 
-procedure TForm1.GLSceneViewer1MouseUp(Sender: TObject; Button: TMouseButton;
+procedure TFormSF.GLSceneViewer1MouseUp(Sender: TObject; Button: TMouseButton;
   Shift: TShiftState; X, Y: Integer);
 begin
   clicked := false;
   ResetButton.MouseUp(Sender, TMouseButton(Button), Shift, X, Y);
 end;
 
-procedure TForm1.GLCUDA1OpenGLInteropInit(out Context: TGLContext);
+procedure TFormSF.GLCUDA1OpenGLInteropInit(out Context: TGLContext);
 begin
   Context := GLSceneViewer1.Buffer.RenderingContext;
 end;
 
-procedure TForm1.BeforeKernelLaunch(
+procedure TFormSF.BeforeKernelLaunch(
   Sender: TGLVertexAttribute);
 begin
   if not InitPosition then
@@ -286,7 +286,7 @@ begin
   // Look at ParticleRenderer.VertexAttributes[0].KernelFunction
 end;
 
-procedure TForm1.addForcesParameterSetup(Sender: TObject);
+procedure TFormSF.addForcesParameterSetup(Sender: TObject);
 begin
   with addForces do
   begin
@@ -304,7 +304,7 @@ begin
   end;
 end;
 
-procedure TForm1.advectVelocityParameterSetup(Sender: TObject);
+procedure TFormSF.advectVelocityParameterSetup(Sender: TObject);
 begin
   VelocityField.CopyTo(ArrayOfTexture);
   with advectVelocity do
@@ -321,7 +321,7 @@ begin
   end;
 end;
 
-procedure TForm1.diffuseProjectParameterSetup(Sender: TObject);
+procedure TFormSF.diffuseProjectParameterSetup(Sender: TObject);
 begin
   with diffuseProject do
   begin
@@ -335,7 +335,7 @@ begin
   end;
 end;
 
-procedure TForm1.updateVelocityParameterSetup(Sender: TObject);
+procedure TFormSF.updateVelocityParameterSetup(Sender: TObject);
 begin
   with updateVelocity do
   begin
@@ -351,7 +351,7 @@ begin
   end;
 end;
 
-procedure TForm1.advectParticlesParameterSetup(Sender: TObject);
+procedure TFormSF.advectParticlesParameterSetup(Sender: TObject);
 begin
   with advectParticles do
   begin
@@ -365,19 +365,19 @@ begin
   end;
 end;
 
-procedure TForm1.ResetButtonButtonClick(Sender: TObject);
+procedure TFormSF.ResetButtonButtonClick(Sender: TObject);
 begin
   InitPosition := false;
 end;
 
-procedure TForm1.GLCadencer1Progress(Sender: TObject; const DeltaTime,
+procedure TFormSF.GLCadencer1Progress(Sender: TObject; const DeltaTime,
   newTime: Double);
 begin
   Self.DeltaTime := 5*DeltaTime;
   GLSceneViewer1.Invalidate;
 end;
 
-procedure TForm1.FluidShaderApply(Shader: TGLCustomGLSLShader);
+procedure TFormSF.FluidShaderApply(Shader: TGLCustomGLSLShader);
 begin
   with CurrentGLContext.GLStates do
   begin

+ 3 - 543
Examples/Demos/computing/VertexDataGeneration/fVertexGenD.dfm

@@ -1,4 +1,4 @@
-object Form1: TForm1
+object FormVDG: TFormVDG
   Left = 423
   Top = 62
   Caption = 'CUDA fit GLScene'
@@ -96,9 +96,7 @@ object Form1: TForm1
         ''
         #9'//-----------------------------------------------------------'
         
-          #9'// Compiling C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_00000a' +
-          'b0_00000000-11_temp.cpp3.i (C:/Users/YARUNA~1/AppData/Local/Temp' +
-          '/ccBI#.a02728)'
+          #9'// Compiling C:/Users/VPV~1/AppData/Local/Temp/...)'
         #9'//-----------------------------------------------------------'
         ''
         #9'//-----------------------------------------------------------'
@@ -110,10 +108,7 @@ object Form1: TForm1
         #9'//  -m2'#9'(Report advisories)'
         #9'//-----------------------------------------------------------'
         ''
-        
-          #9'.file'#9'1'#9'"C:/Users/YARUNA~1/AppData/Local/Temp/tmpxft_00000ab0_0' +
-          '0000000-10_temp.cudafe2.gpu"'
-        
+       
           #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
           'E\crtdefs.h"'
         
@@ -198,541 +193,6 @@ object Form1: TForm1
           '9,98,219,192,221,52,245,209,87,39,252,41,21,68,78,110,131,249,16' +
           '2};'
         ''
-        #9'.entry _Z6kernelP6float4jjf ('
-        #9#9'.param .u32 __cudaparm__Z6kernelP6float4jjf_pos,'
-        #9#9'.param .u32 __cudaparm__Z6kernelP6float4jjf_width,'
-        #9#9'.param .u32 __cudaparm__Z6kernelP6float4jjf_height,'
-        #9#9'.param .f32 __cudaparm__Z6kernelP6float4jjf_time)'
-        #9'{'
-        #9'.reg .u16 %rh<6>;'
-        #9'.reg .u32 %r<180>;'
-        #9'.reg .f32 %f<92>;'
-        #9'.reg .pred %p<26>;'
-        #9'.local .align 4 .b8 __cuda_result_16[28];'
-        #9'.local .align 4 .b8 __cuda_result_44[28];'
-        #9'.loc'#9'28'#9'9'#9'0'
-        '$LDWbegin__Z6kernelP6float4jjf:'
-        #9'.loc'#9'18'#9'1638'#9'0'
-        #9'mov.u16 '#9'%rh1, %ctaid.x;'
-        #9'mov.u16 '#9'%rh2, %ntid.x;'
-        #9'mul.wide.u16 '#9'%r1, %rh1, %rh2;'
-        #9'ld.param.u32 '#9'%r2, [__cudaparm__Z6kernelP6float4jjf_width];'
-        #9'cvt.rn.f32.u32 '#9'%f1, %r2;'
-        #9'cvt.u32.u16 '#9'%r3, %tid.x;'
-        #9'add.u32 '#9'%r4, %r3, %r1;'
-        #9'cvt.rn.f32.u32 '#9'%f2, %r4;'
-        #9'div.full.f32 '#9'%f3, %f2, %f1;'
-        #9'add.f32 '#9'%f4, %f3, %f3;'
-        #9'mov.f32 '#9'%f5, 0fbf800000;     '#9'// -1'
-        #9'add.f32 '#9'%f6, %f4, %f5;'
-        #9'ld.param.f32 '#9'%f7, [__cudaparm__Z6kernelP6float4jjf_time];'
-        #9'mov.f32 '#9'%f8, 0f40800000;     '#9'// 4'
-        #9'mad.f32 '#9'%f9, %f8, %f6, %f7;'
-        #9'abs.f32 '#9'%f10, %f9;'
-        #9'mov.f32 '#9'%f11, 0f00000000;    '#9'// 0'
-        #9'set.eq.u32.f32 '#9'%r5, %f9, %f11;'
-        #9'neg.s32 '#9'%r6, %r5;'
-        #9'mov.f32 '#9'%f12, 0f7f800000;    '#9'// 1.#INF'
-        #9'set.eq.u32.f32 '#9'%r7, %f10, %f12;'
-        #9'neg.s32 '#9'%r8, %r7;'
-        #9'or.b32 '#9'%r9, %r6, %r8;'
-        #9'mov.u32 '#9'%r10, 0;'
-        #9'setp.eq.s32 '#9'%p1, %r9, %r10;'
-        #9'@%p1 bra '#9'$Lt_0_23554;'
-        #9'.loc'#9'18'#9'1639'#9'0'
-        #9'mov.f32 '#9'%f13, 0f00000000;    '#9'// 0'
-        #9'mul.rn.f32 '#9'%f14, %f9, %f13;'
-        #9'mov.u32 '#9'%r11, __cudart_i2opi_f;'
-        #9'bra.uni '#9'$LDWendi___isinff_204_5;'
-        '$Lt_0_23554:'
-        #9'mov.f32 '#9'%f15, 0f473ba700;    '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p2, %f10, %f15;'
-        #9'@!%p2 bra '#9'$Lt_0_24066;'
-        #9'.loc'#9'18'#9'1396'#9'0'
-        #9'mov.u32 '#9'%r11, __cudart_i2opi_f;'
-        #9'mov.b32 '#9'%r12, %f9;'
-        #9'and.b32 '#9'%r13, %r12, -2147483648;'
-        #9'mov.s32 '#9'%r14, %r13;'
-        #9'.loc'#9'18'#9'1405'#9'0'
-        #9'shl.b32 '#9'%r15, %r12, 1;'
-        #9'shr.u32 '#9'%r16, %r15, 24;'
-        #9'sub.u32 '#9'%r17, %r16, 128;'
-        #9'shr.u32 '#9'%r18, %r17, 5;'
-        #9'mov.s32 '#9'%r19, 4;'
-        #9'sub.s32 '#9'%r20, %r19, %r18;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'mov.s32 '#9'%r21, %r11;'
-        #9'add.u32 '#9'%r22, %r11, 24;'
-        #9'mov.u32 '#9'%r23, __cuda_result_16;'
-        #9'shl.b32 '#9'%r24, %r12, 8;'
-        #9'or.b32 '#9'%r25, %r24, -2147483648;'
-        #9'mov.u32 '#9'%r26, 0;'
-        '$Lt_0_25090:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1411'#9'0'
-        #9'ld.const.u32 '#9'%r27, [%r21+0];'
-        #9'mul.lo.u32 '#9'%r28, %r27, %r25;'
-        #9'add.u32 '#9'%r29, %r28, %r26;'
-        #9'.loc'#9'18'#9'1412'#9'0'
-        #9'set.gt.u32.u32 '#9'%r30, %r28, %r29;'
-        #9'neg.s32 '#9'%r31, %r30;'
-        #9'mul.hi.u32 '#9'%r32, %r27, %r25;'
-        #9'add.u32 '#9'%r26, %r31, %r32;'
-        #9'.loc'#9'18'#9'1413'#9'0'
-        #9'st.local.u32 '#9'[%r23+0], %r29;'
-        #9'add.u32 '#9'%r23, %r23, 4;'
-        #9'add.u32 '#9'%r21, %r21, 4;'
-        #9'setp.ne.u32 '#9'%p3, %r21, %r22;'
-        #9'@%p3 bra '#9'$Lt_0_25090;'
-        #9'.loc'#9'18'#9'1415'#9'0'
-        #9'st.local.u32 '#9'[__cuda_result_16+24], %r26;'
-        #9'.loc'#9'18'#9'1420'#9'0'
-        #9'mul.lo.u32 '#9'%r33, %r20, 4;'
-        #9'mov.u32 '#9'%r34, __cuda_result_16;'
-        #9'add.u32 '#9'%r35, %r33, %r34;'
-        #9'ld.local.u32 '#9'%r26, [%r35+8];'
-        #9'.loc'#9'18'#9'1421'#9'0'
-        #9'ld.local.u32 '#9'%r36, [%r35+4];'
-        #9'and.b32 '#9'%r37, %r17, 31;'
-        #9'mov.u32 '#9'%r38, 0;'
-        #9'setp.eq.u32 '#9'%p4, %r37, %r38;'
-        #9'@%p4 bra '#9'$Lt_0_25602;'
-        #9'.loc'#9'18'#9'1423'#9'0'
-        #9'mov.s32 '#9'%r39, 32;'
-        #9'sub.s32 '#9'%r40, %r39, %r37;'
-        #9'.loc'#9'18'#9'1424'#9'0'
-        #9'shr.u32 '#9'%r41, %r36, %r40;'
-        #9'shl.b32 '#9'%r42, %r26, %r37;'
-        #9'add.u32 '#9'%r26, %r41, %r42;'
-        #9'.loc'#9'18'#9'1425'#9'0'
-        #9'ld.local.u32 '#9'%r43, [%r35+0];'
-        #9'shr.u32 '#9'%r44, %r43, %r40;'
-        #9'shl.b32 '#9'%r45, %r36, %r37;'
-        #9'add.u32 '#9'%r36, %r44, %r45;'
-        '$Lt_0_25602:'
-        #9'.loc'#9'18'#9'1427'#9'0'
-        #9'shr.u32 '#9'%r40, %r26, 30;'
-        #9'.loc'#9'18'#9'1429'#9'0'
-        #9'shr.u32 '#9'%r46, %r36, 30;'
-        #9'shl.b32 '#9'%r47, %r26, 2;'
-        #9'add.u32 '#9'%r26, %r46, %r47;'
-        #9'.loc'#9'18'#9'1430'#9'0'
-        #9'shl.b32 '#9'%r36, %r36, 2;'
-        #9'mov.u32 '#9'%r48, 0;'
-        #9'setp.eq.u32 '#9'%p5, %r36, %r48;'
-        #9'@%p5 bra '#9'$Lt_0_26370;'
-        #9'.loc'#9'18'#9'1431'#9'0'
-        #9'add.u32 '#9'%r49, %r26, 1;'
-        #9'mov.u32 '#9'%r50, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r51, %r49, %r50;'
-        #9'neg.s32 '#9'%r52, %r51;'
-        #9'bra.uni '#9'$Lt_0_26114;'
-        '$Lt_0_26370:'
-        #9'mov.u32 '#9'%r53, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r54, %r26, %r53;'
-        #9'neg.s32 '#9'%r52, %r54;'
-        '$Lt_0_26114:'
-        #9'.loc'#9'18'#9'1432'#9'0'
-        #9'add.u32 '#9'%r55, %r40, %r52;'
-        #9'.loc'#9'18'#9'1431'#9'0'
-        #9'neg.s32 '#9'%r56, %r55;'
-        #9'mov.u32 '#9'%r57, 0;'
-        #9'setp.ne.u32 '#9'%p6, %r13, %r57;'
-        #9'selp.s32 '#9'%r40, %r56, %r55, %p6;'
-        #9'mov.u32 '#9'%r58, 0;'
-        #9'setp.eq.u32 '#9'%p7, %r52, %r58;'
-        #9'@%p7 bra '#9'$Lt_0_26626;'
-        #9'.loc'#9'18'#9'1437'#9'0'
-        #9'neg.s32 '#9'%r36, %r36;'
-        #9'.loc'#9'18'#9'1439'#9'0'
-        #9'mov.u32 '#9'%r59, 0;'
-        #9'set.eq.u32.u32 '#9'%r60, %r36, %r59;'
-        #9'neg.s32 '#9'%r61, %r60;'
-        #9'not.b32 '#9'%r62, %r26;'
-        #9'add.u32 '#9'%r26, %r61, %r62;'
-        #9'.loc'#9'18'#9'1440'#9'0'
-        #9'xor.b32 '#9'%r14, %r13, -2147483648;'
-        '$Lt_0_26626:'
-        #9'.loc'#9'18'#9'1442'#9'0'
-        #9'mov.s32 '#9'%r63, %r40;'
-        #9'mov.u32 '#9'%r64, 0;'
-        #9'setp.le.s32 '#9'%p8, %r26, %r64;'
-        #9'@%p8 bra '#9'$Lt_0_34818;'
-        #9'mov.u32 '#9'%r65, 0;'
-        '$Lt_0_27650:'
-        
-          ' //<loop> Loop body line 1442, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1446'#9'0'
-        #9'shr.u32 '#9'%r66, %r36, 31;'
-        #9'shl.b32 '#9'%r67, %r26, 1;'
-        #9'add.u32 '#9'%r26, %r66, %r67;'
-        #9'.loc'#9'18'#9'1447'#9'0'
-        #9'shl.b32 '#9'%r36, %r36, 1;'
-        #9'.loc'#9'18'#9'1448'#9'0'
-        #9'sub.u32 '#9'%r65, %r65, 1;'
-        #9'mov.u32 '#9'%r68, 0;'
-        #9'setp.gt.s32 '#9'%p9, %r26, %r68;'
-        #9'@%p9 bra '#9'$Lt_0_27650;'
-        #9'bra.uni '#9'$Lt_0_27138;'
-        '$Lt_0_34818:'
-        #9'mov.u32 '#9'%r65, 0;'
-        '$Lt_0_27138:'
-        #9'.loc'#9'18'#9'1450'#9'0'
-        #9'mul.lo.u32 '#9'%r36, %r26, -921707870;'
-        #9'.loc'#9'18'#9'1451'#9'0'
-        #9'mov.u32 '#9'%r69, -921707870;'
-        #9'mul.hi.u32 '#9'%r26, %r26, %r69;'
-        #9'mov.u32 '#9'%r70, 0;'
-        #9'setp.le.s32 '#9'%p10, %r26, %r70;'
-        #9'@%p10 bra '#9'$Lt_0_28162;'
-        #9'.loc'#9'18'#9'1453'#9'0'
-        #9'shr.u32 '#9'%r71, %r36, 31;'
-        #9'shl.b32 '#9'%r72, %r26, 1;'
-        #9'add.u32 '#9'%r26, %r71, %r72;'
-        #9'.loc'#9'18'#9'1454'#9'0'
-        #9'shl.b32 '#9'%r36, %r36, 1;'
-        #9'.loc'#9'18'#9'1455'#9'0'
-        #9'sub.u32 '#9'%r65, %r65, 1;'
-        '$Lt_0_28162:'
-        #9'.loc'#9'18'#9'1457'#9'0'
-        #9'mov.u32 '#9'%r73, 0;'
-        #9'set.ne.u32.u32 '#9'%r74, %r36, %r73;'
-        #9'neg.s32 '#9'%r75, %r74;'
-        #9'add.u32 '#9'%r26, %r75, %r26;'
-        #9'.loc'#9'18'#9'1459'#9'0'
-        #9'shl.b32 '#9'%r76, %r26, 24;'
-        #9'mov.s32 '#9'%r77, 0;'
-        #9'set.lt.u32.s32 '#9'%r78, %r76, %r77;'
-        #9'neg.s32 '#9'%r79, %r78;'
-        #9'shr.u32 '#9'%r80, %r26, 8;'
-        #9'add.u32 '#9'%r81, %r65, 126;'
-        #9'shl.b32 '#9'%r82, %r81, 23;'
-        #9'add.u32 '#9'%r83, %r80, %r82;'
-        #9'add.u32 '#9'%r84, %r79, %r83;'
-        #9'or.b32 '#9'%r85, %r14, %r84;'
-        #9'mov.b32 '#9'%f16, %r85;'
-        #9'bra.uni '#9'$LDWendi___internal_fmad_204_6;'
-        '$Lt_0_24066:'
-        #9'.loc'#9'18'#9'1463'#9'0'
-        #9'mov.f32 '#9'%f17, 0f3f22f983;    '#9'// 0.63662'
-        #9'mul.f32 '#9'%f18, %f9, %f17;'
-        #9'cvt.rni.s32.f32 '#9'%r86, %f18;'
-        #9'cvt.rn.f32.s32 '#9'%f19, %r86;'
-        #9'neg.f32 '#9'%f20, %f19;'
-        #9'.loc'#9'18'#9'1472'#9'0'
-        #9'mov.s32 '#9'%r63, %r86;'
-        #9'.loc'#9'18'#9'1473'#9'0'
-        #9'mov.f32 '#9'%f21, 0f3fc90000;    '#9'// 1.57031'
-        #9'mad.f32 '#9'%f22, %f21, %f20, %f9;'
-        #9'mov.f32 '#9'%f23, 0f39fd8000;    '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f24, %f23, %f20, %f22;'
-        #9'mov.f32 '#9'%f25, 0f34a88000;    '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f26, %f25, %f20, %f24;'
-        #9'mov.f32 '#9'%f27, 0f2e85a309;    '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f16, %f27, %f20, %f26;'
-        #9'mov.u32 '#9'%r11, __cudart_i2opi_f;'
-        '$LDWendi___internal_fmad_204_6:'
-        #9'.loc'#9'18'#9'1641'#9'0'
-        #9'mul.f32 '#9'%f28, %f16, %f16;'
-        #9'and.b32 '#9'%r87, %r63, 1;'
-        #9'mov.u32 '#9'%r88, 0;'
-        #9'setp.eq.s32 '#9'%p11, %r87, %r88;'
-        #9'@%p11 bra '#9'$Lt_0_28930;'
-        #9'.loc'#9'18'#9'1644'#9'0'
-        #9'mov.f32 '#9'%f29, 0f3f800000;    '#9'// 1'
-        #9'mov.f32 '#9'%f30, 0fbf000000;    '#9'// -0.5'
-        #9'mov.f32 '#9'%f31, 0f3d2aaaa5;    '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f32, 0fbab6061a;    '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f33, 0f37ccf5ce;    '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f34, %f33, %f28, %f32;'
-        #9'mad.f32 '#9'%f35, %f28, %f34, %f31;'
-        #9'mad.f32 '#9'%f36, %f28, %f35, %f30;'
-        #9'mad.f32 '#9'%f37, %f28, %f36, %f29;'
-        #9'bra.uni '#9'$Lt_0_28674;'
-        '$Lt_0_28930:'
-        #9'.loc'#9'18'#9'1646'#9'0'
-        #9'mov.f32 '#9'%f38, 0fbe2aaaa3;    '#9'// -0.166667'
-        #9'mov.f32 '#9'%f39, 0f3c08839e;    '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f40, 0fb94ca1f9;    '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f41, %f40, %f28, %f39;'
-        #9'mad.f32 '#9'%f42, %f28, %f41, %f38;'
-        #9'mul.f32 '#9'%f43, %f28, %f42;'
-        #9'mad.f32 '#9'%f37, %f43, %f16, %f16;'
-        '$Lt_0_28674:'
-        #9'.loc'#9'18'#9'1648'#9'0'
-        #9'neg.f32 '#9'%f44, %f37;'
-        #9'and.b32 '#9'%r89, %r63, 2;'
-        #9'mov.s32 '#9'%r90, 0;'
-        #9'setp.ne.s32 '#9'%p12, %r89, %r90;'
-        #9'selp.f32 '#9'%f37, %f44, %f37, %p12;'
-        #9'.loc'#9'18'#9'1651'#9'0'
-        #9'mov.f32 '#9'%f14, %f37;'
-        '$LDWendi___isinff_204_5:'
-        #9'.loc'#9'18'#9'1702'#9'0'
-        #9'mov.u16 '#9'%rh3, %ctaid.y;'
-        #9'mov.u16 '#9'%rh4, %ntid.y;'
-        #9'mul.wide.u16 '#9'%r91, %rh3, %rh4;'
-        #9'ld.param.u32 '#9'%r92, [__cudaparm__Z6kernelP6float4jjf_height];'
-        #9'cvt.rn.f32.u32 '#9'%f45, %r92;'
-        #9'cvt.u32.u16 '#9'%r93, %tid.y;'
-        #9'add.u32 '#9'%r94, %r93, %r91;'
-        #9'cvt.rn.f32.u32 '#9'%f46, %r94;'
-        #9'div.full.f32 '#9'%f47, %f46, %f45;'
-        #9'add.f32 '#9'%f48, %f47, %f47;'
-        #9'mov.f32 '#9'%f49, 0fbf800000;    '#9'// -1'
-        #9'add.f32 '#9'%f50, %f48, %f49;'
-        #9'mov.f32 '#9'%f51, 0f40800000;    '#9'// 4'
-        #9'.loc'#9'18'#9'1638'#9'0'
-        #9'ld.param.f32 '#9'%f7, [__cudaparm__Z6kernelP6float4jjf_time];'
-        #9'.loc'#9'18'#9'1702'#9'0'
-        #9'mad.f32 '#9'%f52, %f51, %f50, %f7;'
-        #9'abs.f32 '#9'%f53, %f52;'
-        #9'mov.f32 '#9'%f54, 0f7f800000;    '#9'// 1.#INF'
-        #9'setp.eq.f32 '#9'%p13, %f53, %f54;'
-        #9'@!%p13 bra '#9'$Lt_0_29186;'
-        #9'.loc'#9'18'#9'1703'#9'0'
-        #9'neg.f32 '#9'%f55, %f52;'
-        #9'add.rn.f32 '#9'%f56, %f52, %f55;'
-        #9'bra.uni '#9'$LDWendi___isinff_204_1;'
-        '$Lt_0_29186:'
-        #9'mov.f32 '#9'%f57, 0f473ba700;    '#9'// 48039'
-        #9'setp.gt.f32 '#9'%p14, %f53, %f57;'
-        #9'@!%p14 bra '#9'$Lt_0_29698;'
-        #9'.loc'#9'18'#9'1396'#9'0'
-        #9'mov.b32 '#9'%r95, %f52;'
-        #9'and.b32 '#9'%r96, %r95, -2147483648;'
-        #9'mov.s32 '#9'%r97, %r96;'
-        #9'.loc'#9'18'#9'1405'#9'0'
-        #9'shl.b32 '#9'%r98, %r95, 1;'
-        #9'shr.u32 '#9'%r99, %r98, 24;'
-        #9'sub.u32 '#9'%r100, %r99, 128;'
-        #9'shr.u32 '#9'%r101, %r100, 5;'
-        #9'mov.s32 '#9'%r102, 4;'
-        #9'sub.s32 '#9'%r103, %r102, %r101;'
-        #9'.loc'#9'18'#9'24'#9'0'
-        #9'mov.s32 '#9'%r104, %r11;'
-        #9'add.u32 '#9'%r22, %r11, 24;'
-        #9'mov.u32 '#9'%r105, __cuda_result_44;'
-        #9'shl.b32 '#9'%r106, %r95, 8;'
-        #9'or.b32 '#9'%r107, %r106, -2147483648;'
-        #9'mov.u32 '#9'%r108, 0;'
-        '$Lt_0_30722:'
-        ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
-        #9'.loc'#9'18'#9'1411'#9'0'
-        #9'ld.const.u32 '#9'%r109, [%r104+0];'
-        #9'mul.lo.u32 '#9'%r110, %r109, %r107;'
-        #9'add.u32 '#9'%r111, %r110, %r108;'
-        #9'.loc'#9'18'#9'1412'#9'0'
-        #9'set.gt.u32.u32 '#9'%r112, %r110, %r111;'
-        #9'neg.s32 '#9'%r113, %r112;'
-        #9'mul.hi.u32 '#9'%r114, %r109, %r107;'
-        #9'add.u32 '#9'%r108, %r113, %r114;'
-        #9'.loc'#9'18'#9'1413'#9'0'
-        #9'st.local.u32 '#9'[%r105+0], %r111;'
-        #9'add.u32 '#9'%r105, %r105, 4;'
-        #9'add.u32 '#9'%r104, %r104, 4;'
-        #9'setp.ne.u32 '#9'%p15, %r104, %r22;'
-        #9'@%p15 bra '#9'$Lt_0_30722;'
-        #9'.loc'#9'18'#9'1415'#9'0'
-        #9'st.local.u32 '#9'[__cuda_result_44+24], %r108;'
-        #9'.loc'#9'18'#9'1420'#9'0'
-        #9'mul.lo.u32 '#9'%r115, %r103, 4;'
-        #9'mov.u32 '#9'%r116, __cuda_result_44;'
-        #9'add.u32 '#9'%r117, %r115, %r116;'
-        #9'ld.local.u32 '#9'%r108, [%r117+8];'
-        #9'.loc'#9'18'#9'1421'#9'0'
-        #9'ld.local.u32 '#9'%r118, [%r117+4];'
-        #9'and.b32 '#9'%r119, %r100, 31;'
-        #9'mov.u32 '#9'%r120, 0;'
-        #9'setp.eq.u32 '#9'%p16, %r119, %r120;'
-        #9'@%p16 bra '#9'$Lt_0_31234;'
-        #9'.loc'#9'18'#9'1423'#9'0'
-        #9'mov.s32 '#9'%r121, 32;'
-        #9'sub.s32 '#9'%r122, %r121, %r119;'
-        #9'.loc'#9'18'#9'1424'#9'0'
-        #9'shr.u32 '#9'%r123, %r118, %r122;'
-        #9'shl.b32 '#9'%r124, %r108, %r119;'
-        #9'add.u32 '#9'%r108, %r123, %r124;'
-        #9'.loc'#9'18'#9'1425'#9'0'
-        #9'ld.local.u32 '#9'%r125, [%r117+0];'
-        #9'shr.u32 '#9'%r126, %r125, %r122;'
-        #9'shl.b32 '#9'%r127, %r118, %r119;'
-        #9'add.u32 '#9'%r118, %r126, %r127;'
-        '$Lt_0_31234:'
-        #9'.loc'#9'18'#9'1427'#9'0'
-        #9'shr.u32 '#9'%r122, %r108, 30;'
-        #9'.loc'#9'18'#9'1429'#9'0'
-        #9'shr.u32 '#9'%r128, %r118, 30;'
-        #9'shl.b32 '#9'%r129, %r108, 2;'
-        #9'add.u32 '#9'%r108, %r128, %r129;'
-        #9'.loc'#9'18'#9'1430'#9'0'
-        #9'shl.b32 '#9'%r118, %r118, 2;'
-        #9'mov.u32 '#9'%r130, 0;'
-        #9'setp.eq.u32 '#9'%p17, %r118, %r130;'
-        #9'@%p17 bra '#9'$Lt_0_32002;'
-        #9'.loc'#9'18'#9'1431'#9'0'
-        #9'add.u32 '#9'%r131, %r108, 1;'
-        #9'mov.u32 '#9'%r132, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r133, %r131, %r132;'
-        #9'neg.s32 '#9'%r134, %r133;'
-        #9'bra.uni '#9'$Lt_0_31746;'
-        '$Lt_0_32002:'
-        #9'mov.u32 '#9'%r135, -2147483648;'
-        #9'set.gt.u32.u32 '#9'%r136, %r108, %r135;'
-        #9'neg.s32 '#9'%r134, %r136;'
-        '$Lt_0_31746:'
-        #9'.loc'#9'18'#9'1432'#9'0'
-        #9'add.u32 '#9'%r137, %r122, %r134;'
-        #9'.loc'#9'18'#9'1431'#9'0'
-        #9'neg.s32 '#9'%r138, %r137;'
-        #9'mov.u32 '#9'%r139, 0;'
-        #9'setp.ne.u32 '#9'%p18, %r96, %r139;'
-        #9'selp.s32 '#9'%r122, %r138, %r137, %p18;'
-        #9'mov.u32 '#9'%r140, 0;'
-        #9'setp.eq.u32 '#9'%p19, %r134, %r140;'
-        #9'@%p19 bra '#9'$Lt_0_32258;'
-        #9'.loc'#9'18'#9'1437'#9'0'
-        #9'neg.s32 '#9'%r118, %r118;'
-        #9'.loc'#9'18'#9'1439'#9'0'
-        #9'mov.u32 '#9'%r141, 0;'
-        #9'set.eq.u32.u32 '#9'%r142, %r118, %r141;'
-        #9'neg.s32 '#9'%r143, %r142;'
-        #9'not.b32 '#9'%r144, %r108;'
-        #9'add.u32 '#9'%r108, %r143, %r144;'
-        #9'.loc'#9'18'#9'1440'#9'0'
-        #9'xor.b32 '#9'%r97, %r96, -2147483648;'
-        '$Lt_0_32258:'
-        #9'.loc'#9'18'#9'1442'#9'0'
-        #9'mov.s32 '#9'%r145, %r122;'
-        #9'mov.u32 '#9'%r146, 0;'
-        #9'setp.le.s32 '#9'%p20, %r108, %r146;'
-        #9'@%p20 bra '#9'$Lt_0_35074;'
-        #9'mov.u32 '#9'%r147, 0;'
-        '$Lt_0_33282:'
-        
-          ' //<loop> Loop body line 1442, nesting depth: 1, estimated itera' +
-          'tions: unknown'
-        #9'.loc'#9'18'#9'1446'#9'0'
-        #9'shr.u32 '#9'%r148, %r118, 31;'
-        #9'shl.b32 '#9'%r149, %r108, 1;'
-        #9'add.u32 '#9'%r108, %r148, %r149;'
-        #9'.loc'#9'18'#9'1447'#9'0'
-        #9'shl.b32 '#9'%r118, %r118, 1;'
-        #9'.loc'#9'18'#9'1448'#9'0'
-        #9'sub.u32 '#9'%r147, %r147, 1;'
-        #9'mov.u32 '#9'%r150, 0;'
-        #9'setp.gt.s32 '#9'%p21, %r108, %r150;'
-        #9'@%p21 bra '#9'$Lt_0_33282;'
-        #9'bra.uni '#9'$Lt_0_32770;'
-        '$Lt_0_35074:'
-        #9'mov.u32 '#9'%r147, 0;'
-        '$Lt_0_32770:'
-        #9'.loc'#9'18'#9'1450'#9'0'
-        #9'mul.lo.u32 '#9'%r118, %r108, -921707870;'
-        #9'.loc'#9'18'#9'1451'#9'0'
-        #9'mov.u32 '#9'%r151, -921707870;'
-        #9'mul.hi.u32 '#9'%r108, %r108, %r151;'
-        #9'mov.u32 '#9'%r152, 0;'
-        #9'setp.le.s32 '#9'%p22, %r108, %r152;'
-        #9'@%p22 bra '#9'$Lt_0_33794;'
-        #9'.loc'#9'18'#9'1453'#9'0'
-        #9'shr.u32 '#9'%r153, %r118, 31;'
-        #9'shl.b32 '#9'%r154, %r108, 1;'
-        #9'add.u32 '#9'%r108, %r153, %r154;'
-        #9'.loc'#9'18'#9'1454'#9'0'
-        #9'shl.b32 '#9'%r118, %r118, 1;'
-        #9'.loc'#9'18'#9'1455'#9'0'
-        #9'sub.u32 '#9'%r147, %r147, 1;'
-        '$Lt_0_33794:'
-        #9'.loc'#9'18'#9'1457'#9'0'
-        #9'mov.u32 '#9'%r155, 0;'
-        #9'set.ne.u32.u32 '#9'%r156, %r118, %r155;'
-        #9'neg.s32 '#9'%r157, %r156;'
-        #9'add.u32 '#9'%r108, %r157, %r108;'
-        #9'.loc'#9'18'#9'1459'#9'0'
-        #9'shl.b32 '#9'%r158, %r108, 24;'
-        #9'mov.s32 '#9'%r159, 0;'
-        #9'set.lt.u32.s32 '#9'%r160, %r158, %r159;'
-        #9'neg.s32 '#9'%r161, %r160;'
-        #9'shr.u32 '#9'%r162, %r108, 8;'
-        #9'add.u32 '#9'%r163, %r147, 126;'
-        #9'shl.b32 '#9'%r164, %r163, 23;'
-        #9'add.u32 '#9'%r165, %r162, %r164;'
-        #9'add.u32 '#9'%r166, %r161, %r165;'
-        #9'or.b32 '#9'%r167, %r97, %r166;'
-        #9'mov.b32 '#9'%f58, %r167;'
-        #9'bra.uni '#9'$LDWendi___internal_fmad_204_2;'
-        '$Lt_0_29698:'
-        #9'.loc'#9'18'#9'1463'#9'0'
-        #9'mov.f32 '#9'%f59, 0f3f22f983;    '#9'// 0.63662'
-        #9'mul.f32 '#9'%f60, %f52, %f59;'
-        #9'cvt.rni.s32.f32 '#9'%r168, %f60;'
-        #9'cvt.rn.f32.s32 '#9'%f61, %r168;'
-        #9'neg.f32 '#9'%f62, %f61;'
-        #9'.loc'#9'18'#9'1472'#9'0'
-        #9'mov.s32 '#9'%r145, %r168;'
-        #9'.loc'#9'18'#9'1473'#9'0'
-        #9'mov.f32 '#9'%f63, 0f3fc90000;    '#9'// 1.57031'
-        #9'mad.f32 '#9'%f64, %f63, %f62, %f52;'
-        #9'mov.f32 '#9'%f65, 0f39fd8000;    '#9'// 0.000483513'
-        #9'mad.f32 '#9'%f66, %f65, %f62, %f64;'
-        #9'mov.f32 '#9'%f67, 0f34a88000;    '#9'// 3.13856e-007'
-        #9'mad.f32 '#9'%f68, %f67, %f62, %f66;'
-        #9'mov.f32 '#9'%f69, 0f2e85a309;    '#9'// 6.0771e-011'
-        #9'mad.f32 '#9'%f58, %f69, %f62, %f68;'
-        '$LDWendi___internal_fmad_204_2:'
-        #9'.loc'#9'18'#9'1705'#9'0'
-        #9'add.s32 '#9'%r169, %r145, 1;'
-        #9'mul.f32 '#9'%f70, %f58, %f58;'
-        #9'and.b32 '#9'%r170, %r169, 1;'
-        #9'mov.u32 '#9'%r171, 0;'
-        #9'setp.eq.s32 '#9'%p23, %r170, %r171;'
-        #9'@%p23 bra '#9'$Lt_0_34562;'
-        #9'.loc'#9'18'#9'1709'#9'0'
-        #9'mov.f32 '#9'%f71, 0f3f800000;    '#9'// 1'
-        #9'mov.f32 '#9'%f72, 0fbf000000;    '#9'// -0.5'
-        #9'mov.f32 '#9'%f73, 0f3d2aaaa5;    '#9'// 0.0416666'
-        #9'mov.f32 '#9'%f74, 0fbab6061a;    '#9'// -0.00138873'
-        #9'mov.f32 '#9'%f75, 0f37ccf5ce;    '#9'// 2.44332e-005'
-        #9'mad.f32 '#9'%f76, %f75, %f70, %f74;'
-        #9'mad.f32 '#9'%f77, %f70, %f76, %f73;'
-        #9'mad.f32 '#9'%f78, %f70, %f77, %f72;'
-        #9'mad.f32 '#9'%f79, %f70, %f78, %f71;'
-        #9'bra.uni '#9'$Lt_0_34306;'
-        '$Lt_0_34562:'
-        #9'.loc'#9'18'#9'1711'#9'0'
-        #9'mov.f32 '#9'%f80, 0fbe2aaaa3;    '#9'// -0.166667'
-        #9'mov.f32 '#9'%f81, 0f3c08839e;    '#9'// 0.00833216'
-        #9'mov.f32 '#9'%f82, 0fb94ca1f9;    '#9'// -0.000195153'
-        #9'mad.f32 '#9'%f83, %f82, %f70, %f81;'
-        #9'mad.f32 '#9'%f84, %f70, %f83, %f80;'
-        #9'mul.f32 '#9'%f85, %f70, %f84;'
-        #9'mad.f32 '#9'%f79, %f85, %f58, %f58;'
-        '$Lt_0_34306:'
-        #9'.loc'#9'18'#9'1713'#9'0'
-        #9'neg.f32 '#9'%f86, %f79;'
-        #9'and.b32 '#9'%r172, %r169, 2;'
-        #9'mov.s32 '#9'%r173, 0;'
-        #9'setp.ne.s32 '#9'%p24, %r172, %r173;'
-        #9'selp.f32 '#9'%f79, %f86, %f79, %p24;'
-        #9'.loc'#9'18'#9'1716'#9'0'
-        #9'mov.f32 '#9'%f56, %f79;'
-        '$LDWendi___isinff_204_1:'
-        #9'.loc'#9'18'#9'1638'#9'0'
-        #9'ld.param.u32 '#9'%r2, [__cudaparm__Z6kernelP6float4jjf_width];'
-        #9'.loc'#9'28'#9'25'#9'0'
-        #9'mul.lo.u32 '#9'%r174, %r94, %r2;'
-        #9'add.u32 '#9'%r175, %r4, %r174;'
-        #9'mul.lo.u32 '#9'%r176, %r175, 16;'
-        #9'ld.param.u32 '#9'%r177, [__cudaparm__Z6kernelP6float4jjf_pos];'
-        #9'add.u32 '#9'%r178, %r177, %r176;'
-        #9'mul.f32 '#9'%f87, %f56, %f14;'
-        #9'mov.f32 '#9'%f88, 0f3f000000;    '#9'// 0.5'
-        #9'mul.f32 '#9'%f89, %f87, %f88;'
-        #9'mov.f32 '#9'%f90, 0f3f800000;    '#9'// 1'
-        #9'st.global.v4.f32 '#9'[%r178+0], {%f6,%f89,%f50,%f90};'
-        #9'.loc'#9'28'#9'26'#9'0'
         #9'exit;'
         '$LDWend__Z6kernelP6float4jjf:'
         #9'} // _Z6kernelP6float4jjf'

+ 7 - 7
Examples/Demos/computing/VertexDataGeneration/fVertexGenD.pas

@@ -34,7 +34,7 @@ uses
   GLSL.Shader;
 
 type
-  TForm1 = class(TForm)
+  TFormVDG = class(TForm)
     GLScene1: TGLScene;
     GLSceneViewer1: TGLSceneViewer;
     GLCadencer1: TGLCadencer;
@@ -61,13 +61,13 @@ type
   end;
 
 var
-  Form1: TForm1;
+  FormVDG: TFormVDG;
 
 implementation
 
 {$R *.dfm}
 
-procedure TForm1.FormCreate(Sender: TObject);
+procedure TFormVDG.FormCreate(Sender: TObject);
 begin
   FieldWidth := 256;
   FieldHeight := 256;
@@ -77,19 +77,19 @@ begin
   MakeDotField.Grid.SizeY := FieldWidth div MakeDotField.BlockShape.SizeY;
 end;
 
-procedure TForm1.GLCUDA1OpenGLInteropInit(out Context: TGLContext);
+procedure TFormVDG.GLCUDA1OpenGLInteropInit(out Context: TGLContext);
 begin
   Context := GLSceneViewer1.Buffer.RenderingContext;
 end;
 
-procedure TForm1.GLSLShader1Apply(Shader: TGLCustomGLSLShader);
+procedure TFormVDG.GLSLShader1Apply(Shader: TGLCustomGLSLShader);
 begin
   with GLSceneViewer1.Buffer.RenderingContext.PipelineTransformation do
     Shader.Param['ModelViewProjectionMatrix'].AsMatrix4f :=
       MatrixMultiply(ModelViewMatrix^, ProjectionMatrix^);
 end;
 
-procedure TForm1.MakeVertexBufferParameterSetup(Sender: TObject);
+procedure TFormVDG.MakeVertexBufferParameterSetup(Sender: TObject);
 begin
   with MakeDotField do
   begin
@@ -100,7 +100,7 @@ begin
   end;
 end;
 
-procedure TForm1.GLCadencer1Progress(Sender: TObject;
+procedure TFormVDG.GLCadencer1Progress(Sender: TObject;
   const deltaTime, newTime: Double);
 begin
   GLSceneViewer1.Invalidate;

+ 93 - 0
Examples/Demos/computing/fCudaD.dfm

@@ -0,0 +1,93 @@
+object FormCudaD: TFormCudaD
+  Left = 0
+  Top = 0
+  Caption = 'Cuda D'
+  ClientHeight = 544
+  ClientWidth = 902
+  Color = clBtnFace
+  Font.Charset = DEFAULT_CHARSET
+  Font.Color = clWindowText
+  Font.Height = -12
+  Font.Name = 'Segoe UI'
+  Font.Style = []
+  Menu = MainMenu
+  Position = poScreenCenter
+  OnCreate = FormCreate
+  OnShow = FormShow
+  TextHeight = 15
+  object PanelLeft: TPanel
+    Left = 0
+    Top = 0
+    Width = 129
+    Height = 544
+    Align = alLeft
+    TabOrder = 0
+    object tvCuda: TTreeView
+      Left = 1
+      Top = 1
+      Width = 127
+      Height = 542
+      Align = alClient
+      Indent = 19
+      TabOrder = 0
+      OnClick = tvCudaClick
+      Items.NodeData = {
+        03060000003E0000000000000000000000FFFFFFFFFFFFFFFF00000000000000
+        00000000000110460061007300740046006F0075007200690065007200540072
+        0061006E0073003A0000000000000000000000FFFFFFFFFFFFFFFF0000000000
+        00000000000000010E50006F0073007400500072006F00630065007300730069
+        006E006700380000000000000000000000FFFFFFFFFFFFFFFF00000000000000
+        0000000000010D5300630061006C0061007200500072006F0064007500630074
+        00380000000000000000000000FFFFFFFFFFFFFFFF0000000000000000000000
+        00010D530069006D0070006C0065005400650078007400750072006500360000
+        000000000000000000FFFFFFFFFFFFFFFF000000000000000000000000010C53
+        007400610062006C00650046006C0075006900640073003A0000000000000000
+        000000FFFFFFFFFFFFFFFF000000000000000000000000010E56006500720074
+        006500780074004400610074006100470065006E00}
+      ExplicitLeft = 0
+      ExplicitTop = 0
+    end
+  end
+  object PageControl: TPageControl
+    Left = 129
+    Top = 0
+    Width = 773
+    Height = 544
+    ActivePage = tsVertexDataGen
+    Align = alClient
+    TabOrder = 1
+    object tsFastFourierTrans: TTabSheet
+      Caption = 'FastFourierTrans'
+      TabVisible = False
+    end
+    object tsPostProcessing: TTabSheet
+      Caption = 'PostProcessing'
+      ImageIndex = 1
+      TabVisible = False
+    end
+    object tsScalarProduct: TTabSheet
+      Caption = 'ScalarProduct'
+      ImageIndex = 2
+      TabVisible = False
+    end
+    object tsSimpleTexture: TTabSheet
+      Caption = 'SimpleTexture'
+      ImageIndex = 3
+      TabVisible = False
+    end
+    object tsStableFluids: TTabSheet
+      Caption = 'StableFluids'
+      ImageIndex = 5
+      TabVisible = False
+    end
+    object tsVertexDataGen: TTabSheet
+      Caption = 'VertexDataGen'
+      ImageIndex = 4
+      TabVisible = False
+    end
+  end
+  object MainMenu: TMainMenu
+    Left = 272
+    Top = 64
+  end
+end

+ 121 - 0
Examples/Demos/computing/fCudaD.pas

@@ -0,0 +1,121 @@
+unit fCudaD;
+
+interface
+
+uses
+  Winapi.Windows,
+  Winapi.Messages,
+  System.SysUtils,
+  System.Variants,
+  System.Classes,
+  Vcl.Graphics,
+  Vcl.Controls,
+  Vcl.Forms,
+  Vcl.Dialogs,
+  Vcl.ExtCtrls,
+  Vcl.Menus,
+  Vcl.ComCtrls,
+
+  fFastFourierD,
+  fPostProcessingD,
+  fScalarProductD,
+  fSimpleTexD,
+  fFluidsD,
+  fVertexGenD;
+
+type
+  // Main form of the CudaD demo. The form resource places a tree view
+  // (tvCuda) inside PanelLeft and a page control beside it; the page
+  // control holds one tab sheet per embedded demo form.
+  TFormCudaD = class(TForm)
+    PanelLeft: TPanel;
+    tvCuda: TTreeView;
+    MainMenu: TMainMenu;
+    PageControl: TPageControl;
+    // One tab sheet per demo; FormCreate parents a demo form into each.
+    tsFastFourierTrans: TTabSheet;
+    tsPostProcessing: TTabSheet;
+    tsScalarProduct: TTabSheet;
+    tsSimpleTexture: TTabSheet;
+    tsVertexDataGen: TTabSheet;
+    tsStableFluids: TTabSheet;
+    // Activates the tab sheet chosen by the index of the selected tree node.
+    procedure tvCudaClick(Sender: TObject);
+    // Creates the six demo forms and embeds each one in its tab sheet.
+    procedure FormCreate(Sender: TObject);
+    // Makes tsVertexDataGen the active page.
+    procedure FormShow(Sender: TObject);
+  private
+  public
+  end;
+
+var
+  FormCudaD: TFormCudaD;
+
+implementation
+
+{$R *.dfm}
+
+// Creates every demo form with its tab sheet as owner and embeds it there.
+procedure TFormCudaD.FormCreate(Sender: TObject);
+
+  // Hosts an already created form inside ASheet: re-parents it, lets it
+  // fill the sheet, strips its border and finally makes it visible.
+  procedure Embed(AForm: TForm; ASheet: TTabSheet);
+  begin
+    AForm.Parent := ASheet;
+    AForm.Align := alClient;
+    AForm.BorderStyle := bsNone;
+    AForm.Show;
+  end;
+
+begin
+  // FastFourierTrans
+  FormFFT := TFormFFT.Create(tsFastFourierTrans);
+  Embed(FormFFT, tsFastFourierTrans);
+
+  // PostProcessing
+  FormPP := TFormPP.Create(tsPostProcessing);
+  Embed(FormPP, tsPostProcessing);
+
+  // ScalarProduct
+  FormSP := TFormSP.Create(tsScalarProduct);
+  Embed(FormSP, tsScalarProduct);
+
+  // SimpleTexture
+  FormST := TFormST.Create(tsSimpleTexture);
+  Embed(FormST, tsSimpleTexture);
+
+  // StableFluids
+  FormSF := TFormSF.Create(tsStableFluids);
+  Embed(FormSF, tsStableFluids);
+
+  // VertexDataGen
+  FormVDG := TFormVDG.Create(tsVertexDataGen);
+  Embed(FormVDG, tsVertexDataGen);
+end;
+
+// OnShow handler: makes the VertexDataGen tab sheet the active page.
+procedure TFormCudaD.FormShow(Sender: TObject);
+begin
+  PageControl.ActivePage := tsVertexDataGen;
+end;
+
+// OnClick handler of the demo tree: activates the tab sheet whose position
+// matches the index of the currently selected tree node. Does nothing when
+// no node is selected.
+procedure TFormCudaD.tvCudaClick(Sender: TObject);
+var
+  Node: TTreeNode;
+begin
+  // Clear the drop-highlight on the first node, if the tree has any nodes.
+  if tvCuda.Items.Count > 0 then
+    tvCuda.Items[0].DropHighlighted := False;
+
+  // OnClick also fires for clicks on empty space; Selected is nil then and
+  // dereferencing it would raise an access violation.
+  Node := tvCuda.Selected;
+  if Node = nil then
+    Exit;
+
+  case Node.Index of
+    0:
+      PageControl.ActivePage := tsFastFourierTrans;
+    1:
+      PageControl.ActivePage := tsPostProcessing;
+    2:
+      PageControl.ActivePage := tsScalarProduct;
+    3:
+      PageControl.ActivePage := tsSimpleTexture;
+    4:
+      PageControl.ActivePage := tsStableFluids;
+    5:
+      PageControl.ActivePage := tsVertexDataGen;
+  end;
+end;
+
+end.

+ 1 - 0
Examples/Demos/materials/multimaterial/fMultiMaterialD.pas

@@ -4,6 +4,7 @@ interface
 
 uses
   Winapi.OpenGL,
+  Winapi.OpenGLext,
   System.SysUtils,
   System.Classes,
   System.Math,

+ 0 - 2
Source/FmShaderMemo.dfm

@@ -13,11 +13,9 @@ object ShaderMemoForm: TShaderMemoForm
   Font.Height = -11
   Font.Name = 'Tahoma'
   Font.Style = []
-  OldCreateOrder = True
   OnCreate = FormCreate
   OnDestroy = FormDestroy
   OnShow = FormShow
-  PixelsPerInch = 96
   TextHeight = 13
   object ToolBar: TToolBar
     Left = 0

+ 2 - 1
Source/FmShaderMemo.pas

@@ -19,6 +19,7 @@ uses
   System.Variants,
   System.Classes,
   System.Win.Registry,
+  System.ImageList,
   VCL.Controls,
   VCL.Forms,
   VCL.ComCtrls,
@@ -30,7 +31,7 @@ uses
   VCL.ExtCtrls,
   VCL.StdCtrls,
   VCL.Graphics,
-   
+
   GLS.Memo;
 
 type