object Form1: TForm1 Left = 0 Top = 0 BorderStyle = bsDialog Caption = 'CUDA: simple using of texture' ClientHeight = 446 ClientWidth = 782 Color = clBtnFace Font.Charset = DEFAULT_CHARSET Font.Color = clWindowText Font.Height = -15 Font.Name = 'Segoe UI' Font.Style = [] Position = poScreenCenter OnCreate = FormCreate OnDestroy = FormDestroy PixelsPerInch = 120 TextHeight = 20 object Button1: TButton Left = 608 Top = 328 Width = 94 Height = 32 Margins.Left = 4 Margins.Top = 4 Margins.Right = 4 Margins.Bottom = 4 Caption = 'Button1' TabOrder = 0 OnClick = Button1Click end object Memo1: TMemo Left = 0 Top = 0 Width = 782 Height = 273 Margins.Left = 4 Margins.Top = 4 Margins.Right = 4 Margins.Bottom = 4 Align = alTop ScrollBars = ssVertical TabOrder = 1 end object GLCUDA1: TGLCUDA ComputingDevice = GLCUDADevice1 Left = 104 Top = 56 object MainModule: TCUDAModule Code.Strings = ( #9'.version 1.4' #9'.target sm_10, map_f64_to_f32' #9'// compiled with C:\CUDA\bin/../open64/lib//be.exe' #9'// nvopencc 3.0 built on 2009-10-29' '' #9'//-----------------------------------------------------------' #9'// Compiling C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a' + '14_00000001-9_temp.cpp3.i (C:/Users/YARUND~1/AppData/Local/Temp/' + 'ccBI#.a03752)' #9'//-----------------------------------------------------------' '' #9'//-----------------------------------------------------------' #9'// Options:' #9'//-----------------------------------------------------------' #9'// Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32' #9'// -O3'#9'(Optimization level)' #9'// -g0'#9'(Debug level)' #9'// -m2'#9'(Report advisories)' #9'//-----------------------------------------------------------' '' #9'.file'#9'1'#9'"C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a14_0' + '0000001-8_temp.cudafe2.gpu"' #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' + 'E\crtdefs.h"' #9'.file'#9'3'#9'"C:\CUDA\include\crt/device_runtime.h"' #9'.file'#9'4'#9'"C:\CUDA\include\host_defines.h"' #9'.file'#9'5'#9'"C:\CUDA\include\builtin_types.h"' #9'.file'#9'6'#9'"c:\cuda\include\device_types.h"' #9'.file'#9'7'#9'"c:\cuda\include\driver_types.h"' #9'.file'#9'8'#9'"c:\cuda\include\surface_types.h"' #9'.file'#9'9'#9'"c:\cuda\include\texture_types.h"' #9'.file'#9'10'#9'"c:\cuda\include\vector_types.h"' #9'.file'#9'11'#9'"c:\cuda\include\host_defines.h"' #9'.file'#9'12'#9'"C:\CUDA\include\device_launch_parameters.h"' #9'.file'#9'13'#9'"c:\cuda\include\crt\storage_class.h"' #9'.file'#9'14'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' + 'DE\time.h"' #9'.file'#9'15'#9'"C:/Users/YARUND~1/AppData/Local/Temp/temp.cu"' #9'.file'#9'16'#9'"C:\CUDA\include\common_functions.h"' #9'.file'#9'17'#9'"c:\cuda\include\crt/func_macro.h"' #9'.file'#9'18'#9'"c:\cuda\include\math_functions.h"' #9'.file'#9'19'#9'"c:\cuda\include\device_functions.h"' #9'.file'#9'20'#9'"c:\cuda\include\math_constants.h"' #9'.file'#9'21'#9'"c:\cuda\include\sm_11_atomic_functions.h"' #9'.file'#9'22'#9'"c:\cuda\include\sm_12_atomic_functions.h"' #9'.file'#9'23'#9'"c:\cuda\include\sm_13_double_functions.h"' #9'.file'#9'24'#9'"c:\cuda\include\common_types.h"' #9'.file'#9'25'#9'"c:\cuda\include\sm_20_atomic_functions.h"' #9'.file'#9'26'#9'"c:\cuda\include\sm_20_intrinsics.h"' #9'.file'#9'27'#9'"c:\cuda\include\surface_functions.h"' #9'.file'#9'28'#9'"c:\cuda\include\texture_fetch_functions.h"' #9'.file'#9'29'#9'"c:\cuda\include\math_functions_dbl_ptx1.h"' '' #9'.tex .u32 tex;' #9'.const .align 4 .b8 __cudart_i2opi_f[24] = {65,144,67,60,153,14' + '9,98,219,192,221,52,245,209,87,39,252,41,21,68,78,110,131,249,16' + '2};' '' #9'.entry transformKernel (' #9#9'.param .u32 __cudaparm_transformKernel_g_odata,' #9#9'.param .s32 __cudaparm_transformKernel_width,' #9#9'.param .s32 __cudaparm_transformKernel_height,' #9#9'.param .f32 __cudaparm_transformKernel_theta)' #9'{' #9'.reg .u16 %rh<6>;' #9'.reg .u32 %r<279>;' #9'.reg .f32 %f<145>;' #9'.reg .pred %p<50>;' #9'.local .align 4 .b8 __cuda_result_16[28];' #9'.local .align 4 .b8 __cuda_result_44[28];' #9'.loc'#9'15'#9'10'#9'0' '$LBB1_transformKernel:' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'abs.f32 '#9'%f2, %f1;' #9'mov.f32 '#9'%f3, 0f7f800000; '#9'// 1.#INF' #9'setp.eq.f32 '#9'%p1, %f2, %f3;' #9'@!%p1 bra '#9'$Lt_0_46850;' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'neg.f32 '#9'%f4, %f1;' #9'add.rn.f32 '#9'%f5, %f1, %f4;' #9'mov.u32 '#9'%r1, __cudart_i2opi_f;' #9'mov.u32 '#9'%r2, __cuda_result_16;' #9'bra.uni '#9'$Lt_0_3330;' '$Lt_0_46850:' #9'.loc'#9'18'#9'1622'#9'0' #9'mov.f32 '#9'%f6, 0f473ba700; '#9'// 48039' #9'setp.gt.f32 '#9'%p2, %f2, %f6;' #9'.loc'#9'18'#9'1625'#9'0' #9'mov.u32 '#9'%r1, __cudart_i2opi_f;' #9'.loc'#9'18'#9'1622'#9'0' #9'@!%p2 bra '#9'$Lt_0_47362;' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1625'#9'0' #9'mov.b32 '#9'%r3, %f1;' #9'and.b32 '#9'%r4, %r3, -2147483648;' #9'mov.s32 '#9'%r5, %r4;' #9'.loc'#9'18'#9'24'#9'0' #9'shl.b32 '#9'%r6, %r3, 8;' #9'mov.s32 '#9'%r7, %r1;' #9'add.u32 '#9'%r8, %r1, 24;' #9'mov.u32 '#9'%r9, __cuda_result_16;' #9'or.b32 '#9'%r10, %r6, -2147483648;' #9'mov.u32 '#9'%r11, 0;' '$Lt_0_48386:' ' // Loop body line 24, nesting depth: 1, iterations: 6' #9'.loc'#9'18'#9'1642'#9'0' #9'ld.const.u32 '#9'%r12, [%r7+0];' #9'mul.lo.u32 '#9'%r13, %r12, %r10;' #9'add.u32 '#9'%r14, %r13, %r11;' #9'.loc'#9'18'#9'1643'#9'0' #9'set.gt.u32.u32 '#9'%r15, %r13, %r14;' #9'neg.s32 '#9'%r16, %r15;' #9'mul.hi.u32 '#9'%r17, %r12, %r10;' #9'add.u32 '#9'%r11, %r16, %r17;' #9'.loc'#9'18'#9'1644'#9'0' #9'st.local.u32 '#9'[%r9+0], %r14;' #9'add.u32 '#9'%r9, %r9, 4;' #9'add.u32 '#9'%r7, %r7, 4;' #9'setp.ne.u32 '#9'%p3, %r7, %r8;' #9'@%p3 bra '#9'$Lt_0_48386;' #9'.loc'#9'18'#9'1646'#9'0' #9'mov.u32 '#9'%r2, __cuda_result_16;' #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;' #9'.loc'#9'18'#9'1651'#9'0' #9'shl.b32 '#9'%r18, %r3, 1;' #9'shr.u32 '#9'%r19, %r18, 24;' #9'sub.u32 '#9'%r20, %r19, 128;' #9'shr.u32 '#9'%r21, %r20, 5;' #9'mov.s32 '#9'%r22, 4;' #9'sub.s32 '#9'%r23, %r22, %r21;' #9'mul.lo.u32 '#9'%r24, %r23, 4;' #9'add.u32 '#9'%r25, %r24, %r2;' #9'ld.local.u32 '#9'%r11, [%r25+8];' #9'.loc'#9'18'#9'1652'#9'0' #9'ld.local.u32 '#9'%r26, [%r25+4];' #9'and.b32 '#9'%r27, %r20, 31;' #9'mov.u32 '#9'%r28, 0;' #9'setp.eq.u32 '#9'%p4, %r27, %r28;' #9'@%p4 bra '#9'$Lt_0_48898;' #9'.loc'#9'18'#9'1655'#9'0' #9'mov.s32 '#9'%r29, 32;' #9'sub.s32 '#9'%r30, %r29, %r27;' #9'shr.u32 '#9'%r31, %r26, %r30;' #9'shl.b32 '#9'%r32, %r11, %r27;' #9'add.u32 '#9'%r11, %r31, %r32;' #9'.loc'#9'18'#9'1656'#9'0' #9'ld.local.u32 '#9'%r33, [%r25+0];' #9'shr.u32 '#9'%r34, %r33, %r30;' #9'shl.b32 '#9'%r35, %r26, %r27;' #9'add.u32 '#9'%r26, %r34, %r35;' '$Lt_0_48898:' #9'.loc'#9'18'#9'1658'#9'0' #9'shr.u32 '#9'%r36, %r11, 30;' #9'.loc'#9'18'#9'1660'#9'0' #9'shr.u32 '#9'%r37, %r26, 30;' #9'shl.b32 '#9'%r38, %r11, 2;' #9'add.u32 '#9'%r11, %r37, %r38;' #9'.loc'#9'18'#9'1661'#9'0' #9'shl.b32 '#9'%r26, %r26, 2;' #9'mov.u32 '#9'%r39, 0;' #9'setp.eq.u32 '#9'%p5, %r26, %r39;' #9'@%p5 bra '#9'$Lt_0_49666;' #9'.loc'#9'18'#9'1662'#9'0' #9'add.u32 '#9'%r40, %r11, 1;' #9'mov.u32 '#9'%r41, -2147483648;' #9'set.gt.u32.u32 '#9'%r42, %r40, %r41;' #9'neg.s32 '#9'%r43, %r42;' #9'bra.uni '#9'$Lt_0_49410;' '$Lt_0_49666:' #9'mov.u32 '#9'%r44, -2147483648;' #9'set.gt.u32.u32 '#9'%r45, %r11, %r44;' #9'neg.s32 '#9'%r43, %r45;' '$Lt_0_49410:' #9'.loc'#9'18'#9'1663'#9'0' #9'add.u32 '#9'%r36, %r36, %r43;' #9'.loc'#9'18'#9'1662'#9'0' #9'neg.s32 '#9'%r46, %r36;' #9'mov.u32 '#9'%r47, 0;' #9'setp.ne.u32 '#9'%p6, %r4, %r47;' #9'selp.s32 '#9'%r36, %r46, %r36, %p6;' #9'mov.u32 '#9'%r48, 0;' #9'setp.eq.u32 '#9'%p7, %r43, %r48;' #9'@%p7 bra '#9'$Lt_0_49922;' #9'.loc'#9'18'#9'1668'#9'0' #9'neg.s32 '#9'%r26, %r26;' #9'.loc'#9'18'#9'1670'#9'0' #9'mov.u32 '#9'%r49, 0;' #9'set.eq.u32.u32 '#9'%r50, %r26, %r49;' #9'neg.s32 '#9'%r51, %r50;' #9'not.b32 '#9'%r52, %r11;' #9'add.u32 '#9'%r11, %r51, %r52;' #9'.loc'#9'18'#9'1671'#9'0' #9'xor.b32 '#9'%r5, %r4, -2147483648;' '$Lt_0_49922:' #9'.loc'#9'18'#9'1673'#9'0' #9'mov.s32 '#9'%r53, %r36;' #9'mov.u32 '#9'%r54, 0;' #9'setp.le.s32 '#9'%p8, %r11, %r54;' #9'mov.u32 '#9'%r55, 0;' #9'@%p8 bra '#9'$Lt_0_69378;' '$Lt_0_50946:' ' // Loop body line 1673, nesting depth: 1, estimated itera' + 'tions: unknown' #9'.loc'#9'18'#9'1677'#9'0' #9'shr.u32 '#9'%r56, %r26, 31;' #9'shl.b32 '#9'%r57, %r11, 1;' #9'add.u32 '#9'%r11, %r56, %r57;' #9'.loc'#9'18'#9'1678'#9'0' #9'shl.b32 '#9'%r26, %r26, 1;' #9'.loc'#9'18'#9'1679'#9'0' #9'sub.u32 '#9'%r55, %r55, 1;' #9'mov.u32 '#9'%r58, 0;' #9'setp.gt.s32 '#9'%p9, %r11, %r58;' #9'@%p9 bra '#9'$Lt_0_50946;' #9'bra.uni '#9'$Lt_0_50434;' '$Lt_0_69378:' '$Lt_0_50434:' #9'.loc'#9'18'#9'1681'#9'0' #9'mul.lo.u32 '#9'%r26, %r11, -921707870;' #9'.loc'#9'18'#9'1682'#9'0' #9'mov.u32 '#9'%r59, -921707870;' #9'mul.hi.u32 '#9'%r11, %r11, %r59;' #9'mov.u32 '#9'%r60, 0;' #9'setp.le.s32 '#9'%p10, %r11, %r60;' #9'@%p10 bra '#9'$Lt_0_51458;' #9'.loc'#9'18'#9'1684'#9'0' #9'shr.u32 '#9'%r61, %r26, 31;' #9'shl.b32 '#9'%r62, %r11, 1;' #9'add.u32 '#9'%r11, %r61, %r62;' #9'.loc'#9'18'#9'1685'#9'0' #9'shl.b32 '#9'%r26, %r26, 1;' #9'.loc'#9'18'#9'1686'#9'0' #9'sub.u32 '#9'%r55, %r55, 1;' '$Lt_0_51458:' #9'.loc'#9'18'#9'1688'#9'0' #9'mov.u32 '#9'%r63, 0;' #9'set.ne.u32.u32 '#9'%r64, %r26, %r63;' #9'neg.s32 '#9'%r65, %r64;' #9'add.u32 '#9'%r11, %r65, %r11;' #9'shl.b32 '#9'%r66, %r11, 24;' #9'mov.s32 '#9'%r67, 0;' #9'set.lt.u32.s32 '#9'%r68, %r66, %r67;' #9'neg.s32 '#9'%r69, %r68;' #9'shr.u32 '#9'%r70, %r11, 8;' #9'add.u32 '#9'%r71, %r55, 126;' #9'shl.b32 '#9'%r72, %r71, 23;' #9'add.u32 '#9'%r73, %r70, %r72;' #9'add.u32 '#9'%r74, %r69, %r73;' #9'or.b32 '#9'%r75, %r5, %r74;' #9'mov.b32 '#9'%f7, %r75;' #9'bra.uni '#9'$Lt_0_3586;' '$Lt_0_47362:' #9'.loc'#9'18'#9'1703'#9'0' #9'mov.f32 '#9'%f8, 0f3f22f983; '#9'// 0.63662' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1703'#9'0' #9'mul.f32 '#9'%f9, %f1, %f8;' #9'cvt.rni.s32.f32 '#9'%r76, %f9;' #9'mov.s32 '#9'%r53, %r76;' #9'cvt.rn.f32.s32 '#9'%f10, %r76;' #9'neg.f32 '#9'%f11, %f10;' #9'mov.f32 '#9'%f12, 0f3fc90000; '#9'// 1.57031' #9'mad.f32 '#9'%f13, %f12, %f11, %f1;' #9'mov.f32 '#9'%f14, 0f39fd8000; '#9'// 0.000483513' #9'mad.f32 '#9'%f15, %f14, %f11, %f13;' #9'mov.f32 '#9'%f16, 0f34a88000; '#9'// 3.13856e-007' #9'mad.f32 '#9'%f17, %f16, %f11, %f15;' #9'mov.f32 '#9'%f18, 0f2e85a309; '#9'// 6.0771e-011' #9'mad.f32 '#9'%f7, %f18, %f11, %f17;' #9'mov.u32 '#9'%r2, __cuda_result_16;' '$Lt_0_3586:' #9'.loc'#9'18'#9'1949'#9'0' #9'add.s32 '#9'%r77, %r53, 1;' #9'mul.f32 '#9'%f19, %f7, %f7;' #9'and.b32 '#9'%r78, %r77, 1;' #9'mov.u32 '#9'%r79, 0;' #9'setp.eq.s32 '#9'%p11, %r78, %r79;' #9'@%p11 bra '#9'$Lt_0_52226;' #9'.loc'#9'18'#9'1953'#9'0' #9'mov.f32 '#9'%f20, 0f3f800000; '#9'// 1' #9'mov.f32 '#9'%f21, 0fbf000000; '#9'// -0.5' #9'mov.f32 '#9'%f22, 0f3d2aaaa5; '#9'// 0.0416666' #9'mov.f32 '#9'%f23, 0fbab6061a; '#9'// -0.00138873' #9'mov.f32 '#9'%f24, 0f37ccf5ce; '#9'// 2.44332e-005' #9'mad.f32 '#9'%f25, %f24, %f19, %f23;' #9'mad.f32 '#9'%f26, %f19, %f25, %f22;' #9'mad.f32 '#9'%f27, %f19, %f26, %f21;' #9'mad.f32 '#9'%f28, %f19, %f27, %f20;' #9'bra.uni '#9'$Lt_0_51970;' '$Lt_0_52226:' #9'.loc'#9'18'#9'1955'#9'0' #9'mov.f32 '#9'%f29, 0fbe2aaaa3; '#9'// -0.166667' #9'mov.f32 '#9'%f30, 0f3c08839e; '#9'// 0.00833216' #9'mov.f32 '#9'%f31, 0fb94ca1f9; '#9'// -0.000195153' #9'mad.f32 '#9'%f32, %f31, %f19, %f30;' #9'mad.f32 '#9'%f33, %f19, %f32, %f29;' #9'mul.f32 '#9'%f34, %f19, %f33;' #9'mad.f32 '#9'%f28, %f34, %f7, %f7;' '$Lt_0_51970:' #9'.loc'#9'18'#9'1957'#9'0' #9'neg.f32 '#9'%f35, %f28;' #9'and.b32 '#9'%r80, %r77, 2;' #9'mov.s32 '#9'%r81, 0;' #9'setp.ne.s32 '#9'%p12, %r80, %r81;' #9'selp.f32 '#9'%f28, %f35, %f28, %p12;' #9'mov.f32 '#9'%f5, %f28;' '$Lt_0_3330:' #9'.loc'#9'18'#9'1869'#9'0' #9'mov.f32 '#9'%f36, 0f00000000; '#9'// 0' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1869'#9'0' #9'setp.eq.f32 '#9'%p13, %f1, %f36;' #9'selp.s32 '#9'%r82, 1, 0, %p13;' #9'selp.s32 '#9'%r83, 1, 0, %p1;' #9'or.b32 '#9'%r84, %r82, %r83;' #9'mov.u32 '#9'%r85, 0;' #9'setp.eq.s32 '#9'%p14, %r84, %r85;' #9'@%p14 bra '#9'$Lt_0_52482;' #9'mov.f32 '#9'%f37, 0f00000000; '#9'// 0' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1869'#9'0' #9'mul.rn.f32 '#9'%f38, %f1, %f37;' #9'mov.u32 '#9'%r86, __cuda_result_44;' #9'bra.uni '#9'$Lt_0_2306;' '$Lt_0_52482:' #9'.loc'#9'18'#9'1622'#9'0' #9'mov.f32 '#9'%f39, 0f473ba700; '#9'// 48039' #9'setp.gt.f32 '#9'%p15, %f2, %f39;' #9'@!%p15 bra '#9'$Lt_0_52994;' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1625'#9'0' #9'mov.b32 '#9'%r3, %f1;' #9'and.b32 '#9'%r4, %r3, -2147483648;' #9'mov.s32 '#9'%r87, %r4;' #9'.loc'#9'18'#9'24'#9'0' #9'shl.b32 '#9'%r6, %r3, 8;' #9'mov.s32 '#9'%r88, %r1;' #9'add.u32 '#9'%r8, %r1, 24;' #9'mov.u32 '#9'%r89, __cuda_result_44;' #9'or.b32 '#9'%r10, %r6, -2147483648;' #9'mov.u32 '#9'%r90, 0;' '$Lt_0_54018:' ' // Loop body line 24, nesting depth: 1, iterations: 6' #9'.loc'#9'18'#9'1642'#9'0' #9'ld.const.u32 '#9'%r91, [%r88+0];' #9'mul.lo.u32 '#9'%r92, %r91, %r10;' #9'add.u32 '#9'%r93, %r92, %r90;' #9'.loc'#9'18'#9'1643'#9'0' #9'set.gt.u32.u32 '#9'%r94, %r92, %r93;' #9'neg.s32 '#9'%r95, %r94;' #9'mul.hi.u32 '#9'%r96, %r91, %r10;' #9'add.u32 '#9'%r90, %r95, %r96;' #9'.loc'#9'18'#9'1644'#9'0' #9'st.local.u32 '#9'[%r89+0], %r93;' #9'add.u32 '#9'%r89, %r89, 4;' #9'add.u32 '#9'%r88, %r88, 4;' #9'setp.ne.u32 '#9'%p16, %r88, %r8;' #9'@%p16 bra '#9'$Lt_0_54018;' #9'.loc'#9'18'#9'1646'#9'0' #9'mov.u32 '#9'%r86, __cuda_result_44;' #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;' #9'.loc'#9'18'#9'1651'#9'0' #9'shl.b32 '#9'%r97, %r3, 1;' #9'shr.u32 '#9'%r19, %r97, 24;' #9'sub.u32 '#9'%r20, %r19, 128;' #9'shr.u32 '#9'%r21, %r20, 5;' #9'mov.s32 '#9'%r98, 4;' #9'sub.s32 '#9'%r23, %r98, %r21;' #9'mul.lo.u32 '#9'%r24, %r23, 4;' #9'add.u32 '#9'%r99, %r24, %r86;' #9'ld.local.u32 '#9'%r90, [%r99+8];' #9'.loc'#9'18'#9'1652'#9'0' #9'ld.local.u32 '#9'%r100, [%r99+4];' #9'and.b32 '#9'%r27, %r20, 31;' #9'mov.u32 '#9'%r101, 0;' #9'setp.eq.u32 '#9'%p17, %r27, %r101;' #9'@%p17 bra '#9'$Lt_0_54530;' #9'.loc'#9'18'#9'1655'#9'0' #9'mov.s32 '#9'%r102, 32;' #9'sub.s32 '#9'%r30, %r102, %r27;' #9'shr.u32 '#9'%r103, %r100, %r30;' #9'shl.b32 '#9'%r104, %r90, %r27;' #9'add.u32 '#9'%r90, %r103, %r104;' #9'.loc'#9'18'#9'1656'#9'0' #9'ld.local.u32 '#9'%r105, [%r99+0];' #9'shr.u32 '#9'%r106, %r105, %r30;' #9'shl.b32 '#9'%r107, %r100, %r27;' #9'add.u32 '#9'%r100, %r106, %r107;' '$Lt_0_54530:' #9'.loc'#9'18'#9'1658'#9'0' #9'shr.u32 '#9'%r108, %r90, 30;' #9'.loc'#9'18'#9'1660'#9'0' #9'shr.u32 '#9'%r109, %r100, 30;' #9'shl.b32 '#9'%r110, %r90, 2;' #9'add.u32 '#9'%r90, %r109, %r110;' #9'.loc'#9'18'#9'1661'#9'0' #9'shl.b32 '#9'%r100, %r100, 2;' #9'mov.u32 '#9'%r111, 0;' #9'setp.eq.u32 '#9'%p18, %r100, %r111;' #9'@%p18 bra '#9'$Lt_0_55298;' #9'.loc'#9'18'#9'1662'#9'0' #9'add.u32 '#9'%r112, %r90, 1;' #9'mov.u32 '#9'%r113, -2147483648;' #9'set.gt.u32.u32 '#9'%r114, %r112, %r113;' #9'neg.s32 '#9'%r115, %r114;' #9'bra.uni '#9'$Lt_0_55042;' '$Lt_0_55298:' #9'mov.u32 '#9'%r116, -2147483648;' #9'set.gt.u32.u32 '#9'%r117, %r90, %r116;' #9'neg.s32 '#9'%r115, %r117;' '$Lt_0_55042:' #9'.loc'#9'18'#9'1663'#9'0' #9'add.u32 '#9'%r108, %r108, %r115;' #9'.loc'#9'18'#9'1662'#9'0' #9'neg.s32 '#9'%r118, %r108;' #9'mov.u32 '#9'%r119, 0;' #9'setp.ne.u32 '#9'%p19, %r4, %r119;' #9'selp.s32 '#9'%r108, %r118, %r108, %p19;' #9'mov.u32 '#9'%r120, 0;' #9'setp.eq.u32 '#9'%p20, %r115, %r120;' #9'@%p20 bra '#9'$Lt_0_55554;' #9'.loc'#9'18'#9'1668'#9'0' #9'neg.s32 '#9'%r100, %r100;' #9'.loc'#9'18'#9'1670'#9'0' #9'mov.u32 '#9'%r121, 0;' #9'set.eq.u32.u32 '#9'%r122, %r100, %r121;' #9'neg.s32 '#9'%r123, %r122;' #9'not.b32 '#9'%r124, %r90;' #9'add.u32 '#9'%r90, %r123, %r124;' #9'.loc'#9'18'#9'1671'#9'0' #9'xor.b32 '#9'%r87, %r4, -2147483648;' '$Lt_0_55554:' #9'.loc'#9'18'#9'1673'#9'0' #9'mov.s32 '#9'%r125, %r108;' #9'mov.u32 '#9'%r126, 0;' #9'setp.le.s32 '#9'%p21, %r90, %r126;' #9'mov.u32 '#9'%r127, 0;' #9'@%p21 bra '#9'$Lt_0_69634;' '$Lt_0_56578:' ' // Loop body line 1673, nesting depth: 1, estimated itera' + 'tions: unknown' #9'.loc'#9'18'#9'1677'#9'0' #9'shr.u32 '#9'%r128, %r100, 31;' #9'shl.b32 '#9'%r129, %r90, 1;' #9'add.u32 '#9'%r90, %r128, %r129;' #9'.loc'#9'18'#9'1678'#9'0' #9'shl.b32 '#9'%r100, %r100, 1;' #9'.loc'#9'18'#9'1679'#9'0' #9'sub.u32 '#9'%r127, %r127, 1;' #9'mov.u32 '#9'%r130, 0;' #9'setp.gt.s32 '#9'%p22, %r90, %r130;' #9'@%p22 bra '#9'$Lt_0_56578;' #9'bra.uni '#9'$Lt_0_56066;' '$Lt_0_69634:' '$Lt_0_56066:' #9'.loc'#9'18'#9'1681'#9'0' #9'mul.lo.u32 '#9'%r100, %r90, -921707870;' #9'.loc'#9'18'#9'1682'#9'0' #9'mov.u32 '#9'%r131, -921707870;' #9'mul.hi.u32 '#9'%r90, %r90, %r131;' #9'mov.u32 '#9'%r132, 0;' #9'setp.le.s32 '#9'%p23, %r90, %r132;' #9'@%p23 bra '#9'$Lt_0_57090;' #9'.loc'#9'18'#9'1684'#9'0' #9'shr.u32 '#9'%r133, %r100, 31;' #9'shl.b32 '#9'%r134, %r90, 1;' #9'add.u32 '#9'%r90, %r133, %r134;' #9'.loc'#9'18'#9'1685'#9'0' #9'shl.b32 '#9'%r100, %r100, 1;' #9'.loc'#9'18'#9'1686'#9'0' #9'sub.u32 '#9'%r127, %r127, 1;' '$Lt_0_57090:' #9'.loc'#9'18'#9'1688'#9'0' #9'mov.u32 '#9'%r135, 0;' #9'set.ne.u32.u32 '#9'%r136, %r100, %r135;' #9'neg.s32 '#9'%r137, %r136;' #9'add.u32 '#9'%r90, %r137, %r90;' #9'shl.b32 '#9'%r138, %r90, 24;' #9'mov.s32 '#9'%r139, 0;' #9'set.lt.u32.s32 '#9'%r140, %r138, %r139;' #9'neg.s32 '#9'%r141, %r140;' #9'shr.u32 '#9'%r142, %r90, 8;' #9'add.u32 '#9'%r143, %r127, 126;' #9'shl.b32 '#9'%r144, %r143, 23;' #9'add.u32 '#9'%r145, %r142, %r144;' #9'add.u32 '#9'%r146, %r141, %r145;' #9'or.b32 '#9'%r147, %r87, %r146;' #9'mov.b32 '#9'%f40, %r147;' #9'bra.uni '#9'$Lt_0_2562;' '$Lt_0_52994:' #9'.loc'#9'18'#9'1703'#9'0' #9'mov.f32 '#9'%f41, 0f3f22f983; '#9'// 0.63662' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1703'#9'0' #9'mul.f32 '#9'%f9, %f1, %f41;' #9'cvt.rni.s32.f32 '#9'%r76, %f9;' #9'mov.s32 '#9'%r125, %r76;' #9'cvt.rn.f32.s32 '#9'%f10, %r76;' #9'neg.f32 '#9'%f11, %f10;' #9'mov.f32 '#9'%f42, 0f3fc90000; '#9'// 1.57031' #9'mad.f32 '#9'%f43, %f42, %f11, %f1;' #9'mov.f32 '#9'%f44, 0f39fd8000; '#9'// 0.000483513' #9'mad.f32 '#9'%f45, %f44, %f11, %f43;' #9'mov.f32 '#9'%f46, 0f34a88000; '#9'// 3.13856e-007' #9'mad.f32 '#9'%f47, %f46, %f11, %f45;' #9'mov.f32 '#9'%f48, 0f2e85a309; '#9'// 6.0771e-011' #9'mad.f32 '#9'%f40, %f48, %f11, %f47;' #9'mov.u32 '#9'%r86, __cuda_result_44;' '$Lt_0_2562:' #9'.loc'#9'18'#9'1872'#9'0' #9'mul.f32 '#9'%f49, %f40, %f40;' #9'and.b32 '#9'%r148, %r125, 1;' #9'mov.u32 '#9'%r149, 0;' #9'setp.eq.s32 '#9'%p24, %r148, %r149;' #9'@%p24 bra '#9'$Lt_0_57858;' #9'.loc'#9'18'#9'1875'#9'0' #9'mov.f32 '#9'%f50, 0f3f800000; '#9'// 1' #9'mov.f32 '#9'%f51, 0fbf000000; '#9'// -0.5' #9'mov.f32 '#9'%f52, 0f3d2aaaa5; '#9'// 0.0416666' #9'mov.f32 '#9'%f53, 0fbab6061a; '#9'// -0.00138873' #9'mov.f32 '#9'%f54, 0f37ccf5ce; '#9'// 2.44332e-005' #9'mad.f32 '#9'%f55, %f54, %f49, %f53;' #9'mad.f32 '#9'%f56, %f49, %f55, %f52;' #9'mad.f32 '#9'%f57, %f49, %f56, %f51;' #9'mad.f32 '#9'%f58, %f49, %f57, %f50;' #9'bra.uni '#9'$Lt_0_57602;' '$Lt_0_57858:' #9'.loc'#9'18'#9'1877'#9'0' #9'mov.f32 '#9'%f59, 0fbe2aaaa3; '#9'// -0.166667' #9'mov.f32 '#9'%f60, 0f3c08839e; '#9'// 0.00833216' #9'mov.f32 '#9'%f61, 0fb94ca1f9; '#9'// -0.000195153' #9'mad.f32 '#9'%f62, %f61, %f49, %f60;' #9'mad.f32 '#9'%f63, %f49, %f62, %f59;' #9'mul.f32 '#9'%f64, %f49, %f63;' #9'mad.f32 '#9'%f58, %f64, %f40, %f40;' '$Lt_0_57602:' #9'.loc'#9'18'#9'1879'#9'0' #9'neg.f32 '#9'%f65, %f58;' #9'and.b32 '#9'%r150, %r125, 2;' #9'mov.s32 '#9'%r151, 0;' #9'setp.ne.s32 '#9'%p25, %r150, %r151;' #9'selp.f32 '#9'%f58, %f65, %f58, %p25;' #9'mov.f32 '#9'%f38, %f58;' '$Lt_0_2306:' #9'.loc'#9'15'#9'22'#9'0' #9'mov.u16 '#9'%rh1, %ctaid.y;' #9'mov.u16 '#9'%rh2, %ntid.y;' #9'mul.wide.u16 '#9'%r152, %rh1, %rh2;' #9'ld.param.s32 '#9'%r153, [__cudaparm_transformKernel_height];' #9'cvt.rn.f32.s32 '#9'%f66, %r153;' #9'mov.u16 '#9'%rh3, %ctaid.x;' #9'mov.u16 '#9'%rh4, %ntid.x;' #9'mul.wide.u16 '#9'%r154, %rh3, %rh4;' #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];' #9'cvt.rn.f32.s32 '#9'%f67, %r155;' #9'cvt.u32.u16 '#9'%r156, %tid.y;' #9'add.u32 '#9'%r157, %r156, %r152;' #9'cvt.u32.u16 '#9'%r158, %tid.x;' #9'add.u32 '#9'%r159, %r158, %r154;' #9'cvt.rn.f32.u32 '#9'%f68, %r157;' #9'cvt.rn.f32.u32 '#9'%f69, %r159;' #9'div.full.f32 '#9'%f70, %f68, %f66;' #9'div.full.f32 '#9'%f71, %f69, %f67;' #9'mov.f32 '#9'%f72, 0fbf000000; '#9'// -0.5' #9'add.f32 '#9'%f73, %f70, %f72;' #9'mov.f32 '#9'%f74, 0fbf000000; '#9'// -0.5' #9'add.f32 '#9'%f75, %f71, %f74;' #9'mul.f32 '#9'%f76, %f38, %f73;' #9'mul.f32 '#9'%f77, %f75, %f5;' #9'sub.f32 '#9'%f78, %f77, %f76;' #9'mov.f32 '#9'%f79, 0f3f000000; '#9'// 0.5' #9'add.f32 '#9'%f80, %f78, %f79;' #9'.loc'#9'18'#9'1946'#9'0' #9'@!%p1 bra '#9'$Lt_0_58114;' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'neg.f32 '#9'%f81, %f1;' #9'add.rn.f32 '#9'%f82, %f1, %f81;' #9'bra.uni '#9'$Lt_0_1282;' '$Lt_0_58114:' #9'.loc'#9'18'#9'1622'#9'0' #9'mov.f32 '#9'%f83, 0f473ba700; '#9'// 48039' #9'setp.gt.f32 '#9'%p26, %f2, %f83;' #9'@!%p26 bra '#9'$Lt_0_58626;' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1625'#9'0' #9'mov.b32 '#9'%r3, %f1;' #9'and.b32 '#9'%r4, %r3, -2147483648;' #9'mov.s32 '#9'%r5, %r4;' #9'.loc'#9'18'#9'24'#9'0' #9'shl.b32 '#9'%r6, %r3, 8;' #9'mov.s32 '#9'%r7, %r1;' #9'add.u32 '#9'%r8, %r1, 24;' #9'mov.u32 '#9'%r9, __cuda_result_16;' #9'or.b32 '#9'%r10, %r6, -2147483648;' #9'mov.u32 '#9'%r11, 0;' '$Lt_0_59650:' ' // Loop body line 24, nesting depth: 1, iterations: 6' #9'.loc'#9'18'#9'1642'#9'0' #9'ld.const.u32 '#9'%r160, [%r7+0];' #9'mul.lo.u32 '#9'%r161, %r160, %r10;' #9'add.u32 '#9'%r162, %r161, %r11;' #9'.loc'#9'18'#9'1643'#9'0' #9'set.gt.u32.u32 '#9'%r163, %r161, %r162;' #9'neg.s32 '#9'%r164, %r163;' #9'mul.hi.u32 '#9'%r165, %r160, %r10;' #9'add.u32 '#9'%r11, %r164, %r165;' #9'.loc'#9'18'#9'1644'#9'0' #9'st.local.u32 '#9'[%r9+0], %r162;' #9'add.u32 '#9'%r9, %r9, 4;' #9'add.u32 '#9'%r7, %r7, 4;' #9'setp.ne.u32 '#9'%p27, %r7, %r8;' #9'@%p27 bra '#9'$Lt_0_59650;' #9'.loc'#9'18'#9'1646'#9'0' #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;' #9'.loc'#9'18'#9'1651'#9'0' #9'shl.b32 '#9'%r166, %r3, 1;' #9'shr.u32 '#9'%r19, %r166, 24;' #9'sub.u32 '#9'%r20, %r19, 128;' #9'shr.u32 '#9'%r21, %r20, 5;' #9'mov.s32 '#9'%r167, 4;' #9'sub.s32 '#9'%r23, %r167, %r21;' #9'mul.lo.u32 '#9'%r24, %r23, 4;' #9'add.u32 '#9'%r25, %r24, %r2;' #9'ld.local.u32 '#9'%r11, [%r25+8];' #9'.loc'#9'18'#9'1652'#9'0' #9'ld.local.u32 '#9'%r26, [%r25+4];' #9'and.b32 '#9'%r27, %r20, 31;' #9'mov.u32 '#9'%r168, 0;' #9'setp.eq.u32 '#9'%p28, %r27, %r168;' #9'@%p28 bra '#9'$Lt_0_60162;' #9'.loc'#9'18'#9'1655'#9'0' #9'mov.s32 '#9'%r169, 32;' #9'sub.s32 '#9'%r30, %r169, %r27;' #9'shr.u32 '#9'%r170, %r26, %r30;' #9'shl.b32 '#9'%r171, %r11, %r27;' #9'add.u32 '#9'%r11, %r170, %r171;' #9'.loc'#9'18'#9'1656'#9'0' #9'ld.local.u32 '#9'%r172, [%r25+0];' #9'shr.u32 '#9'%r173, %r172, %r30;' #9'shl.b32 '#9'%r174, %r26, %r27;' #9'add.u32 '#9'%r26, %r173, %r174;' '$Lt_0_60162:' #9'.loc'#9'18'#9'1658'#9'0' #9'shr.u32 '#9'%r36, %r11, 30;' #9'.loc'#9'18'#9'1660'#9'0' #9'shr.u32 '#9'%r175, %r26, 30;' #9'shl.b32 '#9'%r176, %r11, 2;' #9'add.u32 '#9'%r11, %r175, %r176;' #9'.loc'#9'18'#9'1661'#9'0' #9'shl.b32 '#9'%r26, %r26, 2;' #9'mov.u32 '#9'%r177, 0;' #9'setp.eq.u32 '#9'%p29, %r26, %r177;' #9'@%p29 bra '#9'$Lt_0_60930;' #9'.loc'#9'18'#9'1662'#9'0' #9'add.u32 '#9'%r178, %r11, 1;' #9'mov.u32 '#9'%r179, -2147483648;' #9'set.gt.u32.u32 '#9'%r180, %r178, %r179;' #9'neg.s32 '#9'%r181, %r180;' #9'bra.uni '#9'$Lt_0_60674;' '$Lt_0_60930:' #9'mov.u32 '#9'%r182, -2147483648;' #9'set.gt.u32.u32 '#9'%r183, %r11, %r182;' #9'neg.s32 '#9'%r181, %r183;' '$Lt_0_60674:' #9'.loc'#9'18'#9'1663'#9'0' #9'add.u32 '#9'%r36, %r36, %r181;' #9'.loc'#9'18'#9'1662'#9'0' #9'neg.s32 '#9'%r184, %r36;' #9'mov.u32 '#9'%r185, 0;' #9'setp.ne.u32 '#9'%p30, %r4, %r185;' #9'selp.s32 '#9'%r36, %r184, %r36, %p30;' #9'mov.u32 '#9'%r186, 0;' #9'setp.eq.u32 '#9'%p31, %r181, %r186;' #9'@%p31 bra '#9'$Lt_0_61186;' #9'.loc'#9'18'#9'1668'#9'0' #9'neg.s32 '#9'%r26, %r26;' #9'.loc'#9'18'#9'1670'#9'0' #9'mov.u32 '#9'%r187, 0;' #9'set.eq.u32.u32 '#9'%r188, %r26, %r187;' #9'neg.s32 '#9'%r189, %r188;' #9'not.b32 '#9'%r190, %r11;' #9'add.u32 '#9'%r11, %r189, %r190;' #9'.loc'#9'18'#9'1671'#9'0' #9'xor.b32 '#9'%r5, %r4, -2147483648;' '$Lt_0_61186:' #9'.loc'#9'18'#9'1673'#9'0' #9'mov.s32 '#9'%r53, %r36;' #9'mov.u32 '#9'%r191, 0;' #9'setp.le.s32 '#9'%p32, %r11, %r191;' #9'@%p32 bra '#9'$Lt_0_69890;' #9'mov.u32 '#9'%r55, 0;' '$Lt_0_62210:' ' // Loop body line 1673, nesting depth: 1, estimated itera' + 'tions: unknown' #9'.loc'#9'18'#9'1677'#9'0' #9'shr.u32 '#9'%r192, %r26, 31;' #9'shl.b32 '#9'%r193, %r11, 1;' #9'add.u32 '#9'%r11, %r192, %r193;' #9'.loc'#9'18'#9'1678'#9'0' #9'shl.b32 '#9'%r26, %r26, 1;' #9'.loc'#9'18'#9'1679'#9'0' #9'sub.u32 '#9'%r55, %r55, 1;' #9'mov.u32 '#9'%r194, 0;' #9'setp.gt.s32 '#9'%p33, %r11, %r194;' #9'@%p33 bra '#9'$Lt_0_62210;' #9'bra.uni '#9'$Lt_0_61698;' '$Lt_0_69890:' #9'mov.u32 '#9'%r55, 0;' '$Lt_0_61698:' #9'.loc'#9'18'#9'1681'#9'0' #9'mul.lo.u32 '#9'%r26, %r11, -921707870;' #9'.loc'#9'18'#9'1682'#9'0' #9'mov.u32 '#9'%r195, -921707870;' #9'mul.hi.u32 '#9'%r11, %r11, %r195;' #9'mov.u32 '#9'%r196, 0;' #9'setp.le.s32 '#9'%p34, %r11, %r196;' #9'@%p34 bra '#9'$Lt_0_62722;' #9'.loc'#9'18'#9'1684'#9'0' #9'shr.u32 '#9'%r197, %r26, 31;' #9'shl.b32 '#9'%r198, %r11, 1;' #9'add.u32 '#9'%r11, %r197, %r198;' #9'.loc'#9'18'#9'1685'#9'0' #9'shl.b32 '#9'%r26, %r26, 1;' #9'.loc'#9'18'#9'1686'#9'0' #9'sub.u32 '#9'%r55, %r55, 1;' '$Lt_0_62722:' #9'.loc'#9'18'#9'1688'#9'0' #9'mov.u32 '#9'%r199, 0;' #9'set.ne.u32.u32 '#9'%r200, %r26, %r199;' #9'neg.s32 '#9'%r201, %r200;' #9'add.u32 '#9'%r11, %r201, %r11;' #9'shl.b32 '#9'%r202, %r11, 24;' #9'mov.s32 '#9'%r203, 0;' #9'set.lt.u32.s32 '#9'%r204, %r202, %r203;' #9'neg.s32 '#9'%r205, %r204;' #9'shr.u32 '#9'%r206, %r11, 8;' #9'add.u32 '#9'%r207, %r55, 126;' #9'shl.b32 '#9'%r208, %r207, 23;' #9'add.u32 '#9'%r209, %r206, %r208;' #9'add.u32 '#9'%r210, %r205, %r209;' #9'or.b32 '#9'%r211, %r5, %r210;' #9'mov.b32 '#9'%f7, %r211;' #9'bra.uni '#9'$Lt_0_1538;' '$Lt_0_58626:' #9'.loc'#9'18'#9'1703'#9'0' #9'mov.f32 '#9'%f84, 0f3f22f983; '#9'// 0.63662' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1703'#9'0' #9'mul.f32 '#9'%f9, %f1, %f84;' #9'cvt.rni.s32.f32 '#9'%r76, %f9;' #9'mov.s32 '#9'%r53, %r76;' #9'cvt.rn.f32.s32 '#9'%f10, %r76;' #9'neg.f32 '#9'%f11, %f10;' #9'mov.f32 '#9'%f85, 0f3fc90000; '#9'// 1.57031' #9'mad.f32 '#9'%f86, %f85, %f11, %f1;' #9'mov.f32 '#9'%f87, 0f39fd8000; '#9'// 0.000483513' #9'mad.f32 '#9'%f88, %f87, %f11, %f86;' #9'mov.f32 '#9'%f89, 0f34a88000; '#9'// 3.13856e-007' #9'mad.f32 '#9'%f90, %f89, %f11, %f88;' #9'mov.f32 '#9'%f91, 0f2e85a309; '#9'// 6.0771e-011' #9'mad.f32 '#9'%f7, %f91, %f11, %f90;' '$Lt_0_1538:' #9'.loc'#9'18'#9'1949'#9'0' #9'add.s32 '#9'%r77, %r53, 1;' #9'mul.f32 '#9'%f19, %f7, %f7;' #9'and.b32 '#9'%r212, %r77, 1;' #9'mov.u32 '#9'%r213, 0;' #9'setp.eq.s32 '#9'%p35, %r212, %r213;' #9'@%p35 bra '#9'$Lt_0_63490;' #9'.loc'#9'18'#9'1953'#9'0' #9'mov.f32 '#9'%f92, 0f3f800000; '#9'// 1' #9'mov.f32 '#9'%f93, 0fbf000000; '#9'// -0.5' #9'mov.f32 '#9'%f94, 0f3d2aaaa5; '#9'// 0.0416666' #9'mov.f32 '#9'%f95, 0fbab6061a; '#9'// -0.00138873' #9'mov.f32 '#9'%f96, 0f37ccf5ce; '#9'// 2.44332e-005' #9'mad.f32 '#9'%f97, %f96, %f19, %f95;' #9'mad.f32 '#9'%f98, %f19, %f97, %f94;' #9'mad.f32 '#9'%f99, %f19, %f98, %f93;' #9'mad.f32 '#9'%f28, %f19, %f99, %f92;' #9'bra.uni '#9'$Lt_0_63234;' '$Lt_0_63490:' #9'.loc'#9'18'#9'1955'#9'0' #9'mov.f32 '#9'%f100, 0fbe2aaaa3; '#9'// -0.166667' #9'mov.f32 '#9'%f101, 0f3c08839e; '#9'// 0.00833216' #9'mov.f32 '#9'%f102, 0fb94ca1f9; '#9'// -0.000195153' #9'mad.f32 '#9'%f103, %f102, %f19, %f101;' #9'mad.f32 '#9'%f104, %f19, %f103, %f100;' #9'mul.f32 '#9'%f105, %f19, %f104;' #9'mad.f32 '#9'%f28, %f105, %f7, %f7;' '$Lt_0_63234:' #9'.loc'#9'18'#9'1957'#9'0' #9'neg.f32 '#9'%f106, %f28;' #9'and.b32 '#9'%r214, %r77, 2;' #9'mov.s32 '#9'%r215, 0;' #9'setp.ne.s32 '#9'%p36, %r214, %r215;' #9'selp.f32 '#9'%f28, %f106, %f28, %p36;' #9'mov.f32 '#9'%f82, %f28;' '$Lt_0_1282:' #9'.loc'#9'18'#9'1869'#9'0' #9'mov.u32 '#9'%r216, 0;' #9'setp.eq.s32 '#9'%p37, %r84, %r216;' #9'@%p37 bra '#9'$Lt_0_63746;' #9'mov.f32 '#9'%f107, 0f00000000; '#9'// 0' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1869'#9'0' #9'mul.rn.f32 '#9'%f38, %f1, %f107;' #9'bra.uni '#9'$Lt_0_258;' '$Lt_0_63746:' #9'.loc'#9'18'#9'1622'#9'0' #9'mov.f32 '#9'%f108, 0f473ba700; '#9'// 48039' #9'setp.gt.f32 '#9'%p38, %f2, %f108;' #9'@!%p38 bra '#9'$Lt_0_64258;' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1625'#9'0' #9'mov.b32 '#9'%r3, %f1;' #9'and.b32 '#9'%r4, %r3, -2147483648;' #9'mov.s32 '#9'%r87, %r4;' #9'.loc'#9'18'#9'24'#9'0' #9'shl.b32 '#9'%r6, %r3, 8;' #9'mov.s32 '#9'%r88, %r1;' #9'add.u32 '#9'%r8, %r1, 24;' #9'mov.u32 '#9'%r89, __cuda_result_44;' #9'or.b32 '#9'%r10, %r6, -2147483648;' #9'mov.u32 '#9'%r90, 0;' '$Lt_0_65282:' ' // Loop body line 24, nesting depth: 1, iterations: 6' #9'.loc'#9'18'#9'1642'#9'0' #9'ld.const.u32 '#9'%r217, [%r88+0];' #9'mul.lo.u32 '#9'%r218, %r217, %r10;' #9'add.u32 '#9'%r219, %r218, %r90;' #9'.loc'#9'18'#9'1643'#9'0' #9'set.gt.u32.u32 '#9'%r220, %r218, %r219;' #9'neg.s32 '#9'%r221, %r220;' #9'mul.hi.u32 '#9'%r222, %r217, %r10;' #9'add.u32 '#9'%r90, %r221, %r222;' #9'.loc'#9'18'#9'1644'#9'0' #9'st.local.u32 '#9'[%r89+0], %r219;' #9'add.u32 '#9'%r89, %r89, 4;' #9'add.u32 '#9'%r88, %r88, 4;' #9'setp.ne.u32 '#9'%p39, %r88, %r8;' #9'@%p39 bra '#9'$Lt_0_65282;' #9'.loc'#9'18'#9'1646'#9'0' #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;' #9'.loc'#9'18'#9'1651'#9'0' #9'shl.b32 '#9'%r223, %r3, 1;' #9'shr.u32 '#9'%r19, %r223, 24;' #9'sub.u32 '#9'%r20, %r19, 128;' #9'shr.u32 '#9'%r21, %r20, 5;' #9'mov.s32 '#9'%r224, 4;' #9'sub.s32 '#9'%r23, %r224, %r21;' #9'mul.lo.u32 '#9'%r24, %r23, 4;' #9'add.u32 '#9'%r99, %r24, %r86;' #9'ld.local.u32 '#9'%r90, [%r99+8];' #9'.loc'#9'18'#9'1652'#9'0' #9'ld.local.u32 '#9'%r100, [%r99+4];' #9'and.b32 '#9'%r27, %r20, 31;' #9'mov.u32 '#9'%r225, 0;' #9'setp.eq.u32 '#9'%p40, %r27, %r225;' #9'@%p40 bra '#9'$Lt_0_65794;' #9'.loc'#9'18'#9'1655'#9'0' #9'mov.s32 '#9'%r226, 32;' #9'sub.s32 '#9'%r30, %r226, %r27;' #9'shr.u32 '#9'%r227, %r100, %r30;' #9'shl.b32 '#9'%r228, %r90, %r27;' #9'add.u32 '#9'%r90, %r227, %r228;' #9'.loc'#9'18'#9'1656'#9'0' #9'ld.local.u32 '#9'%r229, [%r99+0];' #9'shr.u32 '#9'%r230, %r229, %r30;' #9'shl.b32 '#9'%r231, %r100, %r27;' #9'add.u32 '#9'%r100, %r230, %r231;' '$Lt_0_65794:' #9'.loc'#9'18'#9'1658'#9'0' #9'shr.u32 '#9'%r108, %r90, 30;' #9'.loc'#9'18'#9'1660'#9'0' #9'shr.u32 '#9'%r232, %r100, 30;' #9'shl.b32 '#9'%r233, %r90, 2;' #9'add.u32 '#9'%r90, %r232, %r233;' #9'.loc'#9'18'#9'1661'#9'0' #9'shl.b32 '#9'%r100, %r100, 2;' #9'mov.u32 '#9'%r234, 0;' #9'setp.eq.u32 '#9'%p41, %r100, %r234;' #9'@%p41 bra '#9'$Lt_0_66562;' #9'.loc'#9'18'#9'1662'#9'0' #9'add.u32 '#9'%r235, %r90, 1;' #9'mov.u32 '#9'%r236, -2147483648;' #9'set.gt.u32.u32 '#9'%r237, %r235, %r236;' #9'neg.s32 '#9'%r238, %r237;' #9'bra.uni '#9'$Lt_0_66306;' '$Lt_0_66562:' #9'mov.u32 '#9'%r239, -2147483648;' #9'set.gt.u32.u32 '#9'%r240, %r90, %r239;' #9'neg.s32 '#9'%r238, %r240;' '$Lt_0_66306:' #9'.loc'#9'18'#9'1663'#9'0' #9'add.u32 '#9'%r108, %r108, %r238;' #9'.loc'#9'18'#9'1662'#9'0' #9'neg.s32 '#9'%r241, %r108;' #9'mov.u32 '#9'%r242, 0;' #9'setp.ne.u32 '#9'%p42, %r4, %r242;' #9'selp.s32 '#9'%r108, %r241, %r108, %p42;' #9'mov.u32 '#9'%r243, 0;' #9'setp.eq.u32 '#9'%p43, %r238, %r243;' #9'@%p43 bra '#9'$Lt_0_66818;' #9'.loc'#9'18'#9'1668'#9'0' #9'neg.s32 '#9'%r100, %r100;' #9'.loc'#9'18'#9'1670'#9'0' #9'mov.u32 '#9'%r244, 0;' #9'set.eq.u32.u32 '#9'%r245, %r100, %r244;' #9'neg.s32 '#9'%r246, %r245;' #9'not.b32 '#9'%r247, %r90;' #9'add.u32 '#9'%r90, %r246, %r247;' #9'.loc'#9'18'#9'1671'#9'0' #9'xor.b32 '#9'%r87, %r4, -2147483648;' '$Lt_0_66818:' #9'.loc'#9'18'#9'1673'#9'0' #9'mov.s32 '#9'%r125, %r108;' #9'mov.u32 '#9'%r248, 0;' #9'setp.le.s32 '#9'%p44, %r90, %r248;' #9'@%p44 bra '#9'$Lt_0_70146;' #9'mov.u32 '#9'%r127, 0;' '$Lt_0_67842:' ' // Loop body line 1673, nesting depth: 1, estimated itera' + 'tions: unknown' #9'.loc'#9'18'#9'1677'#9'0' #9'shr.u32 '#9'%r249, %r100, 31;' #9'shl.b32 '#9'%r250, %r90, 1;' #9'add.u32 '#9'%r90, %r249, %r250;' #9'.loc'#9'18'#9'1678'#9'0' #9'shl.b32 '#9'%r100, %r100, 1;' #9'.loc'#9'18'#9'1679'#9'0' #9'sub.u32 '#9'%r127, %r127, 1;' #9'mov.u32 '#9'%r251, 0;' #9'setp.gt.s32 '#9'%p45, %r90, %r251;' #9'@%p45 bra '#9'$Lt_0_67842;' #9'bra.uni '#9'$Lt_0_67330;' '$Lt_0_70146:' #9'mov.u32 '#9'%r127, 0;' '$Lt_0_67330:' #9'.loc'#9'18'#9'1681'#9'0' #9'mul.lo.u32 '#9'%r100, %r90, -921707870;' #9'.loc'#9'18'#9'1682'#9'0' #9'mov.u32 '#9'%r252, -921707870;' #9'mul.hi.u32 '#9'%r90, %r90, %r252;' #9'mov.u32 '#9'%r253, 0;' #9'setp.le.s32 '#9'%p46, %r90, %r253;' #9'@%p46 bra '#9'$Lt_0_68354;' #9'.loc'#9'18'#9'1684'#9'0' #9'shr.u32 '#9'%r254, %r100, 31;' #9'shl.b32 '#9'%r255, %r90, 1;' #9'add.u32 '#9'%r90, %r254, %r255;' #9'.loc'#9'18'#9'1685'#9'0' #9'shl.b32 '#9'%r100, %r100, 1;' #9'.loc'#9'18'#9'1686'#9'0' #9'sub.u32 '#9'%r127, %r127, 1;' '$Lt_0_68354:' #9'.loc'#9'18'#9'1688'#9'0' #9'mov.u32 '#9'%r256, 0;' #9'set.ne.u32.u32 '#9'%r257, %r100, %r256;' #9'neg.s32 '#9'%r258, %r257;' #9'add.u32 '#9'%r90, %r258, %r90;' #9'shl.b32 '#9'%r259, %r90, 24;' #9'mov.s32 '#9'%r260, 0;' #9'set.lt.u32.s32 '#9'%r261, %r259, %r260;' #9'neg.s32 '#9'%r262, %r261;' #9'shr.u32 '#9'%r263, %r90, 8;' #9'add.u32 '#9'%r264, %r127, 126;' #9'shl.b32 '#9'%r265, %r264, 23;' #9'add.u32 '#9'%r266, %r263, %r265;' #9'add.u32 '#9'%r267, %r262, %r266;' #9'or.b32 '#9'%r268, %r87, %r267;' #9'mov.b32 '#9'%f40, %r268;' #9'bra.uni '#9'$Lt_0_514;' '$Lt_0_64258:' #9'.loc'#9'18'#9'1703'#9'0' #9'mov.f32 '#9'%f109, 0f3f22f983; '#9'// 0.63662' #9'.loc'#9'18'#9'1946'#9'0' #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];' #9'.loc'#9'18'#9'1703'#9'0' #9'mul.f32 '#9'%f9, %f1, %f109;' #9'cvt.rni.s32.f32 '#9'%r76, %f9;' #9'mov.s32 '#9'%r125, %r76;' #9'cvt.rn.f32.s32 '#9'%f10, %r76;' #9'neg.f32 '#9'%f11, %f10;' #9'mov.f32 '#9'%f110, 0f3fc90000; '#9'// 1.57031' #9'mad.f32 '#9'%f111, %f110, %f11, %f1;' #9'mov.f32 '#9'%f112, 0f39fd8000; '#9'// 0.000483513' #9'mad.f32 '#9'%f113, %f112, %f11, %f111;' #9'mov.f32 '#9'%f114, 0f34a88000; '#9'// 3.13856e-007' #9'mad.f32 '#9'%f115, %f114, %f11, %f113;' #9'mov.f32 '#9'%f116, 0f2e85a309; '#9'// 6.0771e-011' #9'mad.f32 '#9'%f40, %f116, %f11, %f115;' '$Lt_0_514:' #9'.loc'#9'18'#9'1872'#9'0' #9'mul.f32 '#9'%f49, %f40, %f40;' #9'and.b32 '#9'%r269, %r125, 1;' #9'mov.u32 '#9'%r270, 0;' #9'setp.eq.s32 '#9'%p47, %r269, %r270;' #9'@%p47 bra '#9'$Lt_0_69122;' #9'.loc'#9'18'#9'1875'#9'0' #9'mov.f32 '#9'%f117, 0f3f800000; '#9'// 1' #9'mov.f32 '#9'%f118, 0fbf000000; '#9'// -0.5' #9'mov.f32 '#9'%f119, 0f3d2aaaa5; '#9'// 0.0416666' #9'mov.f32 '#9'%f120, 0fbab6061a; '#9'// -0.00138873' #9'mov.f32 '#9'%f121, 0f37ccf5ce; '#9'// 2.44332e-005' #9'mad.f32 '#9'%f122, %f121, %f49, %f120;' #9'mad.f32 '#9'%f123, %f49, %f122, %f119;' #9'mad.f32 '#9'%f124, %f49, %f123, %f118;' #9'mad.f32 '#9'%f58, %f49, %f124, %f117;' #9'bra.uni '#9'$Lt_0_68866;' '$Lt_0_69122:' #9'.loc'#9'18'#9'1877'#9'0' #9'mov.f32 '#9'%f125, 0fbe2aaaa3; '#9'// -0.166667' #9'mov.f32 '#9'%f126, 0f3c08839e; '#9'// 0.00833216' #9'mov.f32 '#9'%f127, 0fb94ca1f9; '#9'// -0.000195153' #9'mad.f32 '#9'%f128, %f127, %f49, %f126;' #9'mad.f32 '#9'%f129, %f49, %f128, %f125;' #9'mul.f32 '#9'%f130, %f49, %f129;' #9'mad.f32 '#9'%f58, %f130, %f40, %f40;' '$Lt_0_68866:' #9'.loc'#9'18'#9'1879'#9'0' #9'neg.f32 '#9'%f131, %f58;' #9'and.b32 '#9'%r271, %r125, 2;' #9'mov.s32 '#9'%r272, 0;' #9'setp.ne.s32 '#9'%p48, %r271, %r272;' #9'selp.f32 '#9'%f58, %f131, %f58, %p48;' #9'mov.f32 '#9'%f38, %f58;' '$Lt_0_258:' #9'.loc'#9'15'#9'23'#9'0' #9'mov.f32 '#9'%f132, %f80;' #9'mul.f32 '#9'%f133, %f82, %f73;' #9'mad.f32 '#9'%f134, %f75, %f38, %f133;' #9'mov.f32 '#9'%f135, 0f3f000000; '#9'// 0.5' #9'add.f32 '#9'%f136, %f134, %f135;' #9'mov.f32 '#9'%f137, 0f00000000; '#9'// 0' #9'mov.f32 '#9'%f138, 0f00000000; '#9'// 0' #9'tex.2d.v4.f32.f32 {%f139,%f140,%f141,%f142},[tex,{%f132,%f136,%' + 'f137,%f138}];' #9'.loc'#9'15'#9'26'#9'0' #9'mov.f32 '#9'%f143, %f139;' #9'ld.param.u32 '#9'%r273, [__cudaparm_transformKernel_g_odata];' #9'.loc'#9'15'#9'22'#9'0' #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];' #9'.loc'#9'15'#9'26'#9'0' #9'mul.lo.u32 '#9'%r274, %r155, %r157;' #9'add.u32 '#9'%r275, %r159, %r274;' #9'mul.lo.u32 '#9'%r276, %r275, 4;' #9'add.u32 '#9'%r277, %r273, %r276;' #9'st.global.f32 '#9'[%r277+0], %f143;' #9'.loc'#9'15'#9'27'#9'0' #9'exit;' '$LDWend_transformKernel:' #9'} // transformKernel' '') object TurnPicture: TCUDAFunction KernelName = 'transformKernel' BlockShape.SizeX = 8 BlockShape.SizeY = 8 Grid.SizeX = 64 Grid.SizeY = 64 end object Image: TCUDATexture KernelName = 'tex' AddressModeS = amWrap AddressModeT = amWrap FilterMode = fmLinear Format = ctFloat ChannelNum = cnOne MemDataArray = TextureArray end end object TextureArray: TCUDAMemData Width = 512 Height = 512 MemoryType = mtArray ChannelsType = ctFloat end object ResultData: TCUDAMemData Width = 512 Height = 512 MemoryType = mtDevice ChannelsType = ctFloat end end object GLCUDADevice1: TGLCUDADevice SelectDevice = 'GeForce GTX 1050 Ti (1)' Left = 336 Top = 56 end object GLCUDACompiler1: TGLCUDACompiler Left = 558 Top = 59 end end