|
@@ -1,4 +1,4 @@
|
|
-object Form1: TForm1
|
|
|
|
|
|
+object FormST: TFormST
|
|
Left = 0
|
|
Left = 0
|
|
Top = 0
|
|
Top = 0
|
|
BorderStyle = bsDialog
|
|
BorderStyle = bsDialog
|
|
@@ -55,9 +55,7 @@ object Form1: TForm1
|
|
''
|
|
''
|
|
#9'//-----------------------------------------------------------'
|
|
#9'//-----------------------------------------------------------'
|
|
|
|
|
|
- #9'// Compiling C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a' +
|
|
|
|
- '14_00000001-9_temp.cpp3.i (C:/Users/YARUND~1/AppData/Local/Temp/' +
|
|
|
|
- 'ccBI#.a03752)'
|
|
|
|
|
|
+ #9'// Compiling C:/Users/VPV~1/AppData/Local/Temp/...)'
|
|
#9'//-----------------------------------------------------------'
|
|
#9'//-----------------------------------------------------------'
|
|
''
|
|
''
|
|
#9'//-----------------------------------------------------------'
|
|
#9'//-----------------------------------------------------------'
|
|
@@ -69,1055 +67,6 @@ object Form1: TForm1
|
|
#9'// -m2'#9'(Report advisories)'
|
|
#9'// -m2'#9'(Report advisories)'
|
|
#9'//-----------------------------------------------------------'
|
|
#9'//-----------------------------------------------------------'
|
|
''
|
|
''
|
|
-
|
|
|
|
- #9'.file'#9'1'#9'"C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a14_0' +
|
|
|
|
- '0000001-8_temp.cudafe2.gpu"'
|
|
|
|
-
|
|
|
|
- #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
|
|
|
|
- 'E\crtdefs.h"'
|
|
|
|
- #9'.file'#9'3'#9'"C:\CUDA\include\crt/device_runtime.h"'
|
|
|
|
- #9'.file'#9'4'#9'"C:\CUDA\include\host_defines.h"'
|
|
|
|
- #9'.file'#9'5'#9'"C:\CUDA\include\builtin_types.h"'
|
|
|
|
- #9'.file'#9'6'#9'"c:\cuda\include\device_types.h"'
|
|
|
|
- #9'.file'#9'7'#9'"c:\cuda\include\driver_types.h"'
|
|
|
|
- #9'.file'#9'8'#9'"c:\cuda\include\surface_types.h"'
|
|
|
|
- #9'.file'#9'9'#9'"c:\cuda\include\texture_types.h"'
|
|
|
|
- #9'.file'#9'10'#9'"c:\cuda\include\vector_types.h"'
|
|
|
|
- #9'.file'#9'11'#9'"c:\cuda\include\host_defines.h"'
|
|
|
|
- #9'.file'#9'12'#9'"C:\CUDA\include\device_launch_parameters.h"'
|
|
|
|
- #9'.file'#9'13'#9'"c:\cuda\include\crt\storage_class.h"'
|
|
|
|
-
|
|
|
|
- #9'.file'#9'14'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
|
|
|
|
- 'DE\time.h"'
|
|
|
|
- #9'.file'#9'15'#9'"C:/Users/YARUND~1/AppData/Local/Temp/temp.cu"'
|
|
|
|
- #9'.file'#9'16'#9'"C:\CUDA\include\common_functions.h"'
|
|
|
|
- #9'.file'#9'17'#9'"c:\cuda\include\crt/func_macro.h"'
|
|
|
|
- #9'.file'#9'18'#9'"c:\cuda\include\math_functions.h"'
|
|
|
|
- #9'.file'#9'19'#9'"c:\cuda\include\device_functions.h"'
|
|
|
|
- #9'.file'#9'20'#9'"c:\cuda\include\math_constants.h"'
|
|
|
|
- #9'.file'#9'21'#9'"c:\cuda\include\sm_11_atomic_functions.h"'
|
|
|
|
- #9'.file'#9'22'#9'"c:\cuda\include\sm_12_atomic_functions.h"'
|
|
|
|
- #9'.file'#9'23'#9'"c:\cuda\include\sm_13_double_functions.h"'
|
|
|
|
- #9'.file'#9'24'#9'"c:\cuda\include\common_types.h"'
|
|
|
|
- #9'.file'#9'25'#9'"c:\cuda\include\sm_20_atomic_functions.h"'
|
|
|
|
- #9'.file'#9'26'#9'"c:\cuda\include\sm_20_intrinsics.h"'
|
|
|
|
- #9'.file'#9'27'#9'"c:\cuda\include\surface_functions.h"'
|
|
|
|
- #9'.file'#9'28'#9'"c:\cuda\include\texture_fetch_functions.h"'
|
|
|
|
- #9'.file'#9'29'#9'"c:\cuda\include\math_functions_dbl_ptx1.h"'
|
|
|
|
- ''
|
|
|
|
- #9'.tex .u32 tex;'
|
|
|
|
-
|
|
|
|
- #9'.const .align 4 .b8 __cudart_i2opi_f[24] = {65,144,67,60,153,14' +
|
|
|
|
- '9,98,219,192,221,52,245,209,87,39,252,41,21,68,78,110,131,249,16' +
|
|
|
|
- '2};'
|
|
|
|
- ''
|
|
|
|
- #9'.entry transformKernel ('
|
|
|
|
- #9#9'.param .u32 __cudaparm_transformKernel_g_odata,'
|
|
|
|
- #9#9'.param .s32 __cudaparm_transformKernel_width,'
|
|
|
|
- #9#9'.param .s32 __cudaparm_transformKernel_height,'
|
|
|
|
- #9#9'.param .f32 __cudaparm_transformKernel_theta)'
|
|
|
|
- #9'{'
|
|
|
|
- #9'.reg .u16 %rh<6>;'
|
|
|
|
- #9'.reg .u32 %r<279>;'
|
|
|
|
- #9'.reg .f32 %f<145>;'
|
|
|
|
- #9'.reg .pred %p<50>;'
|
|
|
|
- #9'.local .align 4 .b8 __cuda_result_16[28];'
|
|
|
|
- #9'.local .align 4 .b8 __cuda_result_44[28];'
|
|
|
|
- #9'.loc'#9'15'#9'10'#9'0'
|
|
|
|
- '$LBB1_transformKernel:'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'abs.f32 '#9'%f2, %f1;'
|
|
|
|
- #9'mov.f32 '#9'%f3, 0f7f800000; '#9'// 1.#INF'
|
|
|
|
- #9'setp.eq.f32 '#9'%p1, %f2, %f3;'
|
|
|
|
- #9'@!%p1 bra '#9'$Lt_0_46850;'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'neg.f32 '#9'%f4, %f1;'
|
|
|
|
- #9'add.rn.f32 '#9'%f5, %f1, %f4;'
|
|
|
|
- #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
|
|
|
|
- #9'mov.u32 '#9'%r2, __cuda_result_16;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_3330;'
|
|
|
|
- '$Lt_0_46850:'
|
|
|
|
- #9'.loc'#9'18'#9'1622'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f6, 0f473ba700; '#9'// 48039'
|
|
|
|
- #9'setp.gt.f32 '#9'%p2, %f2, %f6;'
|
|
|
|
- #9'.loc'#9'18'#9'1625'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
|
|
|
|
- #9'.loc'#9'18'#9'1622'#9'0'
|
|
|
|
- #9'@!%p2 bra '#9'$Lt_0_47362;'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1625'#9'0'
|
|
|
|
- #9'mov.b32 '#9'%r3, %f1;'
|
|
|
|
- #9'and.b32 '#9'%r4, %r3, -2147483648;'
|
|
|
|
- #9'mov.s32 '#9'%r5, %r4;'
|
|
|
|
- #9'.loc'#9'18'#9'24'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r6, %r3, 8;'
|
|
|
|
- #9'mov.s32 '#9'%r7, %r1;'
|
|
|
|
- #9'add.u32 '#9'%r8, %r1, 24;'
|
|
|
|
- #9'mov.u32 '#9'%r9, __cuda_result_16;'
|
|
|
|
- #9'or.b32 '#9'%r10, %r6, -2147483648;'
|
|
|
|
- #9'mov.u32 '#9'%r11, 0;'
|
|
|
|
- '$Lt_0_48386:'
|
|
|
|
- ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
|
|
|
|
- #9'.loc'#9'18'#9'1642'#9'0'
|
|
|
|
- #9'ld.const.u32 '#9'%r12, [%r7+0];'
|
|
|
|
- #9'mul.lo.u32 '#9'%r13, %r12, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r14, %r13, %r11;'
|
|
|
|
- #9'.loc'#9'18'#9'1643'#9'0'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r15, %r13, %r14;'
|
|
|
|
- #9'neg.s32 '#9'%r16, %r15;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r17, %r12, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r16, %r17;'
|
|
|
|
- #9'.loc'#9'18'#9'1644'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[%r9+0], %r14;'
|
|
|
|
- #9'add.u32 '#9'%r9, %r9, 4;'
|
|
|
|
- #9'add.u32 '#9'%r7, %r7, 4;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p3, %r7, %r8;'
|
|
|
|
- #9'@%p3 bra '#9'$Lt_0_48386;'
|
|
|
|
- #9'.loc'#9'18'#9'1646'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r2, __cuda_result_16;'
|
|
|
|
- #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
|
|
|
|
- #9'.loc'#9'18'#9'1651'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r18, %r3, 1;'
|
|
|
|
- #9'shr.u32 '#9'%r19, %r18, 24;'
|
|
|
|
- #9'sub.u32 '#9'%r20, %r19, 128;'
|
|
|
|
- #9'shr.u32 '#9'%r21, %r20, 5;'
|
|
|
|
- #9'mov.s32 '#9'%r22, 4;'
|
|
|
|
- #9'sub.s32 '#9'%r23, %r22, %r21;'
|
|
|
|
- #9'mul.lo.u32 '#9'%r24, %r23, 4;'
|
|
|
|
- #9'add.u32 '#9'%r25, %r24, %r2;'
|
|
|
|
- #9'ld.local.u32 '#9'%r11, [%r25+8];'
|
|
|
|
- #9'.loc'#9'18'#9'1652'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r26, [%r25+4];'
|
|
|
|
- #9'and.b32 '#9'%r27, %r20, 31;'
|
|
|
|
- #9'mov.u32 '#9'%r28, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p4, %r27, %r28;'
|
|
|
|
- #9'@%p4 bra '#9'$Lt_0_48898;'
|
|
|
|
- #9'.loc'#9'18'#9'1655'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r29, 32;'
|
|
|
|
- #9'sub.s32 '#9'%r30, %r29, %r27;'
|
|
|
|
- #9'shr.u32 '#9'%r31, %r26, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r32, %r11, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r31, %r32;'
|
|
|
|
- #9'.loc'#9'18'#9'1656'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r33, [%r25+0];'
|
|
|
|
- #9'shr.u32 '#9'%r34, %r33, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r35, %r26, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r26, %r34, %r35;'
|
|
|
|
- '$Lt_0_48898:'
|
|
|
|
- #9'.loc'#9'18'#9'1658'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r36, %r11, 30;'
|
|
|
|
- #9'.loc'#9'18'#9'1660'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r37, %r26, 30;'
|
|
|
|
- #9'shl.b32 '#9'%r38, %r11, 2;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r37, %r38;'
|
|
|
|
- #9'.loc'#9'18'#9'1661'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 2;'
|
|
|
|
- #9'mov.u32 '#9'%r39, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p5, %r26, %r39;'
|
|
|
|
- #9'@%p5 bra '#9'$Lt_0_49666;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r40, %r11, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r41, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r42, %r40, %r41;'
|
|
|
|
- #9'neg.s32 '#9'%r43, %r42;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_49410;'
|
|
|
|
- '$Lt_0_49666:'
|
|
|
|
- #9'mov.u32 '#9'%r44, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r45, %r11, %r44;'
|
|
|
|
- #9'neg.s32 '#9'%r43, %r45;'
|
|
|
|
- '$Lt_0_49410:'
|
|
|
|
- #9'.loc'#9'18'#9'1663'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r36, %r36, %r43;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r46, %r36;'
|
|
|
|
- #9'mov.u32 '#9'%r47, 0;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p6, %r4, %r47;'
|
|
|
|
- #9'selp.s32 '#9'%r36, %r46, %r36, %p6;'
|
|
|
|
- #9'mov.u32 '#9'%r48, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p7, %r43, %r48;'
|
|
|
|
- #9'@%p7 bra '#9'$Lt_0_49922;'
|
|
|
|
- #9'.loc'#9'18'#9'1668'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r26, %r26;'
|
|
|
|
- #9'.loc'#9'18'#9'1670'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r49, 0;'
|
|
|
|
- #9'set.eq.u32.u32 '#9'%r50, %r26, %r49;'
|
|
|
|
- #9'neg.s32 '#9'%r51, %r50;'
|
|
|
|
- #9'not.b32 '#9'%r52, %r11;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r51, %r52;'
|
|
|
|
- #9'.loc'#9'18'#9'1671'#9'0'
|
|
|
|
- #9'xor.b32 '#9'%r5, %r4, -2147483648;'
|
|
|
|
- '$Lt_0_49922:'
|
|
|
|
- #9'.loc'#9'18'#9'1673'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r53, %r36;'
|
|
|
|
- #9'mov.u32 '#9'%r54, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p8, %r11, %r54;'
|
|
|
|
- #9'mov.u32 '#9'%r55, 0;'
|
|
|
|
- #9'@%p8 bra '#9'$Lt_0_69378;'
|
|
|
|
- '$Lt_0_50946:'
|
|
|
|
-
|
|
|
|
- ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
|
|
|
|
- 'tions: unknown'
|
|
|
|
- #9'.loc'#9'18'#9'1677'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r56, %r26, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r57, %r11, 1;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r56, %r57;'
|
|
|
|
- #9'.loc'#9'18'#9'1678'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1679'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r55, %r55, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r58, 0;'
|
|
|
|
- #9'setp.gt.s32 '#9'%p9, %r11, %r58;'
|
|
|
|
- #9'@%p9 bra '#9'$Lt_0_50946;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_50434;'
|
|
|
|
- '$Lt_0_69378:'
|
|
|
|
- '$Lt_0_50434:'
|
|
|
|
- #9'.loc'#9'18'#9'1681'#9'0'
|
|
|
|
- #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
|
|
|
|
- #9'.loc'#9'18'#9'1682'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r59, -921707870;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r11, %r11, %r59;'
|
|
|
|
- #9'mov.u32 '#9'%r60, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p10, %r11, %r60;'
|
|
|
|
- #9'@%p10 bra '#9'$Lt_0_51458;'
|
|
|
|
- #9'.loc'#9'18'#9'1684'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r61, %r26, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r62, %r11, 1;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r61, %r62;'
|
|
|
|
- #9'.loc'#9'18'#9'1685'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1686'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r55, %r55, 1;'
|
|
|
|
- '$Lt_0_51458:'
|
|
|
|
- #9'.loc'#9'18'#9'1688'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r63, 0;'
|
|
|
|
- #9'set.ne.u32.u32 '#9'%r64, %r26, %r63;'
|
|
|
|
- #9'neg.s32 '#9'%r65, %r64;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r65, %r11;'
|
|
|
|
- #9'shl.b32 '#9'%r66, %r11, 24;'
|
|
|
|
- #9'mov.s32 '#9'%r67, 0;'
|
|
|
|
- #9'set.lt.u32.s32 '#9'%r68, %r66, %r67;'
|
|
|
|
- #9'neg.s32 '#9'%r69, %r68;'
|
|
|
|
- #9'shr.u32 '#9'%r70, %r11, 8;'
|
|
|
|
- #9'add.u32 '#9'%r71, %r55, 126;'
|
|
|
|
- #9'shl.b32 '#9'%r72, %r71, 23;'
|
|
|
|
- #9'add.u32 '#9'%r73, %r70, %r72;'
|
|
|
|
- #9'add.u32 '#9'%r74, %r69, %r73;'
|
|
|
|
- #9'or.b32 '#9'%r75, %r5, %r74;'
|
|
|
|
- #9'mov.b32 '#9'%f7, %r75;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_3586;'
|
|
|
|
- '$Lt_0_47362:'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f8, 0f3f22f983; '#9'// 0.63662'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f9, %f1, %f8;'
|
|
|
|
- #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
|
|
|
|
- #9'mov.s32 '#9'%r53, %r76;'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
|
|
|
|
- #9'neg.f32 '#9'%f11, %f10;'
|
|
|
|
- #9'mov.f32 '#9'%f12, 0f3fc90000; '#9'// 1.57031'
|
|
|
|
- #9'mad.f32 '#9'%f13, %f12, %f11, %f1;'
|
|
|
|
- #9'mov.f32 '#9'%f14, 0f39fd8000; '#9'// 0.000483513'
|
|
|
|
- #9'mad.f32 '#9'%f15, %f14, %f11, %f13;'
|
|
|
|
- #9'mov.f32 '#9'%f16, 0f34a88000; '#9'// 3.13856e-007'
|
|
|
|
- #9'mad.f32 '#9'%f17, %f16, %f11, %f15;'
|
|
|
|
- #9'mov.f32 '#9'%f18, 0f2e85a309; '#9'// 6.0771e-011'
|
|
|
|
- #9'mad.f32 '#9'%f7, %f18, %f11, %f17;'
|
|
|
|
- #9'mov.u32 '#9'%r2, __cuda_result_16;'
|
|
|
|
- '$Lt_0_3586:'
|
|
|
|
- #9'.loc'#9'18'#9'1949'#9'0'
|
|
|
|
- #9'add.s32 '#9'%r77, %r53, 1;'
|
|
|
|
- #9'mul.f32 '#9'%f19, %f7, %f7;'
|
|
|
|
- #9'and.b32 '#9'%r78, %r77, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r79, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p11, %r78, %r79;'
|
|
|
|
- #9'@%p11 bra '#9'$Lt_0_52226;'
|
|
|
|
- #9'.loc'#9'18'#9'1953'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f20, 0f3f800000; '#9'// 1'
|
|
|
|
- #9'mov.f32 '#9'%f21, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'mov.f32 '#9'%f22, 0f3d2aaaa5; '#9'// 0.0416666'
|
|
|
|
- #9'mov.f32 '#9'%f23, 0fbab6061a; '#9'// -0.00138873'
|
|
|
|
- #9'mov.f32 '#9'%f24, 0f37ccf5ce; '#9'// 2.44332e-005'
|
|
|
|
- #9'mad.f32 '#9'%f25, %f24, %f19, %f23;'
|
|
|
|
- #9'mad.f32 '#9'%f26, %f19, %f25, %f22;'
|
|
|
|
- #9'mad.f32 '#9'%f27, %f19, %f26, %f21;'
|
|
|
|
- #9'mad.f32 '#9'%f28, %f19, %f27, %f20;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_51970;'
|
|
|
|
- '$Lt_0_52226:'
|
|
|
|
- #9'.loc'#9'18'#9'1955'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f29, 0fbe2aaaa3; '#9'// -0.166667'
|
|
|
|
- #9'mov.f32 '#9'%f30, 0f3c08839e; '#9'// 0.00833216'
|
|
|
|
- #9'mov.f32 '#9'%f31, 0fb94ca1f9; '#9'// -0.000195153'
|
|
|
|
- #9'mad.f32 '#9'%f32, %f31, %f19, %f30;'
|
|
|
|
- #9'mad.f32 '#9'%f33, %f19, %f32, %f29;'
|
|
|
|
- #9'mul.f32 '#9'%f34, %f19, %f33;'
|
|
|
|
- #9'mad.f32 '#9'%f28, %f34, %f7, %f7;'
|
|
|
|
- '$Lt_0_51970:'
|
|
|
|
- #9'.loc'#9'18'#9'1957'#9'0'
|
|
|
|
- #9'neg.f32 '#9'%f35, %f28;'
|
|
|
|
- #9'and.b32 '#9'%r80, %r77, 2;'
|
|
|
|
- #9'mov.s32 '#9'%r81, 0;'
|
|
|
|
- #9'setp.ne.s32 '#9'%p12, %r80, %r81;'
|
|
|
|
- #9'selp.f32 '#9'%f28, %f35, %f28, %p12;'
|
|
|
|
- #9'mov.f32 '#9'%f5, %f28;'
|
|
|
|
- '$Lt_0_3330:'
|
|
|
|
- #9'.loc'#9'18'#9'1869'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f36, 0f00000000; '#9'// 0'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1869'#9'0'
|
|
|
|
- #9'setp.eq.f32 '#9'%p13, %f1, %f36;'
|
|
|
|
- #9'selp.s32 '#9'%r82, 1, 0, %p13;'
|
|
|
|
- #9'selp.s32 '#9'%r83, 1, 0, %p1;'
|
|
|
|
- #9'or.b32 '#9'%r84, %r82, %r83;'
|
|
|
|
- #9'mov.u32 '#9'%r85, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p14, %r84, %r85;'
|
|
|
|
- #9'@%p14 bra '#9'$Lt_0_52482;'
|
|
|
|
- #9'mov.f32 '#9'%f37, 0f00000000; '#9'// 0'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1869'#9'0'
|
|
|
|
- #9'mul.rn.f32 '#9'%f38, %f1, %f37;'
|
|
|
|
- #9'mov.u32 '#9'%r86, __cuda_result_44;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_2306;'
|
|
|
|
- '$Lt_0_52482:'
|
|
|
|
- #9'.loc'#9'18'#9'1622'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f39, 0f473ba700; '#9'// 48039'
|
|
|
|
- #9'setp.gt.f32 '#9'%p15, %f2, %f39;'
|
|
|
|
- #9'@!%p15 bra '#9'$Lt_0_52994;'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1625'#9'0'
|
|
|
|
- #9'mov.b32 '#9'%r3, %f1;'
|
|
|
|
- #9'and.b32 '#9'%r4, %r3, -2147483648;'
|
|
|
|
- #9'mov.s32 '#9'%r87, %r4;'
|
|
|
|
- #9'.loc'#9'18'#9'24'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r6, %r3, 8;'
|
|
|
|
- #9'mov.s32 '#9'%r88, %r1;'
|
|
|
|
- #9'add.u32 '#9'%r8, %r1, 24;'
|
|
|
|
- #9'mov.u32 '#9'%r89, __cuda_result_44;'
|
|
|
|
- #9'or.b32 '#9'%r10, %r6, -2147483648;'
|
|
|
|
- #9'mov.u32 '#9'%r90, 0;'
|
|
|
|
- '$Lt_0_54018:'
|
|
|
|
- ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
|
|
|
|
- #9'.loc'#9'18'#9'1642'#9'0'
|
|
|
|
- #9'ld.const.u32 '#9'%r91, [%r88+0];'
|
|
|
|
- #9'mul.lo.u32 '#9'%r92, %r91, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r93, %r92, %r90;'
|
|
|
|
- #9'.loc'#9'18'#9'1643'#9'0'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r94, %r92, %r93;'
|
|
|
|
- #9'neg.s32 '#9'%r95, %r94;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r96, %r91, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r95, %r96;'
|
|
|
|
- #9'.loc'#9'18'#9'1644'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[%r89+0], %r93;'
|
|
|
|
- #9'add.u32 '#9'%r89, %r89, 4;'
|
|
|
|
- #9'add.u32 '#9'%r88, %r88, 4;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p16, %r88, %r8;'
|
|
|
|
- #9'@%p16 bra '#9'$Lt_0_54018;'
|
|
|
|
- #9'.loc'#9'18'#9'1646'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r86, __cuda_result_44;'
|
|
|
|
- #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
|
|
|
|
- #9'.loc'#9'18'#9'1651'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r97, %r3, 1;'
|
|
|
|
- #9'shr.u32 '#9'%r19, %r97, 24;'
|
|
|
|
- #9'sub.u32 '#9'%r20, %r19, 128;'
|
|
|
|
- #9'shr.u32 '#9'%r21, %r20, 5;'
|
|
|
|
- #9'mov.s32 '#9'%r98, 4;'
|
|
|
|
- #9'sub.s32 '#9'%r23, %r98, %r21;'
|
|
|
|
- #9'mul.lo.u32 '#9'%r24, %r23, 4;'
|
|
|
|
- #9'add.u32 '#9'%r99, %r24, %r86;'
|
|
|
|
- #9'ld.local.u32 '#9'%r90, [%r99+8];'
|
|
|
|
- #9'.loc'#9'18'#9'1652'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r100, [%r99+4];'
|
|
|
|
- #9'and.b32 '#9'%r27, %r20, 31;'
|
|
|
|
- #9'mov.u32 '#9'%r101, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p17, %r27, %r101;'
|
|
|
|
- #9'@%p17 bra '#9'$Lt_0_54530;'
|
|
|
|
- #9'.loc'#9'18'#9'1655'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r102, 32;'
|
|
|
|
- #9'sub.s32 '#9'%r30, %r102, %r27;'
|
|
|
|
- #9'shr.u32 '#9'%r103, %r100, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r104, %r90, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r103, %r104;'
|
|
|
|
- #9'.loc'#9'18'#9'1656'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r105, [%r99+0];'
|
|
|
|
- #9'shr.u32 '#9'%r106, %r105, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r107, %r100, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r100, %r106, %r107;'
|
|
|
|
- '$Lt_0_54530:'
|
|
|
|
- #9'.loc'#9'18'#9'1658'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r108, %r90, 30;'
|
|
|
|
- #9'.loc'#9'18'#9'1660'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r109, %r100, 30;'
|
|
|
|
- #9'shl.b32 '#9'%r110, %r90, 2;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r109, %r110;'
|
|
|
|
- #9'.loc'#9'18'#9'1661'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 2;'
|
|
|
|
- #9'mov.u32 '#9'%r111, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p18, %r100, %r111;'
|
|
|
|
- #9'@%p18 bra '#9'$Lt_0_55298;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r112, %r90, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r113, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r114, %r112, %r113;'
|
|
|
|
- #9'neg.s32 '#9'%r115, %r114;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_55042;'
|
|
|
|
- '$Lt_0_55298:'
|
|
|
|
- #9'mov.u32 '#9'%r116, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r117, %r90, %r116;'
|
|
|
|
- #9'neg.s32 '#9'%r115, %r117;'
|
|
|
|
- '$Lt_0_55042:'
|
|
|
|
- #9'.loc'#9'18'#9'1663'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r108, %r108, %r115;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r118, %r108;'
|
|
|
|
- #9'mov.u32 '#9'%r119, 0;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p19, %r4, %r119;'
|
|
|
|
- #9'selp.s32 '#9'%r108, %r118, %r108, %p19;'
|
|
|
|
- #9'mov.u32 '#9'%r120, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p20, %r115, %r120;'
|
|
|
|
- #9'@%p20 bra '#9'$Lt_0_55554;'
|
|
|
|
- #9'.loc'#9'18'#9'1668'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r100, %r100;'
|
|
|
|
- #9'.loc'#9'18'#9'1670'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r121, 0;'
|
|
|
|
- #9'set.eq.u32.u32 '#9'%r122, %r100, %r121;'
|
|
|
|
- #9'neg.s32 '#9'%r123, %r122;'
|
|
|
|
- #9'not.b32 '#9'%r124, %r90;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r123, %r124;'
|
|
|
|
- #9'.loc'#9'18'#9'1671'#9'0'
|
|
|
|
- #9'xor.b32 '#9'%r87, %r4, -2147483648;'
|
|
|
|
- '$Lt_0_55554:'
|
|
|
|
- #9'.loc'#9'18'#9'1673'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r125, %r108;'
|
|
|
|
- #9'mov.u32 '#9'%r126, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p21, %r90, %r126;'
|
|
|
|
- #9'mov.u32 '#9'%r127, 0;'
|
|
|
|
- #9'@%p21 bra '#9'$Lt_0_69634;'
|
|
|
|
- '$Lt_0_56578:'
|
|
|
|
-
|
|
|
|
- ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
|
|
|
|
- 'tions: unknown'
|
|
|
|
- #9'.loc'#9'18'#9'1677'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r128, %r100, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r129, %r90, 1;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r128, %r129;'
|
|
|
|
- #9'.loc'#9'18'#9'1678'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1679'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r127, %r127, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r130, 0;'
|
|
|
|
- #9'setp.gt.s32 '#9'%p22, %r90, %r130;'
|
|
|
|
- #9'@%p22 bra '#9'$Lt_0_56578;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_56066;'
|
|
|
|
- '$Lt_0_69634:'
|
|
|
|
- '$Lt_0_56066:'
|
|
|
|
- #9'.loc'#9'18'#9'1681'#9'0'
|
|
|
|
- #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
|
|
|
|
- #9'.loc'#9'18'#9'1682'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r131, -921707870;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r90, %r90, %r131;'
|
|
|
|
- #9'mov.u32 '#9'%r132, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p23, %r90, %r132;'
|
|
|
|
- #9'@%p23 bra '#9'$Lt_0_57090;'
|
|
|
|
- #9'.loc'#9'18'#9'1684'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r133, %r100, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r134, %r90, 1;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r133, %r134;'
|
|
|
|
- #9'.loc'#9'18'#9'1685'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1686'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r127, %r127, 1;'
|
|
|
|
- '$Lt_0_57090:'
|
|
|
|
- #9'.loc'#9'18'#9'1688'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r135, 0;'
|
|
|
|
- #9'set.ne.u32.u32 '#9'%r136, %r100, %r135;'
|
|
|
|
- #9'neg.s32 '#9'%r137, %r136;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r137, %r90;'
|
|
|
|
- #9'shl.b32 '#9'%r138, %r90, 24;'
|
|
|
|
- #9'mov.s32 '#9'%r139, 0;'
|
|
|
|
- #9'set.lt.u32.s32 '#9'%r140, %r138, %r139;'
|
|
|
|
- #9'neg.s32 '#9'%r141, %r140;'
|
|
|
|
- #9'shr.u32 '#9'%r142, %r90, 8;'
|
|
|
|
- #9'add.u32 '#9'%r143, %r127, 126;'
|
|
|
|
- #9'shl.b32 '#9'%r144, %r143, 23;'
|
|
|
|
- #9'add.u32 '#9'%r145, %r142, %r144;'
|
|
|
|
- #9'add.u32 '#9'%r146, %r141, %r145;'
|
|
|
|
- #9'or.b32 '#9'%r147, %r87, %r146;'
|
|
|
|
- #9'mov.b32 '#9'%f40, %r147;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_2562;'
|
|
|
|
- '$Lt_0_52994:'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f41, 0f3f22f983; '#9'// 0.63662'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f9, %f1, %f41;'
|
|
|
|
- #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
|
|
|
|
- #9'mov.s32 '#9'%r125, %r76;'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
|
|
|
|
- #9'neg.f32 '#9'%f11, %f10;'
|
|
|
|
- #9'mov.f32 '#9'%f42, 0f3fc90000; '#9'// 1.57031'
|
|
|
|
- #9'mad.f32 '#9'%f43, %f42, %f11, %f1;'
|
|
|
|
- #9'mov.f32 '#9'%f44, 0f39fd8000; '#9'// 0.000483513'
|
|
|
|
- #9'mad.f32 '#9'%f45, %f44, %f11, %f43;'
|
|
|
|
- #9'mov.f32 '#9'%f46, 0f34a88000; '#9'// 3.13856e-007'
|
|
|
|
- #9'mad.f32 '#9'%f47, %f46, %f11, %f45;'
|
|
|
|
- #9'mov.f32 '#9'%f48, 0f2e85a309; '#9'// 6.0771e-011'
|
|
|
|
- #9'mad.f32 '#9'%f40, %f48, %f11, %f47;'
|
|
|
|
- #9'mov.u32 '#9'%r86, __cuda_result_44;'
|
|
|
|
- '$Lt_0_2562:'
|
|
|
|
- #9'.loc'#9'18'#9'1872'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f49, %f40, %f40;'
|
|
|
|
- #9'and.b32 '#9'%r148, %r125, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r149, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p24, %r148, %r149;'
|
|
|
|
- #9'@%p24 bra '#9'$Lt_0_57858;'
|
|
|
|
- #9'.loc'#9'18'#9'1875'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f50, 0f3f800000; '#9'// 1'
|
|
|
|
- #9'mov.f32 '#9'%f51, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'mov.f32 '#9'%f52, 0f3d2aaaa5; '#9'// 0.0416666'
|
|
|
|
- #9'mov.f32 '#9'%f53, 0fbab6061a; '#9'// -0.00138873'
|
|
|
|
- #9'mov.f32 '#9'%f54, 0f37ccf5ce; '#9'// 2.44332e-005'
|
|
|
|
- #9'mad.f32 '#9'%f55, %f54, %f49, %f53;'
|
|
|
|
- #9'mad.f32 '#9'%f56, %f49, %f55, %f52;'
|
|
|
|
- #9'mad.f32 '#9'%f57, %f49, %f56, %f51;'
|
|
|
|
- #9'mad.f32 '#9'%f58, %f49, %f57, %f50;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_57602;'
|
|
|
|
- '$Lt_0_57858:'
|
|
|
|
- #9'.loc'#9'18'#9'1877'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f59, 0fbe2aaaa3; '#9'// -0.166667'
|
|
|
|
- #9'mov.f32 '#9'%f60, 0f3c08839e; '#9'// 0.00833216'
|
|
|
|
- #9'mov.f32 '#9'%f61, 0fb94ca1f9; '#9'// -0.000195153'
|
|
|
|
- #9'mad.f32 '#9'%f62, %f61, %f49, %f60;'
|
|
|
|
- #9'mad.f32 '#9'%f63, %f49, %f62, %f59;'
|
|
|
|
- #9'mul.f32 '#9'%f64, %f49, %f63;'
|
|
|
|
- #9'mad.f32 '#9'%f58, %f64, %f40, %f40;'
|
|
|
|
- '$Lt_0_57602:'
|
|
|
|
- #9'.loc'#9'18'#9'1879'#9'0'
|
|
|
|
- #9'neg.f32 '#9'%f65, %f58;'
|
|
|
|
- #9'and.b32 '#9'%r150, %r125, 2;'
|
|
|
|
- #9'mov.s32 '#9'%r151, 0;'
|
|
|
|
- #9'setp.ne.s32 '#9'%p25, %r150, %r151;'
|
|
|
|
- #9'selp.f32 '#9'%f58, %f65, %f58, %p25;'
|
|
|
|
- #9'mov.f32 '#9'%f38, %f58;'
|
|
|
|
- '$Lt_0_2306:'
|
|
|
|
- #9'.loc'#9'15'#9'22'#9'0'
|
|
|
|
- #9'mov.u16 '#9'%rh1, %ctaid.y;'
|
|
|
|
- #9'mov.u16 '#9'%rh2, %ntid.y;'
|
|
|
|
- #9'mul.wide.u16 '#9'%r152, %rh1, %rh2;'
|
|
|
|
- #9'ld.param.s32 '#9'%r153, [__cudaparm_transformKernel_height];'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f66, %r153;'
|
|
|
|
- #9'mov.u16 '#9'%rh3, %ctaid.x;'
|
|
|
|
- #9'mov.u16 '#9'%rh4, %ntid.x;'
|
|
|
|
- #9'mul.wide.u16 '#9'%r154, %rh3, %rh4;'
|
|
|
|
- #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f67, %r155;'
|
|
|
|
- #9'cvt.u32.u16 '#9'%r156, %tid.y;'
|
|
|
|
- #9'add.u32 '#9'%r157, %r156, %r152;'
|
|
|
|
- #9'cvt.u32.u16 '#9'%r158, %tid.x;'
|
|
|
|
- #9'add.u32 '#9'%r159, %r158, %r154;'
|
|
|
|
- #9'cvt.rn.f32.u32 '#9'%f68, %r157;'
|
|
|
|
- #9'cvt.rn.f32.u32 '#9'%f69, %r159;'
|
|
|
|
- #9'div.full.f32 '#9'%f70, %f68, %f66;'
|
|
|
|
- #9'div.full.f32 '#9'%f71, %f69, %f67;'
|
|
|
|
- #9'mov.f32 '#9'%f72, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'add.f32 '#9'%f73, %f70, %f72;'
|
|
|
|
- #9'mov.f32 '#9'%f74, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'add.f32 '#9'%f75, %f71, %f74;'
|
|
|
|
- #9'mul.f32 '#9'%f76, %f38, %f73;'
|
|
|
|
- #9'mul.f32 '#9'%f77, %f75, %f5;'
|
|
|
|
- #9'sub.f32 '#9'%f78, %f77, %f76;'
|
|
|
|
- #9'mov.f32 '#9'%f79, 0f3f000000; '#9'// 0.5'
|
|
|
|
- #9'add.f32 '#9'%f80, %f78, %f79;'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'@!%p1 bra '#9'$Lt_0_58114;'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'neg.f32 '#9'%f81, %f1;'
|
|
|
|
- #9'add.rn.f32 '#9'%f82, %f1, %f81;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_1282;'
|
|
|
|
- '$Lt_0_58114:'
|
|
|
|
- #9'.loc'#9'18'#9'1622'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f83, 0f473ba700; '#9'// 48039'
|
|
|
|
- #9'setp.gt.f32 '#9'%p26, %f2, %f83;'
|
|
|
|
- #9'@!%p26 bra '#9'$Lt_0_58626;'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1625'#9'0'
|
|
|
|
- #9'mov.b32 '#9'%r3, %f1;'
|
|
|
|
- #9'and.b32 '#9'%r4, %r3, -2147483648;'
|
|
|
|
- #9'mov.s32 '#9'%r5, %r4;'
|
|
|
|
- #9'.loc'#9'18'#9'24'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r6, %r3, 8;'
|
|
|
|
- #9'mov.s32 '#9'%r7, %r1;'
|
|
|
|
- #9'add.u32 '#9'%r8, %r1, 24;'
|
|
|
|
- #9'mov.u32 '#9'%r9, __cuda_result_16;'
|
|
|
|
- #9'or.b32 '#9'%r10, %r6, -2147483648;'
|
|
|
|
- #9'mov.u32 '#9'%r11, 0;'
|
|
|
|
- '$Lt_0_59650:'
|
|
|
|
- ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
|
|
|
|
- #9'.loc'#9'18'#9'1642'#9'0'
|
|
|
|
- #9'ld.const.u32 '#9'%r160, [%r7+0];'
|
|
|
|
- #9'mul.lo.u32 '#9'%r161, %r160, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r162, %r161, %r11;'
|
|
|
|
- #9'.loc'#9'18'#9'1643'#9'0'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r163, %r161, %r162;'
|
|
|
|
- #9'neg.s32 '#9'%r164, %r163;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r165, %r160, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r164, %r165;'
|
|
|
|
- #9'.loc'#9'18'#9'1644'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[%r9+0], %r162;'
|
|
|
|
- #9'add.u32 '#9'%r9, %r9, 4;'
|
|
|
|
- #9'add.u32 '#9'%r7, %r7, 4;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p27, %r7, %r8;'
|
|
|
|
- #9'@%p27 bra '#9'$Lt_0_59650;'
|
|
|
|
- #9'.loc'#9'18'#9'1646'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
|
|
|
|
- #9'.loc'#9'18'#9'1651'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r166, %r3, 1;'
|
|
|
|
- #9'shr.u32 '#9'%r19, %r166, 24;'
|
|
|
|
- #9'sub.u32 '#9'%r20, %r19, 128;'
|
|
|
|
- #9'shr.u32 '#9'%r21, %r20, 5;'
|
|
|
|
- #9'mov.s32 '#9'%r167, 4;'
|
|
|
|
- #9'sub.s32 '#9'%r23, %r167, %r21;'
|
|
|
|
- #9'mul.lo.u32 '#9'%r24, %r23, 4;'
|
|
|
|
- #9'add.u32 '#9'%r25, %r24, %r2;'
|
|
|
|
- #9'ld.local.u32 '#9'%r11, [%r25+8];'
|
|
|
|
- #9'.loc'#9'18'#9'1652'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r26, [%r25+4];'
|
|
|
|
- #9'and.b32 '#9'%r27, %r20, 31;'
|
|
|
|
- #9'mov.u32 '#9'%r168, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p28, %r27, %r168;'
|
|
|
|
- #9'@%p28 bra '#9'$Lt_0_60162;'
|
|
|
|
- #9'.loc'#9'18'#9'1655'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r169, 32;'
|
|
|
|
- #9'sub.s32 '#9'%r30, %r169, %r27;'
|
|
|
|
- #9'shr.u32 '#9'%r170, %r26, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r171, %r11, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r170, %r171;'
|
|
|
|
- #9'.loc'#9'18'#9'1656'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r172, [%r25+0];'
|
|
|
|
- #9'shr.u32 '#9'%r173, %r172, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r174, %r26, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r26, %r173, %r174;'
|
|
|
|
- '$Lt_0_60162:'
|
|
|
|
- #9'.loc'#9'18'#9'1658'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r36, %r11, 30;'
|
|
|
|
- #9'.loc'#9'18'#9'1660'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r175, %r26, 30;'
|
|
|
|
- #9'shl.b32 '#9'%r176, %r11, 2;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r175, %r176;'
|
|
|
|
- #9'.loc'#9'18'#9'1661'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 2;'
|
|
|
|
- #9'mov.u32 '#9'%r177, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p29, %r26, %r177;'
|
|
|
|
- #9'@%p29 bra '#9'$Lt_0_60930;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r178, %r11, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r179, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r180, %r178, %r179;'
|
|
|
|
- #9'neg.s32 '#9'%r181, %r180;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_60674;'
|
|
|
|
- '$Lt_0_60930:'
|
|
|
|
- #9'mov.u32 '#9'%r182, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r183, %r11, %r182;'
|
|
|
|
- #9'neg.s32 '#9'%r181, %r183;'
|
|
|
|
- '$Lt_0_60674:'
|
|
|
|
- #9'.loc'#9'18'#9'1663'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r36, %r36, %r181;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r184, %r36;'
|
|
|
|
- #9'mov.u32 '#9'%r185, 0;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p30, %r4, %r185;'
|
|
|
|
- #9'selp.s32 '#9'%r36, %r184, %r36, %p30;'
|
|
|
|
- #9'mov.u32 '#9'%r186, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p31, %r181, %r186;'
|
|
|
|
- #9'@%p31 bra '#9'$Lt_0_61186;'
|
|
|
|
- #9'.loc'#9'18'#9'1668'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r26, %r26;'
|
|
|
|
- #9'.loc'#9'18'#9'1670'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r187, 0;'
|
|
|
|
- #9'set.eq.u32.u32 '#9'%r188, %r26, %r187;'
|
|
|
|
- #9'neg.s32 '#9'%r189, %r188;'
|
|
|
|
- #9'not.b32 '#9'%r190, %r11;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r189, %r190;'
|
|
|
|
- #9'.loc'#9'18'#9'1671'#9'0'
|
|
|
|
- #9'xor.b32 '#9'%r5, %r4, -2147483648;'
|
|
|
|
- '$Lt_0_61186:'
|
|
|
|
- #9'.loc'#9'18'#9'1673'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r53, %r36;'
|
|
|
|
- #9'mov.u32 '#9'%r191, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p32, %r11, %r191;'
|
|
|
|
- #9'@%p32 bra '#9'$Lt_0_69890;'
|
|
|
|
- #9'mov.u32 '#9'%r55, 0;'
|
|
|
|
- '$Lt_0_62210:'
|
|
|
|
-
|
|
|
|
- ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
|
|
|
|
- 'tions: unknown'
|
|
|
|
- #9'.loc'#9'18'#9'1677'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r192, %r26, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r193, %r11, 1;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r192, %r193;'
|
|
|
|
- #9'.loc'#9'18'#9'1678'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1679'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r55, %r55, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r194, 0;'
|
|
|
|
- #9'setp.gt.s32 '#9'%p33, %r11, %r194;'
|
|
|
|
- #9'@%p33 bra '#9'$Lt_0_62210;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_61698;'
|
|
|
|
- '$Lt_0_69890:'
|
|
|
|
- #9'mov.u32 '#9'%r55, 0;'
|
|
|
|
- '$Lt_0_61698:'
|
|
|
|
- #9'.loc'#9'18'#9'1681'#9'0'
|
|
|
|
- #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
|
|
|
|
- #9'.loc'#9'18'#9'1682'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r195, -921707870;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r11, %r11, %r195;'
|
|
|
|
- #9'mov.u32 '#9'%r196, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p34, %r11, %r196;'
|
|
|
|
- #9'@%p34 bra '#9'$Lt_0_62722;'
|
|
|
|
- #9'.loc'#9'18'#9'1684'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r197, %r26, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r198, %r11, 1;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r197, %r198;'
|
|
|
|
- #9'.loc'#9'18'#9'1685'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r26, %r26, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1686'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r55, %r55, 1;'
|
|
|
|
- '$Lt_0_62722:'
|
|
|
|
- #9'.loc'#9'18'#9'1688'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r199, 0;'
|
|
|
|
- #9'set.ne.u32.u32 '#9'%r200, %r26, %r199;'
|
|
|
|
- #9'neg.s32 '#9'%r201, %r200;'
|
|
|
|
- #9'add.u32 '#9'%r11, %r201, %r11;'
|
|
|
|
- #9'shl.b32 '#9'%r202, %r11, 24;'
|
|
|
|
- #9'mov.s32 '#9'%r203, 0;'
|
|
|
|
- #9'set.lt.u32.s32 '#9'%r204, %r202, %r203;'
|
|
|
|
- #9'neg.s32 '#9'%r205, %r204;'
|
|
|
|
- #9'shr.u32 '#9'%r206, %r11, 8;'
|
|
|
|
- #9'add.u32 '#9'%r207, %r55, 126;'
|
|
|
|
- #9'shl.b32 '#9'%r208, %r207, 23;'
|
|
|
|
- #9'add.u32 '#9'%r209, %r206, %r208;'
|
|
|
|
- #9'add.u32 '#9'%r210, %r205, %r209;'
|
|
|
|
- #9'or.b32 '#9'%r211, %r5, %r210;'
|
|
|
|
- #9'mov.b32 '#9'%f7, %r211;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_1538;'
|
|
|
|
- '$Lt_0_58626:'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f84, 0f3f22f983; '#9'// 0.63662'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f9, %f1, %f84;'
|
|
|
|
- #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
|
|
|
|
- #9'mov.s32 '#9'%r53, %r76;'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
|
|
|
|
- #9'neg.f32 '#9'%f11, %f10;'
|
|
|
|
- #9'mov.f32 '#9'%f85, 0f3fc90000; '#9'// 1.57031'
|
|
|
|
- #9'mad.f32 '#9'%f86, %f85, %f11, %f1;'
|
|
|
|
- #9'mov.f32 '#9'%f87, 0f39fd8000; '#9'// 0.000483513'
|
|
|
|
- #9'mad.f32 '#9'%f88, %f87, %f11, %f86;'
|
|
|
|
- #9'mov.f32 '#9'%f89, 0f34a88000; '#9'// 3.13856e-007'
|
|
|
|
- #9'mad.f32 '#9'%f90, %f89, %f11, %f88;'
|
|
|
|
- #9'mov.f32 '#9'%f91, 0f2e85a309; '#9'// 6.0771e-011'
|
|
|
|
- #9'mad.f32 '#9'%f7, %f91, %f11, %f90;'
|
|
|
|
- '$Lt_0_1538:'
|
|
|
|
- #9'.loc'#9'18'#9'1949'#9'0'
|
|
|
|
- #9'add.s32 '#9'%r77, %r53, 1;'
|
|
|
|
- #9'mul.f32 '#9'%f19, %f7, %f7;'
|
|
|
|
- #9'and.b32 '#9'%r212, %r77, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r213, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p35, %r212, %r213;'
|
|
|
|
- #9'@%p35 bra '#9'$Lt_0_63490;'
|
|
|
|
- #9'.loc'#9'18'#9'1953'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f92, 0f3f800000; '#9'// 1'
|
|
|
|
- #9'mov.f32 '#9'%f93, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'mov.f32 '#9'%f94, 0f3d2aaaa5; '#9'// 0.0416666'
|
|
|
|
- #9'mov.f32 '#9'%f95, 0fbab6061a; '#9'// -0.00138873'
|
|
|
|
- #9'mov.f32 '#9'%f96, 0f37ccf5ce; '#9'// 2.44332e-005'
|
|
|
|
- #9'mad.f32 '#9'%f97, %f96, %f19, %f95;'
|
|
|
|
- #9'mad.f32 '#9'%f98, %f19, %f97, %f94;'
|
|
|
|
- #9'mad.f32 '#9'%f99, %f19, %f98, %f93;'
|
|
|
|
- #9'mad.f32 '#9'%f28, %f19, %f99, %f92;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_63234;'
|
|
|
|
- '$Lt_0_63490:'
|
|
|
|
- #9'.loc'#9'18'#9'1955'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f100, 0fbe2aaaa3; '#9'// -0.166667'
|
|
|
|
- #9'mov.f32 '#9'%f101, 0f3c08839e; '#9'// 0.00833216'
|
|
|
|
- #9'mov.f32 '#9'%f102, 0fb94ca1f9; '#9'// -0.000195153'
|
|
|
|
- #9'mad.f32 '#9'%f103, %f102, %f19, %f101;'
|
|
|
|
- #9'mad.f32 '#9'%f104, %f19, %f103, %f100;'
|
|
|
|
- #9'mul.f32 '#9'%f105, %f19, %f104;'
|
|
|
|
- #9'mad.f32 '#9'%f28, %f105, %f7, %f7;'
|
|
|
|
- '$Lt_0_63234:'
|
|
|
|
- #9'.loc'#9'18'#9'1957'#9'0'
|
|
|
|
- #9'neg.f32 '#9'%f106, %f28;'
|
|
|
|
- #9'and.b32 '#9'%r214, %r77, 2;'
|
|
|
|
- #9'mov.s32 '#9'%r215, 0;'
|
|
|
|
- #9'setp.ne.s32 '#9'%p36, %r214, %r215;'
|
|
|
|
- #9'selp.f32 '#9'%f28, %f106, %f28, %p36;'
|
|
|
|
- #9'mov.f32 '#9'%f82, %f28;'
|
|
|
|
- '$Lt_0_1282:'
|
|
|
|
- #9'.loc'#9'18'#9'1869'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r216, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p37, %r84, %r216;'
|
|
|
|
- #9'@%p37 bra '#9'$Lt_0_63746;'
|
|
|
|
- #9'mov.f32 '#9'%f107, 0f00000000; '#9'// 0'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1869'#9'0'
|
|
|
|
- #9'mul.rn.f32 '#9'%f38, %f1, %f107;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_258;'
|
|
|
|
- '$Lt_0_63746:'
|
|
|
|
- #9'.loc'#9'18'#9'1622'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f108, 0f473ba700; '#9'// 48039'
|
|
|
|
- #9'setp.gt.f32 '#9'%p38, %f2, %f108;'
|
|
|
|
- #9'@!%p38 bra '#9'$Lt_0_64258;'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1625'#9'0'
|
|
|
|
- #9'mov.b32 '#9'%r3, %f1;'
|
|
|
|
- #9'and.b32 '#9'%r4, %r3, -2147483648;'
|
|
|
|
- #9'mov.s32 '#9'%r87, %r4;'
|
|
|
|
- #9'.loc'#9'18'#9'24'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r6, %r3, 8;'
|
|
|
|
- #9'mov.s32 '#9'%r88, %r1;'
|
|
|
|
- #9'add.u32 '#9'%r8, %r1, 24;'
|
|
|
|
- #9'mov.u32 '#9'%r89, __cuda_result_44;'
|
|
|
|
- #9'or.b32 '#9'%r10, %r6, -2147483648;'
|
|
|
|
- #9'mov.u32 '#9'%r90, 0;'
|
|
|
|
- '$Lt_0_65282:'
|
|
|
|
- ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
|
|
|
|
- #9'.loc'#9'18'#9'1642'#9'0'
|
|
|
|
- #9'ld.const.u32 '#9'%r217, [%r88+0];'
|
|
|
|
- #9'mul.lo.u32 '#9'%r218, %r217, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r219, %r218, %r90;'
|
|
|
|
- #9'.loc'#9'18'#9'1643'#9'0'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r220, %r218, %r219;'
|
|
|
|
- #9'neg.s32 '#9'%r221, %r220;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r222, %r217, %r10;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r221, %r222;'
|
|
|
|
- #9'.loc'#9'18'#9'1644'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[%r89+0], %r219;'
|
|
|
|
- #9'add.u32 '#9'%r89, %r89, 4;'
|
|
|
|
- #9'add.u32 '#9'%r88, %r88, 4;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p39, %r88, %r8;'
|
|
|
|
- #9'@%p39 bra '#9'$Lt_0_65282;'
|
|
|
|
- #9'.loc'#9'18'#9'1646'#9'0'
|
|
|
|
- #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
|
|
|
|
- #9'.loc'#9'18'#9'1651'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r223, %r3, 1;'
|
|
|
|
- #9'shr.u32 '#9'%r19, %r223, 24;'
|
|
|
|
- #9'sub.u32 '#9'%r20, %r19, 128;'
|
|
|
|
- #9'shr.u32 '#9'%r21, %r20, 5;'
|
|
|
|
- #9'mov.s32 '#9'%r224, 4;'
|
|
|
|
- #9'sub.s32 '#9'%r23, %r224, %r21;'
|
|
|
|
- #9'mul.lo.u32 '#9'%r24, %r23, 4;'
|
|
|
|
- #9'add.u32 '#9'%r99, %r24, %r86;'
|
|
|
|
- #9'ld.local.u32 '#9'%r90, [%r99+8];'
|
|
|
|
- #9'.loc'#9'18'#9'1652'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r100, [%r99+4];'
|
|
|
|
- #9'and.b32 '#9'%r27, %r20, 31;'
|
|
|
|
- #9'mov.u32 '#9'%r225, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p40, %r27, %r225;'
|
|
|
|
- #9'@%p40 bra '#9'$Lt_0_65794;'
|
|
|
|
- #9'.loc'#9'18'#9'1655'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r226, 32;'
|
|
|
|
- #9'sub.s32 '#9'%r30, %r226, %r27;'
|
|
|
|
- #9'shr.u32 '#9'%r227, %r100, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r228, %r90, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r227, %r228;'
|
|
|
|
- #9'.loc'#9'18'#9'1656'#9'0'
|
|
|
|
- #9'ld.local.u32 '#9'%r229, [%r99+0];'
|
|
|
|
- #9'shr.u32 '#9'%r230, %r229, %r30;'
|
|
|
|
- #9'shl.b32 '#9'%r231, %r100, %r27;'
|
|
|
|
- #9'add.u32 '#9'%r100, %r230, %r231;'
|
|
|
|
- '$Lt_0_65794:'
|
|
|
|
- #9'.loc'#9'18'#9'1658'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r108, %r90, 30;'
|
|
|
|
- #9'.loc'#9'18'#9'1660'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r232, %r100, 30;'
|
|
|
|
- #9'shl.b32 '#9'%r233, %r90, 2;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r232, %r233;'
|
|
|
|
- #9'.loc'#9'18'#9'1661'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 2;'
|
|
|
|
- #9'mov.u32 '#9'%r234, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p41, %r100, %r234;'
|
|
|
|
- #9'@%p41 bra '#9'$Lt_0_66562;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r235, %r90, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r236, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r237, %r235, %r236;'
|
|
|
|
- #9'neg.s32 '#9'%r238, %r237;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_66306;'
|
|
|
|
- '$Lt_0_66562:'
|
|
|
|
- #9'mov.u32 '#9'%r239, -2147483648;'
|
|
|
|
- #9'set.gt.u32.u32 '#9'%r240, %r90, %r239;'
|
|
|
|
- #9'neg.s32 '#9'%r238, %r240;'
|
|
|
|
- '$Lt_0_66306:'
|
|
|
|
- #9'.loc'#9'18'#9'1663'#9'0'
|
|
|
|
- #9'add.u32 '#9'%r108, %r108, %r238;'
|
|
|
|
- #9'.loc'#9'18'#9'1662'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r241, %r108;'
|
|
|
|
- #9'mov.u32 '#9'%r242, 0;'
|
|
|
|
- #9'setp.ne.u32 '#9'%p42, %r4, %r242;'
|
|
|
|
- #9'selp.s32 '#9'%r108, %r241, %r108, %p42;'
|
|
|
|
- #9'mov.u32 '#9'%r243, 0;'
|
|
|
|
- #9'setp.eq.u32 '#9'%p43, %r238, %r243;'
|
|
|
|
- #9'@%p43 bra '#9'$Lt_0_66818;'
|
|
|
|
- #9'.loc'#9'18'#9'1668'#9'0'
|
|
|
|
- #9'neg.s32 '#9'%r100, %r100;'
|
|
|
|
- #9'.loc'#9'18'#9'1670'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r244, 0;'
|
|
|
|
- #9'set.eq.u32.u32 '#9'%r245, %r100, %r244;'
|
|
|
|
- #9'neg.s32 '#9'%r246, %r245;'
|
|
|
|
- #9'not.b32 '#9'%r247, %r90;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r246, %r247;'
|
|
|
|
- #9'.loc'#9'18'#9'1671'#9'0'
|
|
|
|
- #9'xor.b32 '#9'%r87, %r4, -2147483648;'
|
|
|
|
- '$Lt_0_66818:'
|
|
|
|
- #9'.loc'#9'18'#9'1673'#9'0'
|
|
|
|
- #9'mov.s32 '#9'%r125, %r108;'
|
|
|
|
- #9'mov.u32 '#9'%r248, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p44, %r90, %r248;'
|
|
|
|
- #9'@%p44 bra '#9'$Lt_0_70146;'
|
|
|
|
- #9'mov.u32 '#9'%r127, 0;'
|
|
|
|
- '$Lt_0_67842:'
|
|
|
|
-
|
|
|
|
- ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
|
|
|
|
- 'tions: unknown'
|
|
|
|
- #9'.loc'#9'18'#9'1677'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r249, %r100, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r250, %r90, 1;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r249, %r250;'
|
|
|
|
- #9'.loc'#9'18'#9'1678'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1679'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r127, %r127, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r251, 0;'
|
|
|
|
- #9'setp.gt.s32 '#9'%p45, %r90, %r251;'
|
|
|
|
- #9'@%p45 bra '#9'$Lt_0_67842;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_67330;'
|
|
|
|
- '$Lt_0_70146:'
|
|
|
|
- #9'mov.u32 '#9'%r127, 0;'
|
|
|
|
- '$Lt_0_67330:'
|
|
|
|
- #9'.loc'#9'18'#9'1681'#9'0'
|
|
|
|
- #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
|
|
|
|
- #9'.loc'#9'18'#9'1682'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r252, -921707870;'
|
|
|
|
- #9'mul.hi.u32 '#9'%r90, %r90, %r252;'
|
|
|
|
- #9'mov.u32 '#9'%r253, 0;'
|
|
|
|
- #9'setp.le.s32 '#9'%p46, %r90, %r253;'
|
|
|
|
- #9'@%p46 bra '#9'$Lt_0_68354;'
|
|
|
|
- #9'.loc'#9'18'#9'1684'#9'0'
|
|
|
|
- #9'shr.u32 '#9'%r254, %r100, 31;'
|
|
|
|
- #9'shl.b32 '#9'%r255, %r90, 1;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r254, %r255;'
|
|
|
|
- #9'.loc'#9'18'#9'1685'#9'0'
|
|
|
|
- #9'shl.b32 '#9'%r100, %r100, 1;'
|
|
|
|
- #9'.loc'#9'18'#9'1686'#9'0'
|
|
|
|
- #9'sub.u32 '#9'%r127, %r127, 1;'
|
|
|
|
- '$Lt_0_68354:'
|
|
|
|
- #9'.loc'#9'18'#9'1688'#9'0'
|
|
|
|
- #9'mov.u32 '#9'%r256, 0;'
|
|
|
|
- #9'set.ne.u32.u32 '#9'%r257, %r100, %r256;'
|
|
|
|
- #9'neg.s32 '#9'%r258, %r257;'
|
|
|
|
- #9'add.u32 '#9'%r90, %r258, %r90;'
|
|
|
|
- #9'shl.b32 '#9'%r259, %r90, 24;'
|
|
|
|
- #9'mov.s32 '#9'%r260, 0;'
|
|
|
|
- #9'set.lt.u32.s32 '#9'%r261, %r259, %r260;'
|
|
|
|
- #9'neg.s32 '#9'%r262, %r261;'
|
|
|
|
- #9'shr.u32 '#9'%r263, %r90, 8;'
|
|
|
|
- #9'add.u32 '#9'%r264, %r127, 126;'
|
|
|
|
- #9'shl.b32 '#9'%r265, %r264, 23;'
|
|
|
|
- #9'add.u32 '#9'%r266, %r263, %r265;'
|
|
|
|
- #9'add.u32 '#9'%r267, %r262, %r266;'
|
|
|
|
- #9'or.b32 '#9'%r268, %r87, %r267;'
|
|
|
|
- #9'mov.b32 '#9'%f40, %r268;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_514;'
|
|
|
|
- '$Lt_0_64258:'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f109, 0f3f22f983; '#9'// 0.63662'
|
|
|
|
- #9'.loc'#9'18'#9'1946'#9'0'
|
|
|
|
- #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
|
|
|
|
- #9'.loc'#9'18'#9'1703'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f9, %f1, %f109;'
|
|
|
|
- #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
|
|
|
|
- #9'mov.s32 '#9'%r125, %r76;'
|
|
|
|
- #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
|
|
|
|
- #9'neg.f32 '#9'%f11, %f10;'
|
|
|
|
- #9'mov.f32 '#9'%f110, 0f3fc90000; '#9'// 1.57031'
|
|
|
|
- #9'mad.f32 '#9'%f111, %f110, %f11, %f1;'
|
|
|
|
- #9'mov.f32 '#9'%f112, 0f39fd8000; '#9'// 0.000483513'
|
|
|
|
- #9'mad.f32 '#9'%f113, %f112, %f11, %f111;'
|
|
|
|
- #9'mov.f32 '#9'%f114, 0f34a88000; '#9'// 3.13856e-007'
|
|
|
|
- #9'mad.f32 '#9'%f115, %f114, %f11, %f113;'
|
|
|
|
- #9'mov.f32 '#9'%f116, 0f2e85a309; '#9'// 6.0771e-011'
|
|
|
|
- #9'mad.f32 '#9'%f40, %f116, %f11, %f115;'
|
|
|
|
- '$Lt_0_514:'
|
|
|
|
- #9'.loc'#9'18'#9'1872'#9'0'
|
|
|
|
- #9'mul.f32 '#9'%f49, %f40, %f40;'
|
|
|
|
- #9'and.b32 '#9'%r269, %r125, 1;'
|
|
|
|
- #9'mov.u32 '#9'%r270, 0;'
|
|
|
|
- #9'setp.eq.s32 '#9'%p47, %r269, %r270;'
|
|
|
|
- #9'@%p47 bra '#9'$Lt_0_69122;'
|
|
|
|
- #9'.loc'#9'18'#9'1875'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f117, 0f3f800000; '#9'// 1'
|
|
|
|
- #9'mov.f32 '#9'%f118, 0fbf000000; '#9'// -0.5'
|
|
|
|
- #9'mov.f32 '#9'%f119, 0f3d2aaaa5; '#9'// 0.0416666'
|
|
|
|
- #9'mov.f32 '#9'%f120, 0fbab6061a; '#9'// -0.00138873'
|
|
|
|
- #9'mov.f32 '#9'%f121, 0f37ccf5ce; '#9'// 2.44332e-005'
|
|
|
|
- #9'mad.f32 '#9'%f122, %f121, %f49, %f120;'
|
|
|
|
- #9'mad.f32 '#9'%f123, %f49, %f122, %f119;'
|
|
|
|
- #9'mad.f32 '#9'%f124, %f49, %f123, %f118;'
|
|
|
|
- #9'mad.f32 '#9'%f58, %f49, %f124, %f117;'
|
|
|
|
- #9'bra.uni '#9'$Lt_0_68866;'
|
|
|
|
- '$Lt_0_69122:'
|
|
|
|
- #9'.loc'#9'18'#9'1877'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f125, 0fbe2aaaa3; '#9'// -0.166667'
|
|
|
|
- #9'mov.f32 '#9'%f126, 0f3c08839e; '#9'// 0.00833216'
|
|
|
|
- #9'mov.f32 '#9'%f127, 0fb94ca1f9; '#9'// -0.000195153'
|
|
|
|
- #9'mad.f32 '#9'%f128, %f127, %f49, %f126;'
|
|
|
|
- #9'mad.f32 '#9'%f129, %f49, %f128, %f125;'
|
|
|
|
- #9'mul.f32 '#9'%f130, %f49, %f129;'
|
|
|
|
- #9'mad.f32 '#9'%f58, %f130, %f40, %f40;'
|
|
|
|
- '$Lt_0_68866:'
|
|
|
|
- #9'.loc'#9'18'#9'1879'#9'0'
|
|
|
|
- #9'neg.f32 '#9'%f131, %f58;'
|
|
|
|
- #9'and.b32 '#9'%r271, %r125, 2;'
|
|
|
|
- #9'mov.s32 '#9'%r272, 0;'
|
|
|
|
- #9'setp.ne.s32 '#9'%p48, %r271, %r272;'
|
|
|
|
- #9'selp.f32 '#9'%f58, %f131, %f58, %p48;'
|
|
|
|
- #9'mov.f32 '#9'%f38, %f58;'
|
|
|
|
- '$Lt_0_258:'
|
|
|
|
- #9'.loc'#9'15'#9'23'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f132, %f80;'
|
|
|
|
- #9'mul.f32 '#9'%f133, %f82, %f73;'
|
|
|
|
- #9'mad.f32 '#9'%f134, %f75, %f38, %f133;'
|
|
|
|
- #9'mov.f32 '#9'%f135, 0f3f000000; '#9'// 0.5'
|
|
|
|
- #9'add.f32 '#9'%f136, %f134, %f135;'
|
|
|
|
- #9'mov.f32 '#9'%f137, 0f00000000; '#9'// 0'
|
|
|
|
- #9'mov.f32 '#9'%f138, 0f00000000; '#9'// 0'
|
|
|
|
-
|
|
|
|
- #9'tex.2d.v4.f32.f32 {%f139,%f140,%f141,%f142},[tex,{%f132,%f136,%' +
|
|
|
|
- 'f137,%f138}];'
|
|
|
|
- #9'.loc'#9'15'#9'26'#9'0'
|
|
|
|
- #9'mov.f32 '#9'%f143, %f139;'
|
|
|
|
- #9'ld.param.u32 '#9'%r273, [__cudaparm_transformKernel_g_odata];'
|
|
|
|
- #9'.loc'#9'15'#9'22'#9'0'
|
|
|
|
- #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
|
|
|
|
- #9'.loc'#9'15'#9'26'#9'0'
|
|
|
|
- #9'mul.lo.u32 '#9'%r274, %r155, %r157;'
|
|
|
|
- #9'add.u32 '#9'%r275, %r159, %r274;'
|
|
|
|
- #9'mul.lo.u32 '#9'%r276, %r275, 4;'
|
|
|
|
- #9'add.u32 '#9'%r277, %r273, %r276;'
|
|
|
|
- #9'st.global.f32 '#9'[%r277+0], %f143;'
|
|
|
|
- #9'.loc'#9'15'#9'27'#9'0'
|
|
|
|
#9'exit;'
|
|
#9'exit;'
|
|
'$LDWend_transformKernel:'
|
|
'$LDWend_transformKernel:'
|
|
#9'} // transformKernel'
|
|
#9'} // transformKernel'
|