fSimpleTexD.dfm 46 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. object Form1: TForm1
  2. Left = 0
  3. Top = 0
  4. BorderStyle = bsDialog
  5. Caption = 'CUDA: simple using of texture'
  6. ClientHeight = 446
  7. ClientWidth = 782
  8. Color = clBtnFace
  9. Font.Charset = DEFAULT_CHARSET
  10. Font.Color = clWindowText
  11. Font.Height = -15
  12. Font.Name = 'Segoe UI'
  13. Font.Style = []
  14. Position = poScreenCenter
  15. OnCreate = FormCreate
  16. OnDestroy = FormDestroy
  17. PixelsPerInch = 120
  18. TextHeight = 20
  19. object Button1: TButton
  20. Left = 608
  21. Top = 328
  22. Width = 94
  23. Height = 32
  24. Margins.Left = 4
  25. Margins.Top = 4
  26. Margins.Right = 4
  27. Margins.Bottom = 4
  28. Caption = 'Button1'
  29. TabOrder = 0
  30. OnClick = Button1Click
  31. end
  32. object Memo1: TMemo
  33. Left = 0
  34. Top = 0
  35. Width = 782
  36. Height = 273
  37. Margins.Left = 4
  38. Margins.Top = 4
  39. Margins.Right = 4
  40. Margins.Bottom = 4
  41. Align = alTop
  42. ScrollBars = ssVertical
  43. TabOrder = 1
  44. end
  45. object GLCUDA1: TGLCUDA
  46. ComputingDevice = GLCUDADevice1
  47. Left = 104
  48. Top = 56
  49. object MainModule: TCUDAModule
  50. Code.Strings = (
  51. #9'.version 1.4'
  52. #9'.target sm_10, map_f64_to_f32'
  53. #9'// compiled with C:\CUDA\bin/../open64/lib//be.exe'
  54. #9'// nvopencc 3.0 built on 2009-10-29'
  55. ''
  56. #9'//-----------------------------------------------------------'
  57. #9'// Compiling C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a' +
  58. '14_00000001-9_temp.cpp3.i (C:/Users/YARUND~1/AppData/Local/Temp/' +
  59. 'ccBI#.a03752)'
  60. #9'//-----------------------------------------------------------'
  61. ''
  62. #9'//-----------------------------------------------------------'
  63. #9'// Options:'
  64. #9'//-----------------------------------------------------------'
  65. #9'// Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32'
  66. #9'// -O3'#9'(Optimization level)'
  67. #9'// -g0'#9'(Debug level)'
  68. #9'// -m2'#9'(Report advisories)'
  69. #9'//-----------------------------------------------------------'
  70. ''
  71. #9'.file'#9'1'#9'"C:/Users/YARUND~1/AppData/Local/Temp/tmpxft_00000a14_0' +
  72. '0000001-8_temp.cudafe2.gpu"'
  73. #9'.file'#9'2'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLUD' +
  74. 'E\crtdefs.h"'
  75. #9'.file'#9'3'#9'"C:\CUDA\include\crt/device_runtime.h"'
  76. #9'.file'#9'4'#9'"C:\CUDA\include\host_defines.h"'
  77. #9'.file'#9'5'#9'"C:\CUDA\include\builtin_types.h"'
  78. #9'.file'#9'6'#9'"c:\cuda\include\device_types.h"'
  79. #9'.file'#9'7'#9'"c:\cuda\include\driver_types.h"'
  80. #9'.file'#9'8'#9'"c:\cuda\include\surface_types.h"'
  81. #9'.file'#9'9'#9'"c:\cuda\include\texture_types.h"'
  82. #9'.file'#9'10'#9'"c:\cuda\include\vector_types.h"'
  83. #9'.file'#9'11'#9'"c:\cuda\include\host_defines.h"'
  84. #9'.file'#9'12'#9'"C:\CUDA\include\device_launch_parameters.h"'
  85. #9'.file'#9'13'#9'"c:\cuda\include\crt\storage_class.h"'
  86. #9'.file'#9'14'#9'"C:\Program Files\Microsoft Visual Studio 9.0\VC\INCLU' +
  87. 'DE\time.h"'
  88. #9'.file'#9'15'#9'"C:/Users/YARUND~1/AppData/Local/Temp/temp.cu"'
  89. #9'.file'#9'16'#9'"C:\CUDA\include\common_functions.h"'
  90. #9'.file'#9'17'#9'"c:\cuda\include\crt/func_macro.h"'
  91. #9'.file'#9'18'#9'"c:\cuda\include\math_functions.h"'
  92. #9'.file'#9'19'#9'"c:\cuda\include\device_functions.h"'
  93. #9'.file'#9'20'#9'"c:\cuda\include\math_constants.h"'
  94. #9'.file'#9'21'#9'"c:\cuda\include\sm_11_atomic_functions.h"'
  95. #9'.file'#9'22'#9'"c:\cuda\include\sm_12_atomic_functions.h"'
  96. #9'.file'#9'23'#9'"c:\cuda\include\sm_13_double_functions.h"'
  97. #9'.file'#9'24'#9'"c:\cuda\include\common_types.h"'
  98. #9'.file'#9'25'#9'"c:\cuda\include\sm_20_atomic_functions.h"'
  99. #9'.file'#9'26'#9'"c:\cuda\include\sm_20_intrinsics.h"'
  100. #9'.file'#9'27'#9'"c:\cuda\include\surface_functions.h"'
  101. #9'.file'#9'28'#9'"c:\cuda\include\texture_fetch_functions.h"'
  102. #9'.file'#9'29'#9'"c:\cuda\include\math_functions_dbl_ptx1.h"'
  103. ''
  104. #9'.tex .u32 tex;'
  105. #9'.const .align 4 .b8 __cudart_i2opi_f[24] = {65,144,67,60,153,14' +
  106. '9,98,219,192,221,52,245,209,87,39,252,41,21,68,78,110,131,249,16' +
  107. '2};'
  108. ''
  109. #9'.entry transformKernel ('
  110. #9#9'.param .u32 __cudaparm_transformKernel_g_odata,'
  111. #9#9'.param .s32 __cudaparm_transformKernel_width,'
  112. #9#9'.param .s32 __cudaparm_transformKernel_height,'
  113. #9#9'.param .f32 __cudaparm_transformKernel_theta)'
  114. #9'{'
  115. #9'.reg .u16 %rh<6>;'
  116. #9'.reg .u32 %r<279>;'
  117. #9'.reg .f32 %f<145>;'
  118. #9'.reg .pred %p<50>;'
  119. #9'.local .align 4 .b8 __cuda_result_16[28];'
  120. #9'.local .align 4 .b8 __cuda_result_44[28];'
  121. #9'.loc'#9'15'#9'10'#9'0'
  122. '$LBB1_transformKernel:'
  123. #9'.loc'#9'18'#9'1946'#9'0'
  124. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  125. #9'abs.f32 '#9'%f2, %f1;'
  126. #9'mov.f32 '#9'%f3, 0f7f800000; '#9'// 1.#INF'
  127. #9'setp.eq.f32 '#9'%p1, %f2, %f3;'
  128. #9'@!%p1 bra '#9'$Lt_0_46850;'
  129. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  130. #9'neg.f32 '#9'%f4, %f1;'
  131. #9'add.rn.f32 '#9'%f5, %f1, %f4;'
  132. #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
  133. #9'mov.u32 '#9'%r2, __cuda_result_16;'
  134. #9'bra.uni '#9'$Lt_0_3330;'
  135. '$Lt_0_46850:'
  136. #9'.loc'#9'18'#9'1622'#9'0'
  137. #9'mov.f32 '#9'%f6, 0f473ba700; '#9'// 48039'
  138. #9'setp.gt.f32 '#9'%p2, %f2, %f6;'
  139. #9'.loc'#9'18'#9'1625'#9'0'
  140. #9'mov.u32 '#9'%r1, __cudart_i2opi_f;'
  141. #9'.loc'#9'18'#9'1622'#9'0'
  142. #9'@!%p2 bra '#9'$Lt_0_47362;'
  143. #9'.loc'#9'18'#9'1946'#9'0'
  144. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  145. #9'.loc'#9'18'#9'1625'#9'0'
  146. #9'mov.b32 '#9'%r3, %f1;'
  147. #9'and.b32 '#9'%r4, %r3, -2147483648;'
  148. #9'mov.s32 '#9'%r5, %r4;'
  149. #9'.loc'#9'18'#9'24'#9'0'
  150. #9'shl.b32 '#9'%r6, %r3, 8;'
  151. #9'mov.s32 '#9'%r7, %r1;'
  152. #9'add.u32 '#9'%r8, %r1, 24;'
  153. #9'mov.u32 '#9'%r9, __cuda_result_16;'
  154. #9'or.b32 '#9'%r10, %r6, -2147483648;'
  155. #9'mov.u32 '#9'%r11, 0;'
  156. '$Lt_0_48386:'
  157. ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
  158. #9'.loc'#9'18'#9'1642'#9'0'
  159. #9'ld.const.u32 '#9'%r12, [%r7+0];'
  160. #9'mul.lo.u32 '#9'%r13, %r12, %r10;'
  161. #9'add.u32 '#9'%r14, %r13, %r11;'
  162. #9'.loc'#9'18'#9'1643'#9'0'
  163. #9'set.gt.u32.u32 '#9'%r15, %r13, %r14;'
  164. #9'neg.s32 '#9'%r16, %r15;'
  165. #9'mul.hi.u32 '#9'%r17, %r12, %r10;'
  166. #9'add.u32 '#9'%r11, %r16, %r17;'
  167. #9'.loc'#9'18'#9'1644'#9'0'
  168. #9'st.local.u32 '#9'[%r9+0], %r14;'
  169. #9'add.u32 '#9'%r9, %r9, 4;'
  170. #9'add.u32 '#9'%r7, %r7, 4;'
  171. #9'setp.ne.u32 '#9'%p3, %r7, %r8;'
  172. #9'@%p3 bra '#9'$Lt_0_48386;'
  173. #9'.loc'#9'18'#9'1646'#9'0'
  174. #9'mov.u32 '#9'%r2, __cuda_result_16;'
  175. #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
  176. #9'.loc'#9'18'#9'1651'#9'0'
  177. #9'shl.b32 '#9'%r18, %r3, 1;'
  178. #9'shr.u32 '#9'%r19, %r18, 24;'
  179. #9'sub.u32 '#9'%r20, %r19, 128;'
  180. #9'shr.u32 '#9'%r21, %r20, 5;'
  181. #9'mov.s32 '#9'%r22, 4;'
  182. #9'sub.s32 '#9'%r23, %r22, %r21;'
  183. #9'mul.lo.u32 '#9'%r24, %r23, 4;'
  184. #9'add.u32 '#9'%r25, %r24, %r2;'
  185. #9'ld.local.u32 '#9'%r11, [%r25+8];'
  186. #9'.loc'#9'18'#9'1652'#9'0'
  187. #9'ld.local.u32 '#9'%r26, [%r25+4];'
  188. #9'and.b32 '#9'%r27, %r20, 31;'
  189. #9'mov.u32 '#9'%r28, 0;'
  190. #9'setp.eq.u32 '#9'%p4, %r27, %r28;'
  191. #9'@%p4 bra '#9'$Lt_0_48898;'
  192. #9'.loc'#9'18'#9'1655'#9'0'
  193. #9'mov.s32 '#9'%r29, 32;'
  194. #9'sub.s32 '#9'%r30, %r29, %r27;'
  195. #9'shr.u32 '#9'%r31, %r26, %r30;'
  196. #9'shl.b32 '#9'%r32, %r11, %r27;'
  197. #9'add.u32 '#9'%r11, %r31, %r32;'
  198. #9'.loc'#9'18'#9'1656'#9'0'
  199. #9'ld.local.u32 '#9'%r33, [%r25+0];'
  200. #9'shr.u32 '#9'%r34, %r33, %r30;'
  201. #9'shl.b32 '#9'%r35, %r26, %r27;'
  202. #9'add.u32 '#9'%r26, %r34, %r35;'
  203. '$Lt_0_48898:'
  204. #9'.loc'#9'18'#9'1658'#9'0'
  205. #9'shr.u32 '#9'%r36, %r11, 30;'
  206. #9'.loc'#9'18'#9'1660'#9'0'
  207. #9'shr.u32 '#9'%r37, %r26, 30;'
  208. #9'shl.b32 '#9'%r38, %r11, 2;'
  209. #9'add.u32 '#9'%r11, %r37, %r38;'
  210. #9'.loc'#9'18'#9'1661'#9'0'
  211. #9'shl.b32 '#9'%r26, %r26, 2;'
  212. #9'mov.u32 '#9'%r39, 0;'
  213. #9'setp.eq.u32 '#9'%p5, %r26, %r39;'
  214. #9'@%p5 bra '#9'$Lt_0_49666;'
  215. #9'.loc'#9'18'#9'1662'#9'0'
  216. #9'add.u32 '#9'%r40, %r11, 1;'
  217. #9'mov.u32 '#9'%r41, -2147483648;'
  218. #9'set.gt.u32.u32 '#9'%r42, %r40, %r41;'
  219. #9'neg.s32 '#9'%r43, %r42;'
  220. #9'bra.uni '#9'$Lt_0_49410;'
  221. '$Lt_0_49666:'
  222. #9'mov.u32 '#9'%r44, -2147483648;'
  223. #9'set.gt.u32.u32 '#9'%r45, %r11, %r44;'
  224. #9'neg.s32 '#9'%r43, %r45;'
  225. '$Lt_0_49410:'
  226. #9'.loc'#9'18'#9'1663'#9'0'
  227. #9'add.u32 '#9'%r36, %r36, %r43;'
  228. #9'.loc'#9'18'#9'1662'#9'0'
  229. #9'neg.s32 '#9'%r46, %r36;'
  230. #9'mov.u32 '#9'%r47, 0;'
  231. #9'setp.ne.u32 '#9'%p6, %r4, %r47;'
  232. #9'selp.s32 '#9'%r36, %r46, %r36, %p6;'
  233. #9'mov.u32 '#9'%r48, 0;'
  234. #9'setp.eq.u32 '#9'%p7, %r43, %r48;'
  235. #9'@%p7 bra '#9'$Lt_0_49922;'
  236. #9'.loc'#9'18'#9'1668'#9'0'
  237. #9'neg.s32 '#9'%r26, %r26;'
  238. #9'.loc'#9'18'#9'1670'#9'0'
  239. #9'mov.u32 '#9'%r49, 0;'
  240. #9'set.eq.u32.u32 '#9'%r50, %r26, %r49;'
  241. #9'neg.s32 '#9'%r51, %r50;'
  242. #9'not.b32 '#9'%r52, %r11;'
  243. #9'add.u32 '#9'%r11, %r51, %r52;'
  244. #9'.loc'#9'18'#9'1671'#9'0'
  245. #9'xor.b32 '#9'%r5, %r4, -2147483648;'
  246. '$Lt_0_49922:'
  247. #9'.loc'#9'18'#9'1673'#9'0'
  248. #9'mov.s32 '#9'%r53, %r36;'
  249. #9'mov.u32 '#9'%r54, 0;'
  250. #9'setp.le.s32 '#9'%p8, %r11, %r54;'
  251. #9'mov.u32 '#9'%r55, 0;'
  252. #9'@%p8 bra '#9'$Lt_0_69378;'
  253. '$Lt_0_50946:'
  254. ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
  255. 'tions: unknown'
  256. #9'.loc'#9'18'#9'1677'#9'0'
  257. #9'shr.u32 '#9'%r56, %r26, 31;'
  258. #9'shl.b32 '#9'%r57, %r11, 1;'
  259. #9'add.u32 '#9'%r11, %r56, %r57;'
  260. #9'.loc'#9'18'#9'1678'#9'0'
  261. #9'shl.b32 '#9'%r26, %r26, 1;'
  262. #9'.loc'#9'18'#9'1679'#9'0'
  263. #9'sub.u32 '#9'%r55, %r55, 1;'
  264. #9'mov.u32 '#9'%r58, 0;'
  265. #9'setp.gt.s32 '#9'%p9, %r11, %r58;'
  266. #9'@%p9 bra '#9'$Lt_0_50946;'
  267. #9'bra.uni '#9'$Lt_0_50434;'
  268. '$Lt_0_69378:'
  269. '$Lt_0_50434:'
  270. #9'.loc'#9'18'#9'1681'#9'0'
  271. #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
  272. #9'.loc'#9'18'#9'1682'#9'0'
  273. #9'mov.u32 '#9'%r59, -921707870;'
  274. #9'mul.hi.u32 '#9'%r11, %r11, %r59;'
  275. #9'mov.u32 '#9'%r60, 0;'
  276. #9'setp.le.s32 '#9'%p10, %r11, %r60;'
  277. #9'@%p10 bra '#9'$Lt_0_51458;'
  278. #9'.loc'#9'18'#9'1684'#9'0'
  279. #9'shr.u32 '#9'%r61, %r26, 31;'
  280. #9'shl.b32 '#9'%r62, %r11, 1;'
  281. #9'add.u32 '#9'%r11, %r61, %r62;'
  282. #9'.loc'#9'18'#9'1685'#9'0'
  283. #9'shl.b32 '#9'%r26, %r26, 1;'
  284. #9'.loc'#9'18'#9'1686'#9'0'
  285. #9'sub.u32 '#9'%r55, %r55, 1;'
  286. '$Lt_0_51458:'
  287. #9'.loc'#9'18'#9'1688'#9'0'
  288. #9'mov.u32 '#9'%r63, 0;'
  289. #9'set.ne.u32.u32 '#9'%r64, %r26, %r63;'
  290. #9'neg.s32 '#9'%r65, %r64;'
  291. #9'add.u32 '#9'%r11, %r65, %r11;'
  292. #9'shl.b32 '#9'%r66, %r11, 24;'
  293. #9'mov.s32 '#9'%r67, 0;'
  294. #9'set.lt.u32.s32 '#9'%r68, %r66, %r67;'
  295. #9'neg.s32 '#9'%r69, %r68;'
  296. #9'shr.u32 '#9'%r70, %r11, 8;'
  297. #9'add.u32 '#9'%r71, %r55, 126;'
  298. #9'shl.b32 '#9'%r72, %r71, 23;'
  299. #9'add.u32 '#9'%r73, %r70, %r72;'
  300. #9'add.u32 '#9'%r74, %r69, %r73;'
  301. #9'or.b32 '#9'%r75, %r5, %r74;'
  302. #9'mov.b32 '#9'%f7, %r75;'
  303. #9'bra.uni '#9'$Lt_0_3586;'
  304. '$Lt_0_47362:'
  305. #9'.loc'#9'18'#9'1703'#9'0'
  306. #9'mov.f32 '#9'%f8, 0f3f22f983; '#9'// 0.63662'
  307. #9'.loc'#9'18'#9'1946'#9'0'
  308. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  309. #9'.loc'#9'18'#9'1703'#9'0'
  310. #9'mul.f32 '#9'%f9, %f1, %f8;'
  311. #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
  312. #9'mov.s32 '#9'%r53, %r76;'
  313. #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
  314. #9'neg.f32 '#9'%f11, %f10;'
  315. #9'mov.f32 '#9'%f12, 0f3fc90000; '#9'// 1.57031'
  316. #9'mad.f32 '#9'%f13, %f12, %f11, %f1;'
  317. #9'mov.f32 '#9'%f14, 0f39fd8000; '#9'// 0.000483513'
  318. #9'mad.f32 '#9'%f15, %f14, %f11, %f13;'
  319. #9'mov.f32 '#9'%f16, 0f34a88000; '#9'// 3.13856e-007'
  320. #9'mad.f32 '#9'%f17, %f16, %f11, %f15;'
  321. #9'mov.f32 '#9'%f18, 0f2e85a309; '#9'// 6.0771e-011'
  322. #9'mad.f32 '#9'%f7, %f18, %f11, %f17;'
  323. #9'mov.u32 '#9'%r2, __cuda_result_16;'
  324. '$Lt_0_3586:'
  325. #9'.loc'#9'18'#9'1949'#9'0'
  326. #9'add.s32 '#9'%r77, %r53, 1;'
  327. #9'mul.f32 '#9'%f19, %f7, %f7;'
  328. #9'and.b32 '#9'%r78, %r77, 1;'
  329. #9'mov.u32 '#9'%r79, 0;'
  330. #9'setp.eq.s32 '#9'%p11, %r78, %r79;'
  331. #9'@%p11 bra '#9'$Lt_0_52226;'
  332. #9'.loc'#9'18'#9'1953'#9'0'
  333. #9'mov.f32 '#9'%f20, 0f3f800000; '#9'// 1'
  334. #9'mov.f32 '#9'%f21, 0fbf000000; '#9'// -0.5'
  335. #9'mov.f32 '#9'%f22, 0f3d2aaaa5; '#9'// 0.0416666'
  336. #9'mov.f32 '#9'%f23, 0fbab6061a; '#9'// -0.00138873'
  337. #9'mov.f32 '#9'%f24, 0f37ccf5ce; '#9'// 2.44332e-005'
  338. #9'mad.f32 '#9'%f25, %f24, %f19, %f23;'
  339. #9'mad.f32 '#9'%f26, %f19, %f25, %f22;'
  340. #9'mad.f32 '#9'%f27, %f19, %f26, %f21;'
  341. #9'mad.f32 '#9'%f28, %f19, %f27, %f20;'
  342. #9'bra.uni '#9'$Lt_0_51970;'
  343. '$Lt_0_52226:'
  344. #9'.loc'#9'18'#9'1955'#9'0'
  345. #9'mov.f32 '#9'%f29, 0fbe2aaaa3; '#9'// -0.166667'
  346. #9'mov.f32 '#9'%f30, 0f3c08839e; '#9'// 0.00833216'
  347. #9'mov.f32 '#9'%f31, 0fb94ca1f9; '#9'// -0.000195153'
  348. #9'mad.f32 '#9'%f32, %f31, %f19, %f30;'
  349. #9'mad.f32 '#9'%f33, %f19, %f32, %f29;'
  350. #9'mul.f32 '#9'%f34, %f19, %f33;'
  351. #9'mad.f32 '#9'%f28, %f34, %f7, %f7;'
  352. '$Lt_0_51970:'
  353. #9'.loc'#9'18'#9'1957'#9'0'
  354. #9'neg.f32 '#9'%f35, %f28;'
  355. #9'and.b32 '#9'%r80, %r77, 2;'
  356. #9'mov.s32 '#9'%r81, 0;'
  357. #9'setp.ne.s32 '#9'%p12, %r80, %r81;'
  358. #9'selp.f32 '#9'%f28, %f35, %f28, %p12;'
  359. #9'mov.f32 '#9'%f5, %f28;'
  360. '$Lt_0_3330:'
  361. #9'.loc'#9'18'#9'1869'#9'0'
  362. #9'mov.f32 '#9'%f36, 0f00000000; '#9'// 0'
  363. #9'.loc'#9'18'#9'1946'#9'0'
  364. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  365. #9'.loc'#9'18'#9'1869'#9'0'
  366. #9'setp.eq.f32 '#9'%p13, %f1, %f36;'
  367. #9'selp.s32 '#9'%r82, 1, 0, %p13;'
  368. #9'selp.s32 '#9'%r83, 1, 0, %p1;'
  369. #9'or.b32 '#9'%r84, %r82, %r83;'
  370. #9'mov.u32 '#9'%r85, 0;'
  371. #9'setp.eq.s32 '#9'%p14, %r84, %r85;'
  372. #9'@%p14 bra '#9'$Lt_0_52482;'
  373. #9'mov.f32 '#9'%f37, 0f00000000; '#9'// 0'
  374. #9'.loc'#9'18'#9'1946'#9'0'
  375. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  376. #9'.loc'#9'18'#9'1869'#9'0'
  377. #9'mul.rn.f32 '#9'%f38, %f1, %f37;'
  378. #9'mov.u32 '#9'%r86, __cuda_result_44;'
  379. #9'bra.uni '#9'$Lt_0_2306;'
  380. '$Lt_0_52482:'
  381. #9'.loc'#9'18'#9'1622'#9'0'
  382. #9'mov.f32 '#9'%f39, 0f473ba700; '#9'// 48039'
  383. #9'setp.gt.f32 '#9'%p15, %f2, %f39;'
  384. #9'@!%p15 bra '#9'$Lt_0_52994;'
  385. #9'.loc'#9'18'#9'1946'#9'0'
  386. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  387. #9'.loc'#9'18'#9'1625'#9'0'
  388. #9'mov.b32 '#9'%r3, %f1;'
  389. #9'and.b32 '#9'%r4, %r3, -2147483648;'
  390. #9'mov.s32 '#9'%r87, %r4;'
  391. #9'.loc'#9'18'#9'24'#9'0'
  392. #9'shl.b32 '#9'%r6, %r3, 8;'
  393. #9'mov.s32 '#9'%r88, %r1;'
  394. #9'add.u32 '#9'%r8, %r1, 24;'
  395. #9'mov.u32 '#9'%r89, __cuda_result_44;'
  396. #9'or.b32 '#9'%r10, %r6, -2147483648;'
  397. #9'mov.u32 '#9'%r90, 0;'
  398. '$Lt_0_54018:'
  399. ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
  400. #9'.loc'#9'18'#9'1642'#9'0'
  401. #9'ld.const.u32 '#9'%r91, [%r88+0];'
  402. #9'mul.lo.u32 '#9'%r92, %r91, %r10;'
  403. #9'add.u32 '#9'%r93, %r92, %r90;'
  404. #9'.loc'#9'18'#9'1643'#9'0'
  405. #9'set.gt.u32.u32 '#9'%r94, %r92, %r93;'
  406. #9'neg.s32 '#9'%r95, %r94;'
  407. #9'mul.hi.u32 '#9'%r96, %r91, %r10;'
  408. #9'add.u32 '#9'%r90, %r95, %r96;'
  409. #9'.loc'#9'18'#9'1644'#9'0'
  410. #9'st.local.u32 '#9'[%r89+0], %r93;'
  411. #9'add.u32 '#9'%r89, %r89, 4;'
  412. #9'add.u32 '#9'%r88, %r88, 4;'
  413. #9'setp.ne.u32 '#9'%p16, %r88, %r8;'
  414. #9'@%p16 bra '#9'$Lt_0_54018;'
  415. #9'.loc'#9'18'#9'1646'#9'0'
  416. #9'mov.u32 '#9'%r86, __cuda_result_44;'
  417. #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
  418. #9'.loc'#9'18'#9'1651'#9'0'
  419. #9'shl.b32 '#9'%r97, %r3, 1;'
  420. #9'shr.u32 '#9'%r19, %r97, 24;'
  421. #9'sub.u32 '#9'%r20, %r19, 128;'
  422. #9'shr.u32 '#9'%r21, %r20, 5;'
  423. #9'mov.s32 '#9'%r98, 4;'
  424. #9'sub.s32 '#9'%r23, %r98, %r21;'
  425. #9'mul.lo.u32 '#9'%r24, %r23, 4;'
  426. #9'add.u32 '#9'%r99, %r24, %r86;'
  427. #9'ld.local.u32 '#9'%r90, [%r99+8];'
  428. #9'.loc'#9'18'#9'1652'#9'0'
  429. #9'ld.local.u32 '#9'%r100, [%r99+4];'
  430. #9'and.b32 '#9'%r27, %r20, 31;'
  431. #9'mov.u32 '#9'%r101, 0;'
  432. #9'setp.eq.u32 '#9'%p17, %r27, %r101;'
  433. #9'@%p17 bra '#9'$Lt_0_54530;'
  434. #9'.loc'#9'18'#9'1655'#9'0'
  435. #9'mov.s32 '#9'%r102, 32;'
  436. #9'sub.s32 '#9'%r30, %r102, %r27;'
  437. #9'shr.u32 '#9'%r103, %r100, %r30;'
  438. #9'shl.b32 '#9'%r104, %r90, %r27;'
  439. #9'add.u32 '#9'%r90, %r103, %r104;'
  440. #9'.loc'#9'18'#9'1656'#9'0'
  441. #9'ld.local.u32 '#9'%r105, [%r99+0];'
  442. #9'shr.u32 '#9'%r106, %r105, %r30;'
  443. #9'shl.b32 '#9'%r107, %r100, %r27;'
  444. #9'add.u32 '#9'%r100, %r106, %r107;'
  445. '$Lt_0_54530:'
  446. #9'.loc'#9'18'#9'1658'#9'0'
  447. #9'shr.u32 '#9'%r108, %r90, 30;'
  448. #9'.loc'#9'18'#9'1660'#9'0'
  449. #9'shr.u32 '#9'%r109, %r100, 30;'
  450. #9'shl.b32 '#9'%r110, %r90, 2;'
  451. #9'add.u32 '#9'%r90, %r109, %r110;'
  452. #9'.loc'#9'18'#9'1661'#9'0'
  453. #9'shl.b32 '#9'%r100, %r100, 2;'
  454. #9'mov.u32 '#9'%r111, 0;'
  455. #9'setp.eq.u32 '#9'%p18, %r100, %r111;'
  456. #9'@%p18 bra '#9'$Lt_0_55298;'
  457. #9'.loc'#9'18'#9'1662'#9'0'
  458. #9'add.u32 '#9'%r112, %r90, 1;'
  459. #9'mov.u32 '#9'%r113, -2147483648;'
  460. #9'set.gt.u32.u32 '#9'%r114, %r112, %r113;'
  461. #9'neg.s32 '#9'%r115, %r114;'
  462. #9'bra.uni '#9'$Lt_0_55042;'
  463. '$Lt_0_55298:'
  464. #9'mov.u32 '#9'%r116, -2147483648;'
  465. #9'set.gt.u32.u32 '#9'%r117, %r90, %r116;'
  466. #9'neg.s32 '#9'%r115, %r117;'
  467. '$Lt_0_55042:'
  468. #9'.loc'#9'18'#9'1663'#9'0'
  469. #9'add.u32 '#9'%r108, %r108, %r115;'
  470. #9'.loc'#9'18'#9'1662'#9'0'
  471. #9'neg.s32 '#9'%r118, %r108;'
  472. #9'mov.u32 '#9'%r119, 0;'
  473. #9'setp.ne.u32 '#9'%p19, %r4, %r119;'
  474. #9'selp.s32 '#9'%r108, %r118, %r108, %p19;'
  475. #9'mov.u32 '#9'%r120, 0;'
  476. #9'setp.eq.u32 '#9'%p20, %r115, %r120;'
  477. #9'@%p20 bra '#9'$Lt_0_55554;'
  478. #9'.loc'#9'18'#9'1668'#9'0'
  479. #9'neg.s32 '#9'%r100, %r100;'
  480. #9'.loc'#9'18'#9'1670'#9'0'
  481. #9'mov.u32 '#9'%r121, 0;'
  482. #9'set.eq.u32.u32 '#9'%r122, %r100, %r121;'
  483. #9'neg.s32 '#9'%r123, %r122;'
  484. #9'not.b32 '#9'%r124, %r90;'
  485. #9'add.u32 '#9'%r90, %r123, %r124;'
  486. #9'.loc'#9'18'#9'1671'#9'0'
  487. #9'xor.b32 '#9'%r87, %r4, -2147483648;'
  488. '$Lt_0_55554:'
  489. #9'.loc'#9'18'#9'1673'#9'0'
  490. #9'mov.s32 '#9'%r125, %r108;'
  491. #9'mov.u32 '#9'%r126, 0;'
  492. #9'setp.le.s32 '#9'%p21, %r90, %r126;'
  493. #9'mov.u32 '#9'%r127, 0;'
  494. #9'@%p21 bra '#9'$Lt_0_69634;'
  495. '$Lt_0_56578:'
  496. ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
  497. 'tions: unknown'
  498. #9'.loc'#9'18'#9'1677'#9'0'
  499. #9'shr.u32 '#9'%r128, %r100, 31;'
  500. #9'shl.b32 '#9'%r129, %r90, 1;'
  501. #9'add.u32 '#9'%r90, %r128, %r129;'
  502. #9'.loc'#9'18'#9'1678'#9'0'
  503. #9'shl.b32 '#9'%r100, %r100, 1;'
  504. #9'.loc'#9'18'#9'1679'#9'0'
  505. #9'sub.u32 '#9'%r127, %r127, 1;'
  506. #9'mov.u32 '#9'%r130, 0;'
  507. #9'setp.gt.s32 '#9'%p22, %r90, %r130;'
  508. #9'@%p22 bra '#9'$Lt_0_56578;'
  509. #9'bra.uni '#9'$Lt_0_56066;'
  510. '$Lt_0_69634:'
  511. '$Lt_0_56066:'
  512. #9'.loc'#9'18'#9'1681'#9'0'
  513. #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
  514. #9'.loc'#9'18'#9'1682'#9'0'
  515. #9'mov.u32 '#9'%r131, -921707870;'
  516. #9'mul.hi.u32 '#9'%r90, %r90, %r131;'
  517. #9'mov.u32 '#9'%r132, 0;'
  518. #9'setp.le.s32 '#9'%p23, %r90, %r132;'
  519. #9'@%p23 bra '#9'$Lt_0_57090;'
  520. #9'.loc'#9'18'#9'1684'#9'0'
  521. #9'shr.u32 '#9'%r133, %r100, 31;'
  522. #9'shl.b32 '#9'%r134, %r90, 1;'
  523. #9'add.u32 '#9'%r90, %r133, %r134;'
  524. #9'.loc'#9'18'#9'1685'#9'0'
  525. #9'shl.b32 '#9'%r100, %r100, 1;'
  526. #9'.loc'#9'18'#9'1686'#9'0'
  527. #9'sub.u32 '#9'%r127, %r127, 1;'
  528. '$Lt_0_57090:'
  529. #9'.loc'#9'18'#9'1688'#9'0'
  530. #9'mov.u32 '#9'%r135, 0;'
  531. #9'set.ne.u32.u32 '#9'%r136, %r100, %r135;'
  532. #9'neg.s32 '#9'%r137, %r136;'
  533. #9'add.u32 '#9'%r90, %r137, %r90;'
  534. #9'shl.b32 '#9'%r138, %r90, 24;'
  535. #9'mov.s32 '#9'%r139, 0;'
  536. #9'set.lt.u32.s32 '#9'%r140, %r138, %r139;'
  537. #9'neg.s32 '#9'%r141, %r140;'
  538. #9'shr.u32 '#9'%r142, %r90, 8;'
  539. #9'add.u32 '#9'%r143, %r127, 126;'
  540. #9'shl.b32 '#9'%r144, %r143, 23;'
  541. #9'add.u32 '#9'%r145, %r142, %r144;'
  542. #9'add.u32 '#9'%r146, %r141, %r145;'
  543. #9'or.b32 '#9'%r147, %r87, %r146;'
  544. #9'mov.b32 '#9'%f40, %r147;'
  545. #9'bra.uni '#9'$Lt_0_2562;'
  546. '$Lt_0_52994:'
  547. #9'.loc'#9'18'#9'1703'#9'0'
  548. #9'mov.f32 '#9'%f41, 0f3f22f983; '#9'// 0.63662'
  549. #9'.loc'#9'18'#9'1946'#9'0'
  550. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  551. #9'.loc'#9'18'#9'1703'#9'0'
  552. #9'mul.f32 '#9'%f9, %f1, %f41;'
  553. #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
  554. #9'mov.s32 '#9'%r125, %r76;'
  555. #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
  556. #9'neg.f32 '#9'%f11, %f10;'
  557. #9'mov.f32 '#9'%f42, 0f3fc90000; '#9'// 1.57031'
  558. #9'mad.f32 '#9'%f43, %f42, %f11, %f1;'
  559. #9'mov.f32 '#9'%f44, 0f39fd8000; '#9'// 0.000483513'
  560. #9'mad.f32 '#9'%f45, %f44, %f11, %f43;'
  561. #9'mov.f32 '#9'%f46, 0f34a88000; '#9'// 3.13856e-007'
  562. #9'mad.f32 '#9'%f47, %f46, %f11, %f45;'
  563. #9'mov.f32 '#9'%f48, 0f2e85a309; '#9'// 6.0771e-011'
  564. #9'mad.f32 '#9'%f40, %f48, %f11, %f47;'
  565. #9'mov.u32 '#9'%r86, __cuda_result_44;'
  566. '$Lt_0_2562:'
  567. #9'.loc'#9'18'#9'1872'#9'0'
  568. #9'mul.f32 '#9'%f49, %f40, %f40;'
  569. #9'and.b32 '#9'%r148, %r125, 1;'
  570. #9'mov.u32 '#9'%r149, 0;'
  571. #9'setp.eq.s32 '#9'%p24, %r148, %r149;'
  572. #9'@%p24 bra '#9'$Lt_0_57858;'
  573. #9'.loc'#9'18'#9'1875'#9'0'
  574. #9'mov.f32 '#9'%f50, 0f3f800000; '#9'// 1'
  575. #9'mov.f32 '#9'%f51, 0fbf000000; '#9'// -0.5'
  576. #9'mov.f32 '#9'%f52, 0f3d2aaaa5; '#9'// 0.0416666'
  577. #9'mov.f32 '#9'%f53, 0fbab6061a; '#9'// -0.00138873'
  578. #9'mov.f32 '#9'%f54, 0f37ccf5ce; '#9'// 2.44332e-005'
  579. #9'mad.f32 '#9'%f55, %f54, %f49, %f53;'
  580. #9'mad.f32 '#9'%f56, %f49, %f55, %f52;'
  581. #9'mad.f32 '#9'%f57, %f49, %f56, %f51;'
  582. #9'mad.f32 '#9'%f58, %f49, %f57, %f50;'
  583. #9'bra.uni '#9'$Lt_0_57602;'
  584. '$Lt_0_57858:'
  585. #9'.loc'#9'18'#9'1877'#9'0'
  586. #9'mov.f32 '#9'%f59, 0fbe2aaaa3; '#9'// -0.166667'
  587. #9'mov.f32 '#9'%f60, 0f3c08839e; '#9'// 0.00833216'
  588. #9'mov.f32 '#9'%f61, 0fb94ca1f9; '#9'// -0.000195153'
  589. #9'mad.f32 '#9'%f62, %f61, %f49, %f60;'
  590. #9'mad.f32 '#9'%f63, %f49, %f62, %f59;'
  591. #9'mul.f32 '#9'%f64, %f49, %f63;'
  592. #9'mad.f32 '#9'%f58, %f64, %f40, %f40;'
  593. '$Lt_0_57602:'
  594. #9'.loc'#9'18'#9'1879'#9'0'
  595. #9'neg.f32 '#9'%f65, %f58;'
  596. #9'and.b32 '#9'%r150, %r125, 2;'
  597. #9'mov.s32 '#9'%r151, 0;'
  598. #9'setp.ne.s32 '#9'%p25, %r150, %r151;'
  599. #9'selp.f32 '#9'%f58, %f65, %f58, %p25;'
  600. #9'mov.f32 '#9'%f38, %f58;'
  601. '$Lt_0_2306:'
  602. #9'.loc'#9'15'#9'22'#9'0'
  603. #9'mov.u16 '#9'%rh1, %ctaid.y;'
  604. #9'mov.u16 '#9'%rh2, %ntid.y;'
  605. #9'mul.wide.u16 '#9'%r152, %rh1, %rh2;'
  606. #9'ld.param.s32 '#9'%r153, [__cudaparm_transformKernel_height];'
  607. #9'cvt.rn.f32.s32 '#9'%f66, %r153;'
  608. #9'mov.u16 '#9'%rh3, %ctaid.x;'
  609. #9'mov.u16 '#9'%rh4, %ntid.x;'
  610. #9'mul.wide.u16 '#9'%r154, %rh3, %rh4;'
  611. #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
  612. #9'cvt.rn.f32.s32 '#9'%f67, %r155;'
  613. #9'cvt.u32.u16 '#9'%r156, %tid.y;'
  614. #9'add.u32 '#9'%r157, %r156, %r152;'
  615. #9'cvt.u32.u16 '#9'%r158, %tid.x;'
  616. #9'add.u32 '#9'%r159, %r158, %r154;'
  617. #9'cvt.rn.f32.u32 '#9'%f68, %r157;'
  618. #9'cvt.rn.f32.u32 '#9'%f69, %r159;'
  619. #9'div.full.f32 '#9'%f70, %f68, %f66;'
  620. #9'div.full.f32 '#9'%f71, %f69, %f67;'
  621. #9'mov.f32 '#9'%f72, 0fbf000000; '#9'// -0.5'
  622. #9'add.f32 '#9'%f73, %f70, %f72;'
  623. #9'mov.f32 '#9'%f74, 0fbf000000; '#9'// -0.5'
  624. #9'add.f32 '#9'%f75, %f71, %f74;'
  625. #9'mul.f32 '#9'%f76, %f38, %f73;'
  626. #9'mul.f32 '#9'%f77, %f75, %f5;'
  627. #9'sub.f32 '#9'%f78, %f77, %f76;'
  628. #9'mov.f32 '#9'%f79, 0f3f000000; '#9'// 0.5'
  629. #9'add.f32 '#9'%f80, %f78, %f79;'
  630. #9'.loc'#9'18'#9'1946'#9'0'
  631. #9'@!%p1 bra '#9'$Lt_0_58114;'
  632. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  633. #9'neg.f32 '#9'%f81, %f1;'
  634. #9'add.rn.f32 '#9'%f82, %f1, %f81;'
  635. #9'bra.uni '#9'$Lt_0_1282;'
  636. '$Lt_0_58114:'
  637. #9'.loc'#9'18'#9'1622'#9'0'
  638. #9'mov.f32 '#9'%f83, 0f473ba700; '#9'// 48039'
  639. #9'setp.gt.f32 '#9'%p26, %f2, %f83;'
  640. #9'@!%p26 bra '#9'$Lt_0_58626;'
  641. #9'.loc'#9'18'#9'1946'#9'0'
  642. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  643. #9'.loc'#9'18'#9'1625'#9'0'
  644. #9'mov.b32 '#9'%r3, %f1;'
  645. #9'and.b32 '#9'%r4, %r3, -2147483648;'
  646. #9'mov.s32 '#9'%r5, %r4;'
  647. #9'.loc'#9'18'#9'24'#9'0'
  648. #9'shl.b32 '#9'%r6, %r3, 8;'
  649. #9'mov.s32 '#9'%r7, %r1;'
  650. #9'add.u32 '#9'%r8, %r1, 24;'
  651. #9'mov.u32 '#9'%r9, __cuda_result_16;'
  652. #9'or.b32 '#9'%r10, %r6, -2147483648;'
  653. #9'mov.u32 '#9'%r11, 0;'
  654. '$Lt_0_59650:'
  655. ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
  656. #9'.loc'#9'18'#9'1642'#9'0'
  657. #9'ld.const.u32 '#9'%r160, [%r7+0];'
  658. #9'mul.lo.u32 '#9'%r161, %r160, %r10;'
  659. #9'add.u32 '#9'%r162, %r161, %r11;'
  660. #9'.loc'#9'18'#9'1643'#9'0'
  661. #9'set.gt.u32.u32 '#9'%r163, %r161, %r162;'
  662. #9'neg.s32 '#9'%r164, %r163;'
  663. #9'mul.hi.u32 '#9'%r165, %r160, %r10;'
  664. #9'add.u32 '#9'%r11, %r164, %r165;'
  665. #9'.loc'#9'18'#9'1644'#9'0'
  666. #9'st.local.u32 '#9'[%r9+0], %r162;'
  667. #9'add.u32 '#9'%r9, %r9, 4;'
  668. #9'add.u32 '#9'%r7, %r7, 4;'
  669. #9'setp.ne.u32 '#9'%p27, %r7, %r8;'
  670. #9'@%p27 bra '#9'$Lt_0_59650;'
  671. #9'.loc'#9'18'#9'1646'#9'0'
  672. #9'st.local.u32 '#9'[__cuda_result_16+24], %r11;'
  673. #9'.loc'#9'18'#9'1651'#9'0'
  674. #9'shl.b32 '#9'%r166, %r3, 1;'
  675. #9'shr.u32 '#9'%r19, %r166, 24;'
  676. #9'sub.u32 '#9'%r20, %r19, 128;'
  677. #9'shr.u32 '#9'%r21, %r20, 5;'
  678. #9'mov.s32 '#9'%r167, 4;'
  679. #9'sub.s32 '#9'%r23, %r167, %r21;'
  680. #9'mul.lo.u32 '#9'%r24, %r23, 4;'
  681. #9'add.u32 '#9'%r25, %r24, %r2;'
  682. #9'ld.local.u32 '#9'%r11, [%r25+8];'
  683. #9'.loc'#9'18'#9'1652'#9'0'
  684. #9'ld.local.u32 '#9'%r26, [%r25+4];'
  685. #9'and.b32 '#9'%r27, %r20, 31;'
  686. #9'mov.u32 '#9'%r168, 0;'
  687. #9'setp.eq.u32 '#9'%p28, %r27, %r168;'
  688. #9'@%p28 bra '#9'$Lt_0_60162;'
  689. #9'.loc'#9'18'#9'1655'#9'0'
  690. #9'mov.s32 '#9'%r169, 32;'
  691. #9'sub.s32 '#9'%r30, %r169, %r27;'
  692. #9'shr.u32 '#9'%r170, %r26, %r30;'
  693. #9'shl.b32 '#9'%r171, %r11, %r27;'
  694. #9'add.u32 '#9'%r11, %r170, %r171;'
  695. #9'.loc'#9'18'#9'1656'#9'0'
  696. #9'ld.local.u32 '#9'%r172, [%r25+0];'
  697. #9'shr.u32 '#9'%r173, %r172, %r30;'
  698. #9'shl.b32 '#9'%r174, %r26, %r27;'
  699. #9'add.u32 '#9'%r26, %r173, %r174;'
  700. '$Lt_0_60162:'
  701. #9'.loc'#9'18'#9'1658'#9'0'
  702. #9'shr.u32 '#9'%r36, %r11, 30;'
  703. #9'.loc'#9'18'#9'1660'#9'0'
  704. #9'shr.u32 '#9'%r175, %r26, 30;'
  705. #9'shl.b32 '#9'%r176, %r11, 2;'
  706. #9'add.u32 '#9'%r11, %r175, %r176;'
  707. #9'.loc'#9'18'#9'1661'#9'0'
  708. #9'shl.b32 '#9'%r26, %r26, 2;'
  709. #9'mov.u32 '#9'%r177, 0;'
  710. #9'setp.eq.u32 '#9'%p29, %r26, %r177;'
  711. #9'@%p29 bra '#9'$Lt_0_60930;'
  712. #9'.loc'#9'18'#9'1662'#9'0'
  713. #9'add.u32 '#9'%r178, %r11, 1;'
  714. #9'mov.u32 '#9'%r179, -2147483648;'
  715. #9'set.gt.u32.u32 '#9'%r180, %r178, %r179;'
  716. #9'neg.s32 '#9'%r181, %r180;'
  717. #9'bra.uni '#9'$Lt_0_60674;'
  718. '$Lt_0_60930:'
  719. #9'mov.u32 '#9'%r182, -2147483648;'
  720. #9'set.gt.u32.u32 '#9'%r183, %r11, %r182;'
  721. #9'neg.s32 '#9'%r181, %r183;'
  722. '$Lt_0_60674:'
  723. #9'.loc'#9'18'#9'1663'#9'0'
  724. #9'add.u32 '#9'%r36, %r36, %r181;'
  725. #9'.loc'#9'18'#9'1662'#9'0'
  726. #9'neg.s32 '#9'%r184, %r36;'
  727. #9'mov.u32 '#9'%r185, 0;'
  728. #9'setp.ne.u32 '#9'%p30, %r4, %r185;'
  729. #9'selp.s32 '#9'%r36, %r184, %r36, %p30;'
  730. #9'mov.u32 '#9'%r186, 0;'
  731. #9'setp.eq.u32 '#9'%p31, %r181, %r186;'
  732. #9'@%p31 bra '#9'$Lt_0_61186;'
  733. #9'.loc'#9'18'#9'1668'#9'0'
  734. #9'neg.s32 '#9'%r26, %r26;'
  735. #9'.loc'#9'18'#9'1670'#9'0'
  736. #9'mov.u32 '#9'%r187, 0;'
  737. #9'set.eq.u32.u32 '#9'%r188, %r26, %r187;'
  738. #9'neg.s32 '#9'%r189, %r188;'
  739. #9'not.b32 '#9'%r190, %r11;'
  740. #9'add.u32 '#9'%r11, %r189, %r190;'
  741. #9'.loc'#9'18'#9'1671'#9'0'
  742. #9'xor.b32 '#9'%r5, %r4, -2147483648;'
  743. '$Lt_0_61186:'
  744. #9'.loc'#9'18'#9'1673'#9'0'
  745. #9'mov.s32 '#9'%r53, %r36;'
  746. #9'mov.u32 '#9'%r191, 0;'
  747. #9'setp.le.s32 '#9'%p32, %r11, %r191;'
  748. #9'@%p32 bra '#9'$Lt_0_69890;'
  749. #9'mov.u32 '#9'%r55, 0;'
  750. '$Lt_0_62210:'
  751. ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
  752. 'tions: unknown'
  753. #9'.loc'#9'18'#9'1677'#9'0'
  754. #9'shr.u32 '#9'%r192, %r26, 31;'
  755. #9'shl.b32 '#9'%r193, %r11, 1;'
  756. #9'add.u32 '#9'%r11, %r192, %r193;'
  757. #9'.loc'#9'18'#9'1678'#9'0'
  758. #9'shl.b32 '#9'%r26, %r26, 1;'
  759. #9'.loc'#9'18'#9'1679'#9'0'
  760. #9'sub.u32 '#9'%r55, %r55, 1;'
  761. #9'mov.u32 '#9'%r194, 0;'
  762. #9'setp.gt.s32 '#9'%p33, %r11, %r194;'
  763. #9'@%p33 bra '#9'$Lt_0_62210;'
  764. #9'bra.uni '#9'$Lt_0_61698;'
  765. '$Lt_0_69890:'
  766. #9'mov.u32 '#9'%r55, 0;'
  767. '$Lt_0_61698:'
  768. #9'.loc'#9'18'#9'1681'#9'0'
  769. #9'mul.lo.u32 '#9'%r26, %r11, -921707870;'
  770. #9'.loc'#9'18'#9'1682'#9'0'
  771. #9'mov.u32 '#9'%r195, -921707870;'
  772. #9'mul.hi.u32 '#9'%r11, %r11, %r195;'
  773. #9'mov.u32 '#9'%r196, 0;'
  774. #9'setp.le.s32 '#9'%p34, %r11, %r196;'
  775. #9'@%p34 bra '#9'$Lt_0_62722;'
  776. #9'.loc'#9'18'#9'1684'#9'0'
  777. #9'shr.u32 '#9'%r197, %r26, 31;'
  778. #9'shl.b32 '#9'%r198, %r11, 1;'
  779. #9'add.u32 '#9'%r11, %r197, %r198;'
  780. #9'.loc'#9'18'#9'1685'#9'0'
  781. #9'shl.b32 '#9'%r26, %r26, 1;'
  782. #9'.loc'#9'18'#9'1686'#9'0'
  783. #9'sub.u32 '#9'%r55, %r55, 1;'
  784. '$Lt_0_62722:'
  785. #9'.loc'#9'18'#9'1688'#9'0'
  786. #9'mov.u32 '#9'%r199, 0;'
  787. #9'set.ne.u32.u32 '#9'%r200, %r26, %r199;'
  788. #9'neg.s32 '#9'%r201, %r200;'
  789. #9'add.u32 '#9'%r11, %r201, %r11;'
  790. #9'shl.b32 '#9'%r202, %r11, 24;'
  791. #9'mov.s32 '#9'%r203, 0;'
  792. #9'set.lt.u32.s32 '#9'%r204, %r202, %r203;'
  793. #9'neg.s32 '#9'%r205, %r204;'
  794. #9'shr.u32 '#9'%r206, %r11, 8;'
  795. #9'add.u32 '#9'%r207, %r55, 126;'
  796. #9'shl.b32 '#9'%r208, %r207, 23;'
  797. #9'add.u32 '#9'%r209, %r206, %r208;'
  798. #9'add.u32 '#9'%r210, %r205, %r209;'
  799. #9'or.b32 '#9'%r211, %r5, %r210;'
  800. #9'mov.b32 '#9'%f7, %r211;'
  801. #9'bra.uni '#9'$Lt_0_1538;'
  802. '$Lt_0_58626:'
  803. #9'.loc'#9'18'#9'1703'#9'0'
  804. #9'mov.f32 '#9'%f84, 0f3f22f983; '#9'// 0.63662'
  805. #9'.loc'#9'18'#9'1946'#9'0'
  806. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  807. #9'.loc'#9'18'#9'1703'#9'0'
  808. #9'mul.f32 '#9'%f9, %f1, %f84;'
  809. #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
  810. #9'mov.s32 '#9'%r53, %r76;'
  811. #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
  812. #9'neg.f32 '#9'%f11, %f10;'
  813. #9'mov.f32 '#9'%f85, 0f3fc90000; '#9'// 1.57031'
  814. #9'mad.f32 '#9'%f86, %f85, %f11, %f1;'
  815. #9'mov.f32 '#9'%f87, 0f39fd8000; '#9'// 0.000483513'
  816. #9'mad.f32 '#9'%f88, %f87, %f11, %f86;'
  817. #9'mov.f32 '#9'%f89, 0f34a88000; '#9'// 3.13856e-007'
  818. #9'mad.f32 '#9'%f90, %f89, %f11, %f88;'
  819. #9'mov.f32 '#9'%f91, 0f2e85a309; '#9'// 6.0771e-011'
  820. #9'mad.f32 '#9'%f7, %f91, %f11, %f90;'
  821. '$Lt_0_1538:'
  822. #9'.loc'#9'18'#9'1949'#9'0'
  823. #9'add.s32 '#9'%r77, %r53, 1;'
  824. #9'mul.f32 '#9'%f19, %f7, %f7;'
  825. #9'and.b32 '#9'%r212, %r77, 1;'
  826. #9'mov.u32 '#9'%r213, 0;'
  827. #9'setp.eq.s32 '#9'%p35, %r212, %r213;'
  828. #9'@%p35 bra '#9'$Lt_0_63490;'
  829. #9'.loc'#9'18'#9'1953'#9'0'
  830. #9'mov.f32 '#9'%f92, 0f3f800000; '#9'// 1'
  831. #9'mov.f32 '#9'%f93, 0fbf000000; '#9'// -0.5'
  832. #9'mov.f32 '#9'%f94, 0f3d2aaaa5; '#9'// 0.0416666'
  833. #9'mov.f32 '#9'%f95, 0fbab6061a; '#9'// -0.00138873'
  834. #9'mov.f32 '#9'%f96, 0f37ccf5ce; '#9'// 2.44332e-005'
  835. #9'mad.f32 '#9'%f97, %f96, %f19, %f95;'
  836. #9'mad.f32 '#9'%f98, %f19, %f97, %f94;'
  837. #9'mad.f32 '#9'%f99, %f19, %f98, %f93;'
  838. #9'mad.f32 '#9'%f28, %f19, %f99, %f92;'
  839. #9'bra.uni '#9'$Lt_0_63234;'
  840. '$Lt_0_63490:'
  841. #9'.loc'#9'18'#9'1955'#9'0'
  842. #9'mov.f32 '#9'%f100, 0fbe2aaaa3; '#9'// -0.166667'
  843. #9'mov.f32 '#9'%f101, 0f3c08839e; '#9'// 0.00833216'
  844. #9'mov.f32 '#9'%f102, 0fb94ca1f9; '#9'// -0.000195153'
  845. #9'mad.f32 '#9'%f103, %f102, %f19, %f101;'
  846. #9'mad.f32 '#9'%f104, %f19, %f103, %f100;'
  847. #9'mul.f32 '#9'%f105, %f19, %f104;'
  848. #9'mad.f32 '#9'%f28, %f105, %f7, %f7;'
  849. '$Lt_0_63234:'
  850. #9'.loc'#9'18'#9'1957'#9'0'
  851. #9'neg.f32 '#9'%f106, %f28;'
  852. #9'and.b32 '#9'%r214, %r77, 2;'
  853. #9'mov.s32 '#9'%r215, 0;'
  854. #9'setp.ne.s32 '#9'%p36, %r214, %r215;'
  855. #9'selp.f32 '#9'%f28, %f106, %f28, %p36;'
  856. #9'mov.f32 '#9'%f82, %f28;'
  857. '$Lt_0_1282:'
  858. #9'.loc'#9'18'#9'1869'#9'0'
  859. #9'mov.u32 '#9'%r216, 0;'
  860. #9'setp.eq.s32 '#9'%p37, %r84, %r216;'
  861. #9'@%p37 bra '#9'$Lt_0_63746;'
  862. #9'mov.f32 '#9'%f107, 0f00000000; '#9'// 0'
  863. #9'.loc'#9'18'#9'1946'#9'0'
  864. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  865. #9'.loc'#9'18'#9'1869'#9'0'
  866. #9'mul.rn.f32 '#9'%f38, %f1, %f107;'
  867. #9'bra.uni '#9'$Lt_0_258;'
  868. '$Lt_0_63746:'
  869. #9'.loc'#9'18'#9'1622'#9'0'
  870. #9'mov.f32 '#9'%f108, 0f473ba700; '#9'// 48039'
  871. #9'setp.gt.f32 '#9'%p38, %f2, %f108;'
  872. #9'@!%p38 bra '#9'$Lt_0_64258;'
  873. #9'.loc'#9'18'#9'1946'#9'0'
  874. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  875. #9'.loc'#9'18'#9'1625'#9'0'
  876. #9'mov.b32 '#9'%r3, %f1;'
  877. #9'and.b32 '#9'%r4, %r3, -2147483648;'
  878. #9'mov.s32 '#9'%r87, %r4;'
  879. #9'.loc'#9'18'#9'24'#9'0'
  880. #9'shl.b32 '#9'%r6, %r3, 8;'
  881. #9'mov.s32 '#9'%r88, %r1;'
  882. #9'add.u32 '#9'%r8, %r1, 24;'
  883. #9'mov.u32 '#9'%r89, __cuda_result_44;'
  884. #9'or.b32 '#9'%r10, %r6, -2147483648;'
  885. #9'mov.u32 '#9'%r90, 0;'
  886. '$Lt_0_65282:'
  887. ' //<loop> Loop body line 24, nesting depth: 1, iterations: 6'
  888. #9'.loc'#9'18'#9'1642'#9'0'
  889. #9'ld.const.u32 '#9'%r217, [%r88+0];'
  890. #9'mul.lo.u32 '#9'%r218, %r217, %r10;'
  891. #9'add.u32 '#9'%r219, %r218, %r90;'
  892. #9'.loc'#9'18'#9'1643'#9'0'
  893. #9'set.gt.u32.u32 '#9'%r220, %r218, %r219;'
  894. #9'neg.s32 '#9'%r221, %r220;'
  895. #9'mul.hi.u32 '#9'%r222, %r217, %r10;'
  896. #9'add.u32 '#9'%r90, %r221, %r222;'
  897. #9'.loc'#9'18'#9'1644'#9'0'
  898. #9'st.local.u32 '#9'[%r89+0], %r219;'
  899. #9'add.u32 '#9'%r89, %r89, 4;'
  900. #9'add.u32 '#9'%r88, %r88, 4;'
  901. #9'setp.ne.u32 '#9'%p39, %r88, %r8;'
  902. #9'@%p39 bra '#9'$Lt_0_65282;'
  903. #9'.loc'#9'18'#9'1646'#9'0'
  904. #9'st.local.u32 '#9'[__cuda_result_44+24], %r90;'
  905. #9'.loc'#9'18'#9'1651'#9'0'
  906. #9'shl.b32 '#9'%r223, %r3, 1;'
  907. #9'shr.u32 '#9'%r19, %r223, 24;'
  908. #9'sub.u32 '#9'%r20, %r19, 128;'
  909. #9'shr.u32 '#9'%r21, %r20, 5;'
  910. #9'mov.s32 '#9'%r224, 4;'
  911. #9'sub.s32 '#9'%r23, %r224, %r21;'
  912. #9'mul.lo.u32 '#9'%r24, %r23, 4;'
  913. #9'add.u32 '#9'%r99, %r24, %r86;'
  914. #9'ld.local.u32 '#9'%r90, [%r99+8];'
  915. #9'.loc'#9'18'#9'1652'#9'0'
  916. #9'ld.local.u32 '#9'%r100, [%r99+4];'
  917. #9'and.b32 '#9'%r27, %r20, 31;'
  918. #9'mov.u32 '#9'%r225, 0;'
  919. #9'setp.eq.u32 '#9'%p40, %r27, %r225;'
  920. #9'@%p40 bra '#9'$Lt_0_65794;'
  921. #9'.loc'#9'18'#9'1655'#9'0'
  922. #9'mov.s32 '#9'%r226, 32;'
  923. #9'sub.s32 '#9'%r30, %r226, %r27;'
  924. #9'shr.u32 '#9'%r227, %r100, %r30;'
  925. #9'shl.b32 '#9'%r228, %r90, %r27;'
  926. #9'add.u32 '#9'%r90, %r227, %r228;'
  927. #9'.loc'#9'18'#9'1656'#9'0'
  928. #9'ld.local.u32 '#9'%r229, [%r99+0];'
  929. #9'shr.u32 '#9'%r230, %r229, %r30;'
  930. #9'shl.b32 '#9'%r231, %r100, %r27;'
  931. #9'add.u32 '#9'%r100, %r230, %r231;'
  932. '$Lt_0_65794:'
  933. #9'.loc'#9'18'#9'1658'#9'0'
  934. #9'shr.u32 '#9'%r108, %r90, 30;'
  935. #9'.loc'#9'18'#9'1660'#9'0'
  936. #9'shr.u32 '#9'%r232, %r100, 30;'
  937. #9'shl.b32 '#9'%r233, %r90, 2;'
  938. #9'add.u32 '#9'%r90, %r232, %r233;'
  939. #9'.loc'#9'18'#9'1661'#9'0'
  940. #9'shl.b32 '#9'%r100, %r100, 2;'
  941. #9'mov.u32 '#9'%r234, 0;'
  942. #9'setp.eq.u32 '#9'%p41, %r100, %r234;'
  943. #9'@%p41 bra '#9'$Lt_0_66562;'
  944. #9'.loc'#9'18'#9'1662'#9'0'
  945. #9'add.u32 '#9'%r235, %r90, 1;'
  946. #9'mov.u32 '#9'%r236, -2147483648;'
  947. #9'set.gt.u32.u32 '#9'%r237, %r235, %r236;'
  948. #9'neg.s32 '#9'%r238, %r237;'
  949. #9'bra.uni '#9'$Lt_0_66306;'
  950. '$Lt_0_66562:'
  951. #9'mov.u32 '#9'%r239, -2147483648;'
  952. #9'set.gt.u32.u32 '#9'%r240, %r90, %r239;'
  953. #9'neg.s32 '#9'%r238, %r240;'
  954. '$Lt_0_66306:'
  955. #9'.loc'#9'18'#9'1663'#9'0'
  956. #9'add.u32 '#9'%r108, %r108, %r238;'
  957. #9'.loc'#9'18'#9'1662'#9'0'
  958. #9'neg.s32 '#9'%r241, %r108;'
  959. #9'mov.u32 '#9'%r242, 0;'
  960. #9'setp.ne.u32 '#9'%p42, %r4, %r242;'
  961. #9'selp.s32 '#9'%r108, %r241, %r108, %p42;'
  962. #9'mov.u32 '#9'%r243, 0;'
  963. #9'setp.eq.u32 '#9'%p43, %r238, %r243;'
  964. #9'@%p43 bra '#9'$Lt_0_66818;'
  965. #9'.loc'#9'18'#9'1668'#9'0'
  966. #9'neg.s32 '#9'%r100, %r100;'
  967. #9'.loc'#9'18'#9'1670'#9'0'
  968. #9'mov.u32 '#9'%r244, 0;'
  969. #9'set.eq.u32.u32 '#9'%r245, %r100, %r244;'
  970. #9'neg.s32 '#9'%r246, %r245;'
  971. #9'not.b32 '#9'%r247, %r90;'
  972. #9'add.u32 '#9'%r90, %r246, %r247;'
  973. #9'.loc'#9'18'#9'1671'#9'0'
  974. #9'xor.b32 '#9'%r87, %r4, -2147483648;'
  975. '$Lt_0_66818:'
  976. #9'.loc'#9'18'#9'1673'#9'0'
  977. #9'mov.s32 '#9'%r125, %r108;'
  978. #9'mov.u32 '#9'%r248, 0;'
  979. #9'setp.le.s32 '#9'%p44, %r90, %r248;'
  980. #9'@%p44 bra '#9'$Lt_0_70146;'
  981. #9'mov.u32 '#9'%r127, 0;'
  982. '$Lt_0_67842:'
  983. ' //<loop> Loop body line 1673, nesting depth: 1, estimated itera' +
  984. 'tions: unknown'
  985. #9'.loc'#9'18'#9'1677'#9'0'
  986. #9'shr.u32 '#9'%r249, %r100, 31;'
  987. #9'shl.b32 '#9'%r250, %r90, 1;'
  988. #9'add.u32 '#9'%r90, %r249, %r250;'
  989. #9'.loc'#9'18'#9'1678'#9'0'
  990. #9'shl.b32 '#9'%r100, %r100, 1;'
  991. #9'.loc'#9'18'#9'1679'#9'0'
  992. #9'sub.u32 '#9'%r127, %r127, 1;'
  993. #9'mov.u32 '#9'%r251, 0;'
  994. #9'setp.gt.s32 '#9'%p45, %r90, %r251;'
  995. #9'@%p45 bra '#9'$Lt_0_67842;'
  996. #9'bra.uni '#9'$Lt_0_67330;'
  997. '$Lt_0_70146:'
  998. #9'mov.u32 '#9'%r127, 0;'
  999. '$Lt_0_67330:'
  1000. #9'.loc'#9'18'#9'1681'#9'0'
  1001. #9'mul.lo.u32 '#9'%r100, %r90, -921707870;'
  1002. #9'.loc'#9'18'#9'1682'#9'0'
  1003. #9'mov.u32 '#9'%r252, -921707870;'
  1004. #9'mul.hi.u32 '#9'%r90, %r90, %r252;'
  1005. #9'mov.u32 '#9'%r253, 0;'
  1006. #9'setp.le.s32 '#9'%p46, %r90, %r253;'
  1007. #9'@%p46 bra '#9'$Lt_0_68354;'
  1008. #9'.loc'#9'18'#9'1684'#9'0'
  1009. #9'shr.u32 '#9'%r254, %r100, 31;'
  1010. #9'shl.b32 '#9'%r255, %r90, 1;'
  1011. #9'add.u32 '#9'%r90, %r254, %r255;'
  1012. #9'.loc'#9'18'#9'1685'#9'0'
  1013. #9'shl.b32 '#9'%r100, %r100, 1;'
  1014. #9'.loc'#9'18'#9'1686'#9'0'
  1015. #9'sub.u32 '#9'%r127, %r127, 1;'
  1016. '$Lt_0_68354:'
  1017. #9'.loc'#9'18'#9'1688'#9'0'
  1018. #9'mov.u32 '#9'%r256, 0;'
  1019. #9'set.ne.u32.u32 '#9'%r257, %r100, %r256;'
  1020. #9'neg.s32 '#9'%r258, %r257;'
  1021. #9'add.u32 '#9'%r90, %r258, %r90;'
  1022. #9'shl.b32 '#9'%r259, %r90, 24;'
  1023. #9'mov.s32 '#9'%r260, 0;'
  1024. #9'set.lt.u32.s32 '#9'%r261, %r259, %r260;'
  1025. #9'neg.s32 '#9'%r262, %r261;'
  1026. #9'shr.u32 '#9'%r263, %r90, 8;'
  1027. #9'add.u32 '#9'%r264, %r127, 126;'
  1028. #9'shl.b32 '#9'%r265, %r264, 23;'
  1029. #9'add.u32 '#9'%r266, %r263, %r265;'
  1030. #9'add.u32 '#9'%r267, %r262, %r266;'
  1031. #9'or.b32 '#9'%r268, %r87, %r267;'
  1032. #9'mov.b32 '#9'%f40, %r268;'
  1033. #9'bra.uni '#9'$Lt_0_514;'
  1034. '$Lt_0_64258:'
  1035. #9'.loc'#9'18'#9'1703'#9'0'
  1036. #9'mov.f32 '#9'%f109, 0f3f22f983; '#9'// 0.63662'
  1037. #9'.loc'#9'18'#9'1946'#9'0'
  1038. #9'ld.param.f32 '#9'%f1, [__cudaparm_transformKernel_theta];'
  1039. #9'.loc'#9'18'#9'1703'#9'0'
  1040. #9'mul.f32 '#9'%f9, %f1, %f109;'
  1041. #9'cvt.rni.s32.f32 '#9'%r76, %f9;'
  1042. #9'mov.s32 '#9'%r125, %r76;'
  1043. #9'cvt.rn.f32.s32 '#9'%f10, %r76;'
  1044. #9'neg.f32 '#9'%f11, %f10;'
  1045. #9'mov.f32 '#9'%f110, 0f3fc90000; '#9'// 1.57031'
  1046. #9'mad.f32 '#9'%f111, %f110, %f11, %f1;'
  1047. #9'mov.f32 '#9'%f112, 0f39fd8000; '#9'// 0.000483513'
  1048. #9'mad.f32 '#9'%f113, %f112, %f11, %f111;'
  1049. #9'mov.f32 '#9'%f114, 0f34a88000; '#9'// 3.13856e-007'
  1050. #9'mad.f32 '#9'%f115, %f114, %f11, %f113;'
  1051. #9'mov.f32 '#9'%f116, 0f2e85a309; '#9'// 6.0771e-011'
  1052. #9'mad.f32 '#9'%f40, %f116, %f11, %f115;'
  1053. '$Lt_0_514:'
  1054. #9'.loc'#9'18'#9'1872'#9'0'
  1055. #9'mul.f32 '#9'%f49, %f40, %f40;'
  1056. #9'and.b32 '#9'%r269, %r125, 1;'
  1057. #9'mov.u32 '#9'%r270, 0;'
  1058. #9'setp.eq.s32 '#9'%p47, %r269, %r270;'
  1059. #9'@%p47 bra '#9'$Lt_0_69122;'
  1060. #9'.loc'#9'18'#9'1875'#9'0'
  1061. #9'mov.f32 '#9'%f117, 0f3f800000; '#9'// 1'
  1062. #9'mov.f32 '#9'%f118, 0fbf000000; '#9'// -0.5'
  1063. #9'mov.f32 '#9'%f119, 0f3d2aaaa5; '#9'// 0.0416666'
  1064. #9'mov.f32 '#9'%f120, 0fbab6061a; '#9'// -0.00138873'
  1065. #9'mov.f32 '#9'%f121, 0f37ccf5ce; '#9'// 2.44332e-005'
  1066. #9'mad.f32 '#9'%f122, %f121, %f49, %f120;'
  1067. #9'mad.f32 '#9'%f123, %f49, %f122, %f119;'
  1068. #9'mad.f32 '#9'%f124, %f49, %f123, %f118;'
  1069. #9'mad.f32 '#9'%f58, %f49, %f124, %f117;'
  1070. #9'bra.uni '#9'$Lt_0_68866;'
  1071. '$Lt_0_69122:'
  1072. #9'.loc'#9'18'#9'1877'#9'0'
  1073. #9'mov.f32 '#9'%f125, 0fbe2aaaa3; '#9'// -0.166667'
  1074. #9'mov.f32 '#9'%f126, 0f3c08839e; '#9'// 0.00833216'
  1075. #9'mov.f32 '#9'%f127, 0fb94ca1f9; '#9'// -0.000195153'
  1076. #9'mad.f32 '#9'%f128, %f127, %f49, %f126;'
  1077. #9'mad.f32 '#9'%f129, %f49, %f128, %f125;'
  1078. #9'mul.f32 '#9'%f130, %f49, %f129;'
  1079. #9'mad.f32 '#9'%f58, %f130, %f40, %f40;'
  1080. '$Lt_0_68866:'
  1081. #9'.loc'#9'18'#9'1879'#9'0'
  1082. #9'neg.f32 '#9'%f131, %f58;'
  1083. #9'and.b32 '#9'%r271, %r125, 2;'
  1084. #9'mov.s32 '#9'%r272, 0;'
  1085. #9'setp.ne.s32 '#9'%p48, %r271, %r272;'
  1086. #9'selp.f32 '#9'%f58, %f131, %f58, %p48;'
  1087. #9'mov.f32 '#9'%f38, %f58;'
  1088. '$Lt_0_258:'
  1089. #9'.loc'#9'15'#9'23'#9'0'
  1090. #9'mov.f32 '#9'%f132, %f80;'
  1091. #9'mul.f32 '#9'%f133, %f82, %f73;'
  1092. #9'mad.f32 '#9'%f134, %f75, %f38, %f133;'
  1093. #9'mov.f32 '#9'%f135, 0f3f000000; '#9'// 0.5'
  1094. #9'add.f32 '#9'%f136, %f134, %f135;'
  1095. #9'mov.f32 '#9'%f137, 0f00000000; '#9'// 0'
  1096. #9'mov.f32 '#9'%f138, 0f00000000; '#9'// 0'
  1097. #9'tex.2d.v4.f32.f32 {%f139,%f140,%f141,%f142},[tex,{%f132,%f136,%' +
  1098. 'f137,%f138}];'
  1099. #9'.loc'#9'15'#9'26'#9'0'
  1100. #9'mov.f32 '#9'%f143, %f139;'
  1101. #9'ld.param.u32 '#9'%r273, [__cudaparm_transformKernel_g_odata];'
  1102. #9'.loc'#9'15'#9'22'#9'0'
  1103. #9'ld.param.s32 '#9'%r155, [__cudaparm_transformKernel_width];'
  1104. #9'.loc'#9'15'#9'26'#9'0'
  1105. #9'mul.lo.u32 '#9'%r274, %r155, %r157;'
  1106. #9'add.u32 '#9'%r275, %r159, %r274;'
  1107. #9'mul.lo.u32 '#9'%r276, %r275, 4;'
  1108. #9'add.u32 '#9'%r277, %r273, %r276;'
  1109. #9'st.global.f32 '#9'[%r277+0], %f143;'
  1110. #9'.loc'#9'15'#9'27'#9'0'
  1111. #9'exit;'
  1112. '$LDWend_transformKernel:'
  1113. #9'} // transformKernel'
  1114. '')
  1115. object TurnPicture: TCUDAFunction
  1116. KernelName = 'transformKernel'
  1117. BlockShape.SizeX = 8
  1118. BlockShape.SizeY = 8
  1119. Grid.SizeX = 64
  1120. Grid.SizeY = 64
  1121. end
  1122. object Image: TCUDATexture
  1123. KernelName = 'tex'
  1124. AddressModeS = amWrap
  1125. AddressModeT = amWrap
  1126. FilterMode = fmLinear
  1127. Format = ctFloat
  1128. ChannelNum = cnOne
  1129. MemDataArray = TextureArray
  1130. end
  1131. end
  1132. object TextureArray: TCUDAMemData
  1133. Width = 512
  1134. Height = 512
  1135. MemoryType = mtArray
  1136. ChannelsType = ctFloat
  1137. end
  1138. object ResultData: TCUDAMemData
  1139. Width = 512
  1140. Height = 512
  1141. MemoryType = mtDevice
  1142. ChannelsType = ctFloat
  1143. end
  1144. end
  1145. object GLCUDADevice1: TGLCUDADevice
  1146. SelectDevice = 'GeForce GTX 1050 Ti (1)'
  1147. Left = 336
  1148. Top = 56
  1149. end
  1150. object GLCUDACompiler1: TGLCUDACompiler
  1151. Left = 558
  1152. Top = 59
  1153. end
  1154. end