avpcl_utils.cpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. /*
  2. Copyright 2007 nVidia, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
  4. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  5. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
  6. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  7. See the License for the specific language governing permissions and limitations under the License.
  8. */
  9. // Utility and common routines
  10. #include "avpcl_utils.h"
  11. #include "avpcl.h"
  12. #include "nvmath/vector.inl"
  13. #include <math.h>
  14. using namespace nv;
  15. using namespace AVPCL;
  // Fixed-point interpolation weights, expressed in 64ths, indexed by step.
  // Used by Utils::lerp when USE_ZOH_INTERP is defined. Entry k and entry
  // (last - k) of each table always add up to 64.
  static const int denom7_weights[8] = {
      0, 9, 18, 27, 37, 46, 55, 64
  };
  static const int denom15_weights[16] = {
      0,  4,  9,  13, 17, 21, 26, 30,
      34, 38, 43, 47, 51, 55, 60, 64
  };
  18. int Utils::lerp(int a, int b, int i, int bias, int denom)
  19. {
  20. #ifdef USE_ZOH_INTERP
  21. nvAssert (denom == 3 || denom == 7 || denom == 15);
  22. nvAssert (i >= 0 && i <= denom);
  23. nvAssert (bias >= 0 && bias <= denom/2);
  24. nvAssert (a >= 0 && b >= 0);
  25. int round = 0;
  26. #ifdef USE_ZOH_INTERP_ROUNDED
  27. round = 32;
  28. #endif
  29. switch (denom)
  30. {
  31. case 3: denom *= 5; i *= 5; // fall through to case 15
  32. case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
  33. case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
  34. default: nvUnreachable(); return 0;
  35. }
  36. #else
  37. return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
  38. #endif
  39. }
  40. Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
  41. {
  42. #ifdef USE_ZOH_INTERP
  43. nvAssert (denom == 3 || denom == 7 || denom == 15);
  44. nvAssert (i >= 0 && i <= denom);
  45. nvAssert (bias >= 0 && bias <= denom/2);
  46. // nvAssert (a >= 0 && b >= 0);
  47. // no need to bias these as this is an exact division
  48. switch (denom)
  49. {
  50. case 3: denom *= 5; i *= 5; // fall through to case 15
  51. case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
  52. case 7: return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
  53. default: nvUnreachable(); return Vector4(0);
  54. }
  55. #else
  56. return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
  57. #endif
  58. }
  59. int Utils::unquantize(int q, int prec)
  60. {
  61. int unq;
  62. nvAssert (prec > 3); // we only want to do one replicate
  63. #ifdef USE_ZOH_QUANT
  64. if (prec >= 8)
  65. unq = q;
  66. else if (q == 0)
  67. unq = 0;
  68. else if (q == ((1<<prec)-1))
  69. unq = 255;
  70. else
  71. unq = (q * 256 + 128) >> prec;
  72. #else
  73. // avpcl unquantizer -- bit replicate
  74. unq = (q << (8-prec)) | (q >> (2*prec-8));
  75. #endif
  76. return unq;
  77. }
  78. // quantize to the best value -- i.e., minimize unquantize error
  79. int Utils::quantize(float value, int prec)
  80. {
  81. int q, unq;
  82. nvAssert (prec > 3); // we only want to do one replicate
  83. unq = (int)floor(value + 0.5f);
  84. nvAssert (unq <= 255);
  85. #ifdef USE_ZOH_QUANT
  86. q = (prec >= 8) ? unq : (unq << prec) / 256;
  87. #else
  88. // avpcl quantizer -- scale properly for best possible bit-replicated result
  89. q = (unq * ((1<<prec)-1) + 127)/255;
  90. #endif
  91. nvAssert (q >= 0 && q < (1 << prec));
  92. return q;
  93. }
  94. float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
  95. {
  96. Vector4 err = a - b;
  97. // if nonuniform, select weights and weigh away
  98. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  99. {
  100. float rwt, gwt, bwt;
  101. if (AVPCL::flag_nonuniform)
  102. {
  103. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  104. }
  105. else /*if (AVPCL::flag_nonuniform_ati)*/
  106. {
  107. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  108. }
  109. // weigh the components
  110. err.x *= rwt;
  111. err.y *= gwt;
  112. err.z *= bwt;
  113. }
  114. return lengthSquared(err);
  115. }
  116. // WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
  117. float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
  118. {
  119. Vector3 err = a - b;
  120. // if nonuniform, select weights and weigh away
  121. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  122. {
  123. float rwt, gwt, bwt;
  124. if (AVPCL::flag_nonuniform)
  125. {
  126. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  127. }
  128. else if (AVPCL::flag_nonuniform_ati)
  129. {
  130. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  131. }
  132. // adjust weights based on rotatemode
  133. switch(rotatemode)
  134. {
  135. case ROTATEMODE_RGBA_RGBA: break;
  136. case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
  137. case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
  138. case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
  139. default: nvUnreachable();
  140. }
  141. // weigh the components
  142. err.x *= rwt;
  143. err.y *= gwt;
  144. err.z *= bwt;
  145. }
  146. return lengthSquared(err);
  147. }
  148. float Utils::metric1(const float a, const float b, int rotatemode)
  149. {
  150. float err = a - b;
  151. // if nonuniform, select weights and weigh away
  152. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  153. {
  154. float rwt, gwt, bwt, awt;
  155. if (AVPCL::flag_nonuniform)
  156. {
  157. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  158. }
  159. else if (AVPCL::flag_nonuniform_ati)
  160. {
  161. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  162. }
  163. // adjust weights based on rotatemode
  164. switch(rotatemode)
  165. {
  166. case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
  167. case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
  168. case ROTATEMODE_RGBA_RABG: awt = gwt; break;
  169. case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
  170. default: nvUnreachable();
  171. }
  172. // weigh the components
  173. err *= awt;
  174. }
  175. return err * err;
  176. }
  177. float Utils::premult(float r, float a)
  178. {
  179. // note that the args are really integers stored in floats
  180. int R = int(r), A = int(a);
  181. nvAssert ((R==r) && (A==a));
  182. return float((R*A + 127)/255);
  183. }
  184. static void premult4(Vector4& rgba)
  185. {
  186. rgba.x = Utils::premult(rgba.x, rgba.w);
  187. rgba.y = Utils::premult(rgba.y, rgba.w);
  188. rgba.z = Utils::premult(rgba.z, rgba.w);
  189. }
  190. static void premult3(Vector3& rgb, float a)
  191. {
  192. rgb.x = Utils::premult(rgb.x, a);
  193. rgb.y = Utils::premult(rgb.y, a);
  194. rgb.z = Utils::premult(rgb.z, a);
  195. }
  196. float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
  197. {
  198. Vector4 pma = a, pmb = b;
  199. premult4(pma);
  200. premult4(pmb);
  201. Vector4 err = pma - pmb;
  202. // if nonuniform, select weights and weigh away
  203. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  204. {
  205. float rwt, gwt, bwt;
  206. if (AVPCL::flag_nonuniform)
  207. {
  208. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  209. }
  210. else /*if (AVPCL::flag_nonuniform_ati)*/
  211. {
  212. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  213. }
  214. // weigh the components
  215. err.x *= rwt;
  216. err.y *= gwt;
  217. err.z *= bwt;
  218. }
  219. return lengthSquared(err);
  220. }
  221. float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
  222. {
  223. Vector3 pma = rgb0, pmb = rgb1;
  224. premult3(pma, a0);
  225. premult3(pmb, a1);
  226. Vector3 err = pma - pmb;
  227. // if nonuniform, select weights and weigh away
  228. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  229. {
  230. float rwt, gwt, bwt;
  231. if (AVPCL::flag_nonuniform)
  232. {
  233. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  234. }
  235. else /*if (AVPCL::flag_nonuniform_ati)*/
  236. {
  237. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  238. }
  239. // weigh the components
  240. err.x *= rwt;
  241. err.y *= gwt;
  242. err.z *= bwt;
  243. }
  244. return lengthSquared(err);
  245. }
  246. float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
  247. {
  248. Vector3 pma = rgb0, pmb = rgb1;
  249. switch(rotatemode)
  250. {
  251. case ROTATEMODE_RGBA_RGBA:
  252. // this function isn't supposed to be called for this rotatemode
  253. nvUnreachable();
  254. break;
  255. case ROTATEMODE_RGBA_AGBR:
  256. pma.y = premult(pma.y, pma.x);
  257. pma.z = premult(pma.z, pma.x);
  258. pmb.y = premult(pmb.y, pmb.x);
  259. pmb.z = premult(pmb.z, pmb.x);
  260. break;
  261. case ROTATEMODE_RGBA_RABG:
  262. pma.x = premult(pma.x, pma.y);
  263. pma.z = premult(pma.z, pma.y);
  264. pmb.x = premult(pmb.x, pmb.y);
  265. pmb.z = premult(pmb.z, pmb.y);
  266. break;
  267. case ROTATEMODE_RGBA_RGAB:
  268. pma.x = premult(pma.x, pma.z);
  269. pma.y = premult(pma.y, pma.z);
  270. pmb.x = premult(pmb.x, pmb.z);
  271. pmb.y = premult(pmb.y, pmb.z);
  272. break;
  273. default: nvUnreachable();
  274. }
  275. Vector3 err = pma - pmb;
  276. // if nonuniform, select weights and weigh away
  277. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  278. {
  279. float rwt, gwt, bwt;
  280. if (AVPCL::flag_nonuniform)
  281. {
  282. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  283. }
  284. else /*if (AVPCL::flag_nonuniform_ati)*/
  285. {
  286. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  287. }
  288. // weigh the components
  289. err.x *= rwt;
  290. err.y *= gwt;
  291. err.z *= bwt;
  292. }
  293. return lengthSquared(err);
  294. }
  295. float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
  296. {
  297. float err = premult(rgb0, a0) - premult(rgb1, a1);
  298. // if nonuniform, select weights and weigh away
  299. if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
  300. {
  301. float rwt, gwt, bwt, awt;
  302. if (AVPCL::flag_nonuniform)
  303. {
  304. rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
  305. }
  306. else if (AVPCL::flag_nonuniform_ati)
  307. {
  308. rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
  309. }
  310. // adjust weights based on rotatemode
  311. switch(rotatemode)
  312. {
  313. case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
  314. case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
  315. case ROTATEMODE_RGBA_RABG: awt = gwt; break;
  316. case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
  317. default: nvUnreachable();
  318. }
  319. // weigh the components
  320. err *= awt;
  321. }
  322. return err * err;
  323. }