transform.c 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372
  1. // qcms
  2. // Copyright (C) 2009 Mozilla Corporation
  3. // Copyright (C) 1998-2007 Marti Maria
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining
  6. // a copy of this software and associated documentation files (the "Software"),
  7. // to deal in the Software without restriction, including without limitation
  8. // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. // and/or sell copies of the Software, and to permit persons to whom the Software
  10. // is furnished to do so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in
  13. // all copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  17. // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  19. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  20. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  21. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22. #include <stdlib.h>
  23. #include <math.h>
  24. #include <assert.h>
  25. #include "qcmsint.h"
  26. /* for MSVC, GCC, Intel, and Sun compilers */
  27. #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
  28. #define X86
  29. #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
  30. //XXX: could use a bettername
  31. typedef uint16_t uint16_fract_t;
  32. /* value must be a value between 0 and 1 */
  33. //XXX: is the above a good restriction to have?
  34. float lut_interp_linear(double value, uint16_t *table, int length)
  35. {
  36. int upper, lower;
  37. value = value * (length - 1); // scale to length of the array
  38. upper = ceil(value);
  39. lower = floor(value);
  40. //XXX: can we be more performant here?
  41. value = table[upper]*(1. - (upper - value)) + table[lower]*(upper - value);
  42. /* scale the value */
  43. return value * (1./65535.);
  44. }
  45. /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
  46. uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
  47. {
  48. /* Start scaling input_value to the length of the array: 65535*(length-1).
  49. * We'll divide out the 65535 next */
  50. uint32_t value = (input_value * (length - 1));
  51. uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
  52. uint32_t lower = value / 65535; /* equivalent to floor(value/65535) */
  53. /* interp is the distance from upper to value scaled to 0..65535 */
  54. uint32_t interp = value % 65535;
  55. value = (table[upper]*(interp) + table[lower]*(65535 - interp))/65535; // 0..65535*65535
  56. return value;
  57. }
  58. /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
  59. * and returns a uint8_t value representing a range from 0..1 */
  60. static
  61. uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
  62. {
  63. /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
  64. * We'll divide out the PRECACHE_OUTPUT_MAX next */
  65. uint32_t value = (input_value * (length - 1));
  66. /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
  67. uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
  68. /* equivalent to floor(value/PRECACHE_OUTPUT_MAX) */
  69. uint32_t lower = value / PRECACHE_OUTPUT_MAX;
  70. /* interp is the distance from upper to value scaled to 0..PRECACHE_OUTPUT_MAX */
  71. uint32_t interp = value % PRECACHE_OUTPUT_MAX;
  72. /* the table values range from 0..65535 */
  73. value = (table[upper]*(interp) + table[lower]*(PRECACHE_OUTPUT_MAX - interp)); // 0..(65535*PRECACHE_OUTPUT_MAX)
  74. /* round and scale */
  75. value += (PRECACHE_OUTPUT_MAX*65535/255)/2;
  76. value /= (PRECACHE_OUTPUT_MAX*65535/255); // scale to 0..255
  77. return value;
  78. }
  79. #if 0
  80. /* if we use a different representation i.e. one that goes from 0 to 0x1000 we can be more efficient
  81. * because we can avoid the divisions and use a shifting instead */
  82. /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
  83. uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
  84. {
  85. uint32_t value = (input_value * (length - 1));
  86. uint32_t upper = (value + 4095) / 4096; /* equivalent to ceil(value/4096) */
  87. uint32_t lower = value / 4096; /* equivalent to floor(value/4096) */
  88. uint32_t interp = value % 4096;
  89. value = (table[upper]*(interp) + table[lower]*(4096 - interp))/4096; // 0..4096*4096
  90. return value;
  91. }
  92. #endif
  93. void compute_curve_gamma_table_type1(float gamma_table[256], double gamma)
  94. {
  95. unsigned int i;
  96. for (i = 0; i < 256; i++) {
  97. gamma_table[i] = pow(i/255., gamma);
  98. }
  99. }
  100. void compute_curve_gamma_table_type2(float gamma_table[256], uint16_t *table, int length)
  101. {
  102. unsigned int i;
  103. for (i = 0; i < 256; i++) {
  104. gamma_table[i] = lut_interp_linear(i/255., table, length);
  105. }
  106. }
  107. void compute_curve_gamma_table_type0(float gamma_table[256])
  108. {
  109. unsigned int i;
  110. for (i = 0; i < 256; i++) {
  111. gamma_table[i] = i/255.;
  112. }
  113. }
  114. unsigned char clamp_u8(float v)
  115. {
  116. if (v > 255.)
  117. return 255;
  118. else if (v < 0)
  119. return 0;
  120. else
  121. return floor(v+.5);
  122. }
  123. struct vector {
  124. float v[3];
  125. };
  126. struct matrix {
  127. float m[3][3];
  128. bool invalid;
  129. };
  130. struct vector matrix_eval(struct matrix mat, struct vector v)
  131. {
  132. struct vector result;
  133. result.v[0] = mat.m[0][0]*v.v[0] + mat.m[0][1]*v.v[1] + mat.m[0][2]*v.v[2];
  134. result.v[1] = mat.m[1][0]*v.v[0] + mat.m[1][1]*v.v[1] + mat.m[1][2]*v.v[2];
  135. result.v[2] = mat.m[2][0]*v.v[0] + mat.m[2][1]*v.v[1] + mat.m[2][2]*v.v[2];
  136. return result;
  137. }
  138. //XXX: should probably pass by reference and we could
  139. //probably reuse this computation in matrix_invert
  140. float matrix_det(struct matrix mat)
  141. {
  142. float det;
  143. det = mat.m[0][0]*mat.m[1][1]*mat.m[2][2] +
  144. mat.m[0][1]*mat.m[1][2]*mat.m[2][0] +
  145. mat.m[0][2]*mat.m[1][0]*mat.m[2][1] -
  146. mat.m[0][0]*mat.m[1][2]*mat.m[2][1] -
  147. mat.m[0][1]*mat.m[1][0]*mat.m[2][2] -
  148. mat.m[0][2]*mat.m[1][1]*mat.m[2][0];
  149. return det;
  150. }
  151. /* from pixman and cairo and Mathematics for Game Programmers */
  152. /* lcms uses gauss-jordan elimination with partial pivoting which is
  153. * less efficient and not as numerically stable. See Mathematics for
  154. * Game Programmers. */
  155. struct matrix matrix_invert(struct matrix mat)
  156. {
  157. struct matrix dest_mat;
  158. int i,j;
  159. static int a[3] = { 2, 2, 1 };
  160. static int b[3] = { 1, 0, 0 };
  161. /* inv (A) = 1/det (A) * adj (A) */
  162. float det = matrix_det(mat);
  163. if (det == 0) {
  164. dest_mat.invalid = true;
  165. } else {
  166. dest_mat.invalid = false;
  167. }
  168. det = 1/det;
  169. for (j = 0; j < 3; j++) {
  170. for (i = 0; i < 3; i++) {
  171. double p;
  172. int ai = a[i];
  173. int aj = a[j];
  174. int bi = b[i];
  175. int bj = b[j];
  176. p = mat.m[ai][aj] * mat.m[bi][bj] -
  177. mat.m[ai][bj] * mat.m[bi][aj];
  178. if (((i + j) & 1) != 0)
  179. p = -p;
  180. dest_mat.m[j][i] = det * p;
  181. }
  182. }
  183. return dest_mat;
  184. }
  185. struct matrix matrix_identity(void)
  186. {
  187. struct matrix i;
  188. i.m[0][0] = 1;
  189. i.m[0][1] = 0;
  190. i.m[0][2] = 0;
  191. i.m[1][0] = 0;
  192. i.m[1][1] = 1;
  193. i.m[1][2] = 0;
  194. i.m[2][0] = 0;
  195. i.m[2][1] = 0;
  196. i.m[2][2] = 1;
  197. i.invalid = false;
  198. return i;
  199. }
  200. static struct matrix matrix_invalid(void)
  201. {
  202. struct matrix inv = matrix_identity();
  203. inv.invalid = true;
  204. return inv;
  205. }
  206. /* from pixman */
  207. /* MAT3per... */
  208. struct matrix matrix_multiply(struct matrix a, struct matrix b)
  209. {
  210. struct matrix result;
  211. int dx, dy;
  212. int o;
  213. for (dy = 0; dy < 3; dy++) {
  214. for (dx = 0; dx < 3; dx++) {
  215. double v = 0;
  216. for (o = 0; o < 3; o++) {
  217. v += a.m[dy][o] * b.m[o][dx];
  218. }
  219. result.m[dy][dx] = v;
  220. }
  221. }
  222. result.invalid = a.invalid || b.invalid;
  223. return result;
  224. }
  225. float u8Fixed8Number_to_float(uint16_t x)
  226. {
  227. // 0x0000 = 0.
  228. // 0x0100 = 1.
  229. // 0xffff = 255 + 255/256
  230. return x/256.;
  231. }
  232. float *build_input_gamma_table(struct curveType *TRC)
  233. {
  234. float *gamma_table = malloc(sizeof(float)*256);
  235. if (gamma_table) {
  236. if (TRC->count == 0) {
  237. compute_curve_gamma_table_type0(gamma_table);
  238. } else if (TRC->count == 1) {
  239. compute_curve_gamma_table_type1(gamma_table, u8Fixed8Number_to_float(TRC->data[0]));
  240. } else {
  241. compute_curve_gamma_table_type2(gamma_table, TRC->data, TRC->count);
  242. }
  243. }
  244. return gamma_table;
  245. }
  246. struct matrix build_colorant_matrix(qcms_profile *p)
  247. {
  248. struct matrix result;
  249. result.m[0][0] = s15Fixed16Number_to_float(p->redColorant.X);
  250. result.m[0][1] = s15Fixed16Number_to_float(p->greenColorant.X);
  251. result.m[0][2] = s15Fixed16Number_to_float(p->blueColorant.X);
  252. result.m[1][0] = s15Fixed16Number_to_float(p->redColorant.Y);
  253. result.m[1][1] = s15Fixed16Number_to_float(p->greenColorant.Y);
  254. result.m[1][2] = s15Fixed16Number_to_float(p->blueColorant.Y);
  255. result.m[2][0] = s15Fixed16Number_to_float(p->redColorant.Z);
  256. result.m[2][1] = s15Fixed16Number_to_float(p->greenColorant.Z);
  257. result.m[2][2] = s15Fixed16Number_to_float(p->blueColorant.Z);
  258. result.invalid = false;
  259. return result;
  260. }
  261. /* The following code is copied nearly directly from lcms.
  262. * I think it could be much better. For example, Argyll seems to have better code in
  263. * icmTable_lookup_bwd and icmTable_setup_bwd. However, for now this is a quick way
  264. * to a working solution and allows for easy comparing with lcms. */
  265. uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int length)
  266. {
  267. int l = 1;
  268. int r = 0x10000;
  269. int x = 0, res; // 'int' Give spacing for negative values
  270. int NumZeroes, NumPoles;
  271. int cell0, cell1;
  272. double val2;
  273. double y0, y1, x0, x1;
  274. double a, b, f;
  275. // July/27 2001 - Expanded to handle degenerated curves with an arbitrary
  276. // number of elements containing 0 at the begining of the table (Zeroes)
  277. // and another arbitrary number of poles (FFFFh) at the end.
  278. // First the zero and pole extents are computed, then value is compared.
  279. NumZeroes = 0;
  280. while (LutTable[NumZeroes] == 0 && NumZeroes < length-1)
  281. NumZeroes++;
  282. // There are no zeros at the beginning and we are trying to find a zero, so
  283. // return anything. It seems zero would be the less destructive choice
  284. /* I'm not sure that this makes sense, but oh well... */
  285. if (NumZeroes == 0 && Value == 0)
  286. return 0;
  287. NumPoles = 0;
  288. while (LutTable[length-1- NumPoles] == 0xFFFF && NumPoles < length-1)
  289. NumPoles++;
  290. // Does the curve belong to this case?
  291. if (NumZeroes > 1 || NumPoles > 1)
  292. {
  293. int a, b;
  294. // Identify if value fall downto 0 or FFFF zone
  295. if (Value == 0) return 0;
  296. // if (Value == 0xFFFF) return 0xFFFF;
  297. // else restrict to valid zone
  298. a = ((NumZeroes-1) * 0xFFFF) / (length-1);
  299. b = ((length-1 - NumPoles) * 0xFFFF) / (length-1);
  300. l = a - 1;
  301. r = b + 1;
  302. }
  303. // Seems not a degenerated case... apply binary search
  304. while (r > l) {
  305. x = (l + r) / 2;
  306. res = (int) lut_interp_linear16((uint16_fract_t) (x-1), LutTable, length);
  307. if (res == Value) {
  308. // Found exact match.
  309. return (uint16_fract_t) (x - 1);
  310. }
  311. if (res > Value) r = x - 1;
  312. else l = x + 1;
  313. }
  314. // Not found, should we interpolate?
  315. // Get surrounding nodes
  316. val2 = (length-1) * ((double) (x - 1) / 65535.0);
  317. cell0 = (int) floor(val2);
  318. cell1 = (int) ceil(val2);
  319. if (cell0 == cell1) return (uint16_fract_t) x;
  320. y0 = LutTable[cell0] ;
  321. x0 = (65535.0 * cell0) / (length-1);
  322. y1 = LutTable[cell1] ;
  323. x1 = (65535.0 * cell1) / (length-1);
  324. a = (y1 - y0) / (x1 - x0);
  325. b = y0 - a * x0;
  326. if (fabs(a) < 0.01) return (uint16_fract_t) x;
  327. f = ((Value - b) / a);
  328. if (f < 0.0) return (uint16_fract_t) 0;
  329. if (f >= 65535.0) return (uint16_fract_t) 0xFFFF;
  330. return (uint16_fract_t) floor(f + 0.5);
  331. }
  332. // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
  333. // This is just an approximation, I am not handling all the non-linear
  334. // aspects of the RGB to XYZ process, and assumming that the gamma correction
  335. // has transitive property in the tranformation chain.
  336. //
  337. // the alghoritm:
  338. //
  339. // - First I build the absolute conversion matrix using
  340. // primaries in XYZ. This matrix is next inverted
  341. // - Then I eval the source white point across this matrix
  342. // obtaining the coeficients of the transformation
  343. // - Then, I apply these coeficients to the original matrix
  344. static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
  345. {
  346. struct matrix primaries;
  347. struct matrix primaries_invert;
  348. struct matrix result;
  349. struct vector white_point;
  350. struct vector coefs;
  351. double xn, yn;
  352. double xr, yr;
  353. double xg, yg;
  354. double xb, yb;
  355. xn = white.x;
  356. yn = white.y;
  357. if (yn == 0.0)
  358. return matrix_invalid();
  359. xr = primrs.red.x;
  360. yr = primrs.red.y;
  361. xg = primrs.green.x;
  362. yg = primrs.green.y;
  363. xb = primrs.blue.x;
  364. yb = primrs.blue.y;
  365. primaries.m[0][0] = xr;
  366. primaries.m[0][1] = xg;
  367. primaries.m[0][2] = xb;
  368. primaries.m[1][0] = yr;
  369. primaries.m[1][1] = yg;
  370. primaries.m[1][2] = yb;
  371. primaries.m[2][0] = 1 - xr - yr;
  372. primaries.m[2][1] = 1 - xg - yg;
  373. primaries.m[2][2] = 1 - xb - yb;
  374. primaries.invalid = false;
  375. white_point.v[0] = xn/yn;
  376. white_point.v[1] = 1.;
  377. white_point.v[2] = (1.0-xn-yn)/yn;
  378. primaries_invert = matrix_invert(primaries);
  379. coefs = matrix_eval(primaries_invert, white_point);
  380. result.m[0][0] = coefs.v[0]*xr;
  381. result.m[0][1] = coefs.v[1]*xg;
  382. result.m[0][2] = coefs.v[2]*xb;
  383. result.m[1][0] = coefs.v[0]*yr;
  384. result.m[1][1] = coefs.v[1]*yg;
  385. result.m[1][2] = coefs.v[2]*yb;
  386. result.m[2][0] = coefs.v[0]*(1.-xr-yr);
  387. result.m[2][1] = coefs.v[1]*(1.-xg-yg);
  388. result.m[2][2] = coefs.v[2]*(1.-xb-yb);
  389. result.invalid = primaries_invert.invalid;
  390. return result;
  391. }
  392. struct CIE_XYZ {
  393. double X;
  394. double Y;
  395. double Z;
  396. };
  397. /* CIE Illuminant D50 */
  398. static const struct CIE_XYZ D50_XYZ = {
  399. 0.9642,
  400. 1.0000,
  401. 0.8249
  402. };
  403. /* from lcms: xyY2XYZ()
  404. * corresponds to argyll: icmYxy2XYZ() */
  405. static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
  406. {
  407. struct CIE_XYZ dest;
  408. dest.X = (source.x / source.y) * source.Y;
  409. dest.Y = source.Y;
  410. dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
  411. return dest;
  412. }
  413. /* from lcms: ComputeChromaticAdaption */
  414. // Compute chromatic adaption matrix using chad as cone matrix
  415. static struct matrix
  416. compute_chromatic_adaption(struct CIE_XYZ source_white_point,
  417. struct CIE_XYZ dest_white_point,
  418. struct matrix chad)
  419. {
  420. struct matrix chad_inv;
  421. struct vector cone_source_XYZ, cone_source_rgb;
  422. struct vector cone_dest_XYZ, cone_dest_rgb;
  423. struct matrix cone, tmp;
  424. tmp = chad;
  425. chad_inv = matrix_invert(tmp);
  426. cone_source_XYZ.v[0] = source_white_point.X;
  427. cone_source_XYZ.v[1] = source_white_point.Y;
  428. cone_source_XYZ.v[2] = source_white_point.Z;
  429. cone_dest_XYZ.v[0] = dest_white_point.X;
  430. cone_dest_XYZ.v[1] = dest_white_point.Y;
  431. cone_dest_XYZ.v[2] = dest_white_point.Z;
  432. cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
  433. cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);
  434. cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
  435. cone.m[0][1] = 0;
  436. cone.m[0][2] = 0;
  437. cone.m[1][0] = 0;
  438. cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
  439. cone.m[1][2] = 0;
  440. cone.m[2][0] = 0;
  441. cone.m[2][1] = 0;
  442. cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
  443. cone.invalid = false;
  444. // Normalize
  445. return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
  446. }
  447. /* from lcms: cmsAdaptionMatrix */
  448. // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
  449. // Bradford is assumed
  450. static struct matrix
  451. adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
  452. {
  453. struct matrix lam_rigg = {{ // Bradford matrix
  454. { 0.8951, 0.2664, -0.1614 },
  455. { -0.7502, 1.7135, 0.0367 },
  456. { 0.0389, -0.0685, 1.0296 }
  457. }};
  458. return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
  459. }
  460. /* from lcms: cmsAdaptMatrixToD50 */
  461. static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
  462. {
  463. struct CIE_XYZ Dn;
  464. struct matrix Bradford;
  465. if (source_white_pt.y == 0.0)
  466. return matrix_invalid();
  467. Dn = xyY2XYZ(source_white_pt);
  468. Bradford = adaption_matrix(Dn, D50_XYZ);
  469. return matrix_multiply(Bradford, r);
  470. }
  471. qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
  472. {
  473. struct matrix colorants;
  474. colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
  475. colorants = adapt_matrix_to_D50(colorants, white_point);
  476. if (colorants.invalid)
  477. return false;
  478. /* note: there's a transpose type of operation going on here */
  479. profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
  480. profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
  481. profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
  482. profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
  483. profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
  484. profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
  485. profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
  486. profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
  487. profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
  488. return true;
  489. }
  490. /*
  491. The number of entries needed to invert a lookup table should not
  492. necessarily be the same as the original number of entries. This is
  493. especially true of lookup tables that have a small number of entries.
  494. For example:
  495. Using a table like:
  496. {0, 3104, 14263, 34802, 65535}
  497. invert_lut will produce an inverse of:
  498. {3, 34459, 47529, 56801, 65535}
  499. which has an maximum error of about 9855 (pixel difference of ~38.346)
  500. For now, we punt the decision of output size to the caller. */
  501. static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
  502. {
  503. int i;
  504. /* for now we invert the lut by creating a lut of size out_length
  505. * and attempting to lookup a value for each entry using lut_inverse_interp16 */
  506. uint16_t *output = malloc(sizeof(uint16_t)*out_length);
  507. if (!output)
  508. return NULL;
  509. for (i = 0; i < out_length; i++) {
  510. double x = ((double) i * 65535.) / (double) (out_length - 1);
  511. uint16_fract_t input = floor(x + .5);
  512. output[i] = lut_inverse_interp16(input, table, length);
  513. }
  514. return output;
  515. }
  516. static uint16_t *build_linear_table(int length)
  517. {
  518. int i;
  519. uint16_t *output = malloc(sizeof(uint16_t)*length);
  520. if (!output)
  521. return NULL;
  522. for (i = 0; i < length; i++) {
  523. double x = ((double) i * 65535.) / (double) (length - 1);
  524. uint16_fract_t input = floor(x + .5);
  525. output[i] = input;
  526. }
  527. return output;
  528. }
  529. static uint16_t *build_pow_table(float gamma, int length)
  530. {
  531. int i;
  532. uint16_t *output = malloc(sizeof(uint16_t)*length);
  533. if (!output)
  534. return NULL;
  535. for (i = 0; i < length; i++) {
  536. uint16_fract_t result;
  537. double x = ((double) i) / (double) (length - 1);
  538. x = pow(x, gamma);
  539. //XXX turn this conversion into a function
  540. result = floor(x*65535. + .5);
  541. output[i] = result;
  542. }
  543. return output;
  544. }
  545. static float clamp_float(float a)
  546. {
  547. if (a > 1.)
  548. return 1.;
  549. else if (a < 0)
  550. return 0;
  551. else
  552. return a;
  553. }
  554. #if 0
  555. static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  556. {
  557. int i;
  558. float (*mat)[4] = transform->matrix;
  559. for (i=0; i<length; i++) {
  560. unsigned char device_r = *src++;
  561. unsigned char device_g = *src++;
  562. unsigned char device_b = *src++;
  563. float linear_r = transform->input_gamma_table_r[device_r];
  564. float linear_g = transform->input_gamma_table_g[device_g];
  565. float linear_b = transform->input_gamma_table_b[device_b];
  566. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  567. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  568. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  569. float out_device_r = pow(out_linear_r, transform->out_gamma_r);
  570. float out_device_g = pow(out_linear_g, transform->out_gamma_g);
  571. float out_device_b = pow(out_linear_b, transform->out_gamma_b);
  572. *dest++ = clamp_u8(255*out_device_r);
  573. *dest++ = clamp_u8(255*out_device_g);
  574. *dest++ = clamp_u8(255*out_device_b);
  575. }
  576. }
  577. #endif
  578. static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  579. {
  580. unsigned int i;
  581. for (i = 0; i < length; i++) {
  582. float out_device_r, out_device_g, out_device_b;
  583. unsigned char device = *src++;
  584. float linear = transform->input_gamma_table_gray[device];
  585. out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
  586. out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
  587. out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
  588. *dest++ = clamp_u8(out_device_r*255);
  589. *dest++ = clamp_u8(out_device_g*255);
  590. *dest++ = clamp_u8(out_device_b*255);
  591. }
  592. }
  593. /* Alpha is not corrected.
  594. A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
  595. RGB Is?" Tech Memo 17 (December 14, 1998).
  596. See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
  597. */
  598. static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  599. {
  600. unsigned int i;
  601. for (i = 0; i < length; i++) {
  602. float out_device_r, out_device_g, out_device_b;
  603. unsigned char device = *src++;
  604. unsigned char alpha = *src++;
  605. float linear = transform->input_gamma_table_gray[device];
  606. out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
  607. out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
  608. out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
  609. *dest++ = clamp_u8(out_device_r*255);
  610. *dest++ = clamp_u8(out_device_g*255);
  611. *dest++ = clamp_u8(out_device_b*255);
  612. *dest++ = alpha;
  613. }
  614. }
  615. static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  616. {
  617. unsigned int i;
  618. for (i = 0; i < length; i++) {
  619. unsigned char device = *src++;
  620. uint16_t gray;
  621. float linear = transform->input_gamma_table_gray[device];
  622. /* we could round here... */
  623. gray = linear * PRECACHE_OUTPUT_MAX;
  624. *dest++ = transform->output_table_r->data[gray];
  625. *dest++ = transform->output_table_g->data[gray];
  626. *dest++ = transform->output_table_b->data[gray];
  627. }
  628. }
  629. static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  630. {
  631. unsigned int i;
  632. for (i = 0; i < length; i++) {
  633. unsigned char device = *src++;
  634. unsigned char alpha = *src++;
  635. uint16_t gray;
  636. float linear = transform->input_gamma_table_gray[device];
  637. /* we could round here... */
  638. gray = linear * PRECACHE_OUTPUT_MAX;
  639. *dest++ = transform->output_table_r->data[gray];
  640. *dest++ = transform->output_table_g->data[gray];
  641. *dest++ = transform->output_table_b->data[gray];
  642. *dest++ = alpha;
  643. }
  644. }
  645. static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  646. {
  647. unsigned int i;
  648. float (*mat)[4] = transform->matrix;
  649. for (i = 0; i < length; i++) {
  650. unsigned char device_r = *src++;
  651. unsigned char device_g = *src++;
  652. unsigned char device_b = *src++;
  653. uint16_t r, g, b;
  654. float linear_r = transform->input_gamma_table_r[device_r];
  655. float linear_g = transform->input_gamma_table_g[device_g];
  656. float linear_b = transform->input_gamma_table_b[device_b];
  657. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  658. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  659. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  660. out_linear_r = clamp_float(out_linear_r);
  661. out_linear_g = clamp_float(out_linear_g);
  662. out_linear_b = clamp_float(out_linear_b);
  663. /* we could round here... */
  664. r = out_linear_r * PRECACHE_OUTPUT_MAX;
  665. g = out_linear_g * PRECACHE_OUTPUT_MAX;
  666. b = out_linear_b * PRECACHE_OUTPUT_MAX;
  667. *dest++ = transform->output_table_r->data[r];
  668. *dest++ = transform->output_table_g->data[g];
  669. *dest++ = transform->output_table_b->data[b];
  670. }
  671. }
  672. static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  673. {
  674. unsigned int i;
  675. float (*mat)[4] = transform->matrix;
  676. for (i = 0; i < length; i++) {
  677. unsigned char device_r = *src++;
  678. unsigned char device_g = *src++;
  679. unsigned char device_b = *src++;
  680. unsigned char alpha = *src++;
  681. uint16_t r, g, b;
  682. float linear_r = transform->input_gamma_table_r[device_r];
  683. float linear_g = transform->input_gamma_table_g[device_g];
  684. float linear_b = transform->input_gamma_table_b[device_b];
  685. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  686. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  687. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  688. out_linear_r = clamp_float(out_linear_r);
  689. out_linear_g = clamp_float(out_linear_g);
  690. out_linear_b = clamp_float(out_linear_b);
  691. /* we could round here... */
  692. r = out_linear_r * PRECACHE_OUTPUT_MAX;
  693. g = out_linear_g * PRECACHE_OUTPUT_MAX;
  694. b = out_linear_b * PRECACHE_OUTPUT_MAX;
  695. *dest++ = transform->output_table_r->data[r];
  696. *dest++ = transform->output_table_g->data[g];
  697. *dest++ = transform->output_table_b->data[b];
  698. *dest++ = alpha;
  699. }
  700. }
  701. static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  702. {
  703. unsigned int i;
  704. float (*mat)[4] = transform->matrix;
  705. for (i = 0; i < length; i++) {
  706. unsigned char device_r = *src++;
  707. unsigned char device_g = *src++;
  708. unsigned char device_b = *src++;
  709. float out_device_r, out_device_g, out_device_b;
  710. float linear_r = transform->input_gamma_table_r[device_r];
  711. float linear_g = transform->input_gamma_table_g[device_g];
  712. float linear_b = transform->input_gamma_table_b[device_b];
  713. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  714. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  715. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  716. out_linear_r = clamp_float(out_linear_r);
  717. out_linear_g = clamp_float(out_linear_g);
  718. out_linear_b = clamp_float(out_linear_b);
  719. out_device_r = lut_interp_linear(out_linear_r, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
  720. out_device_g = lut_interp_linear(out_linear_g, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
  721. out_device_b = lut_interp_linear(out_linear_b, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
  722. *dest++ = clamp_u8(out_device_r*255);
  723. *dest++ = clamp_u8(out_device_g*255);
  724. *dest++ = clamp_u8(out_device_b*255);
  725. }
  726. }
  727. static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  728. {
  729. unsigned int i;
  730. float (*mat)[4] = transform->matrix;
  731. for (i = 0; i < length; i++) {
  732. unsigned char device_r = *src++;
  733. unsigned char device_g = *src++;
  734. unsigned char device_b = *src++;
  735. unsigned char alpha = *src++;
  736. float out_device_r, out_device_g, out_device_b;
  737. float linear_r = transform->input_gamma_table_r[device_r];
  738. float linear_g = transform->input_gamma_table_g[device_g];
  739. float linear_b = transform->input_gamma_table_b[device_b];
  740. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  741. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  742. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  743. out_linear_r = clamp_float(out_linear_r);
  744. out_linear_g = clamp_float(out_linear_g);
  745. out_linear_b = clamp_float(out_linear_b);
  746. out_device_r = lut_interp_linear(out_linear_r, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
  747. out_device_g = lut_interp_linear(out_linear_g, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
  748. out_device_b = lut_interp_linear(out_linear_b, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
  749. *dest++ = clamp_u8(out_device_r*255);
  750. *dest++ = clamp_u8(out_device_g*255);
  751. *dest++ = clamp_u8(out_device_b*255);
  752. *dest++ = alpha;
  753. }
  754. }
  755. #if 0
  756. static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
  757. {
  758. int i;
  759. float (*mat)[4] = transform->matrix;
  760. for (i = 0; i < length; i++) {
  761. unsigned char device_r = *src++;
  762. unsigned char device_g = *src++;
  763. unsigned char device_b = *src++;
  764. float linear_r = transform->input_gamma_table_r[device_r];
  765. float linear_g = transform->input_gamma_table_g[device_g];
  766. float linear_b = transform->input_gamma_table_b[device_b];
  767. float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
  768. float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
  769. float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
  770. *dest++ = clamp_u8(out_linear_r*255);
  771. *dest++ = clamp_u8(out_linear_g*255);
  772. *dest++ = clamp_u8(out_linear_b*255);
  773. }
  774. }
  775. #endif
  776. static struct precache_output *precache_reference(struct precache_output *p)
  777. {
  778. p->ref_count++;
  779. return p;
  780. }
  781. static struct precache_output *precache_create()
  782. {
  783. struct precache_output *p = malloc(sizeof(struct precache_output));
  784. if (p)
  785. p->ref_count = 1;
  786. return p;
  787. }
  788. void precache_release(struct precache_output *p)
  789. {
  790. if (--p->ref_count == 0) {
  791. free(p);
  792. }
  793. }
  794. #ifdef HAS_POSIX_MEMALIGN
  795. static qcms_transform *transform_alloc(void)
  796. {
  797. qcms_transform *t;
  798. if (!posix_memalign(&t, 16, sizeof(*t))) {
  799. return t;
  800. } else {
  801. return NULL;
  802. }
  803. }
  804. static void transform_free(qcms_transform *t)
  805. {
  806. free(t);
  807. }
  808. #else
  809. static qcms_transform *transform_alloc(void)
  810. {
  811. /* transform needs to be aligned on a 16byte boundrary */
  812. char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
  813. /* make room for a pointer to the block returned by calloc */
  814. void *transform_start = original_block + sizeof(void*);
  815. /* align transform_start */
  816. qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
  817. /* store a pointer to the block returned by calloc so that we can free it later */
  818. void **(original_block_ptr) = (void**)transform_aligned;
  819. if (!original_block)
  820. return NULL;
  821. original_block_ptr--;
  822. *original_block_ptr = original_block;
  823. return transform_aligned;
  824. }
  825. static void transform_free(qcms_transform *t)
  826. {
  827. /* get at the pointer to the unaligned block returned by calloc */
  828. void **p = (void**)t;
  829. p--;
  830. free(*p);
  831. }
  832. #endif
  833. void qcms_transform_release(qcms_transform *t)
  834. {
  835. /* ensure we only free the gamma tables once even if there are
  836. * multiple references to the same data */
  837. if (t->output_table_r)
  838. precache_release(t->output_table_r);
  839. if (t->output_table_g)
  840. precache_release(t->output_table_g);
  841. if (t->output_table_b)
  842. precache_release(t->output_table_b);
  843. free(t->input_gamma_table_r);
  844. if (t->input_gamma_table_g != t->input_gamma_table_r)
  845. free(t->input_gamma_table_g);
  846. if (t->input_gamma_table_g != t->input_gamma_table_r &&
  847. t->input_gamma_table_g != t->input_gamma_table_b)
  848. free(t->input_gamma_table_b);
  849. free(t->input_gamma_table_gray);
  850. free(t->output_gamma_lut_r);
  851. free(t->output_gamma_lut_g);
  852. free(t->output_gamma_lut_b);
  853. transform_free(t);
  854. }
  855. static void compute_precache_pow(uint8_t *output, float gamma)
  856. {
  857. uint32_t v = 0;
  858. for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
  859. //XXX: don't do integer/float conversion... and round?
  860. output[v] = 255. * pow(v/(double)PRECACHE_OUTPUT_MAX, gamma);
  861. }
  862. }
  863. void compute_precache_lut(uint8_t *output, uint16_t *table, int length)
  864. {
  865. uint32_t v = 0;
  866. for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
  867. output[v] = lut_interp_linear_precache_output(v, table, length);
  868. }
  869. }
  870. void compute_precache_linear(uint8_t *output)
  871. {
  872. uint32_t v = 0;
  873. for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) {
  874. //XXX: round?
  875. output[v] = v / (PRECACHE_OUTPUT_SIZE/256);
  876. }
  877. }
  878. qcms_bool compute_precache(struct curveType *trc, uint8_t *output)
  879. {
  880. if (trc->count == 0) {
  881. compute_precache_linear(output);
  882. } else if (trc->count == 1) {
  883. compute_precache_pow(output, 1./u8Fixed8Number_to_float(trc->data[0]));
  884. } else {
  885. uint16_t *inverted;
  886. int inverted_size = trc->count;
  887. //XXX: the choice of a minimum of 256 here is not backed by any theory, measurement or data, however it is what lcms uses.
  888. // the maximum number we would need is 65535 because that's the accuracy used for computing the precache table
  889. if (inverted_size < 256)
  890. inverted_size = 256;
  891. inverted = invert_lut(trc->data, trc->count, inverted_size);
  892. if (!inverted)
  893. return false;
  894. compute_precache_lut(output, inverted, inverted_size);
  895. free(inverted);
  896. }
  897. return true;
  898. }
  899. #ifdef X86
  900. // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
  901. // mozilla/jpeg)
  902. // -------------------------------------------------------------------------
  903. #if defined(_M_IX86) && defined(_MSC_VER)
  904. #define HAS_CPUID
  905. /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
  906. register - I'm not sure if that ever happens on windows, but cpuid isn't
  907. on the critical path so we just preserve the register to be safe and to be
  908. consistent with the non-windows version. */
  909. static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
  910. uint32_t a_, b_, c_, d_;
  911. __asm {
  912. xchg ebx, esi
  913. mov eax, fxn
  914. cpuid
  915. mov a_, eax
  916. mov b_, ebx
  917. mov c_, ecx
  918. mov d_, edx
  919. xchg ebx, esi
  920. }
  921. *a = a_;
  922. *b = b_;
  923. *c = c_;
  924. *d = d_;
  925. }
  926. #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
  927. #define HAS_CPUID
  928. /* Get us a CPUID function. We can't use ebx because it's the PIC register on
  929. some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
  930. static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
  931. uint32_t a_, b_, c_, d_;
  932. __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
  933. : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
  934. *a = a_;
  935. *b = b_;
  936. *c = c_;
  937. *d = d_;
  938. }
  939. #endif
  940. // -------------------------Runtime SSEx Detection-----------------------------
  941. /* MMX is always supported per
  942. * Gecko v1.9.1 minimum CPU requirements */
  943. #define SSE1_EDX_MASK (1UL << 25)
  944. #define SSE2_EDX_MASK (1UL << 26)
  945. #define SSE3_ECX_MASK (1UL << 0)
  946. static int sse_version_available(void)
  947. {
  948. #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
  949. /* we know at build time that 64-bit CPUs always have SSE2
  950. * this tells the compiler that non-SSE2 branches will never be
  951. * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
  952. return 2;
  953. #elif defined(HAS_CPUID)
  954. static int sse_version = -1;
  955. uint32_t a, b, c, d;
  956. uint32_t function = 0x00000001;
  957. if (sse_version == -1) {
  958. sse_version = 0;
  959. cpuid(function, &a, &b, &c, &d);
  960. if (c & SSE3_ECX_MASK)
  961. sse_version = 3;
  962. else if (d & SSE2_EDX_MASK)
  963. sse_version = 2;
  964. else if (d & SSE1_EDX_MASK)
  965. sse_version = 1;
  966. }
  967. return sse_version;
  968. #else
  969. return 0;
  970. #endif
  971. }
  972. #endif
  973. void build_output_lut(struct curveType *trc,
  974. uint16_t **output_gamma_lut, size_t *output_gamma_lut_length)
  975. {
  976. if (trc->count == 0) {
  977. *output_gamma_lut = build_linear_table(4096);
  978. *output_gamma_lut_length = 4096;
  979. } else if (trc->count == 1) {
  980. float gamma = 1./u8Fixed8Number_to_float(trc->data[0]);
  981. *output_gamma_lut = build_pow_table(gamma, 4096);
  982. *output_gamma_lut_length = 4096;
  983. } else {
  984. //XXX: the choice of a minimum of 256 here is not backed by any theory, measurement or data, however it is what lcms uses.
  985. *output_gamma_lut_length = trc->count;
  986. if (*output_gamma_lut_length < 256)
  987. *output_gamma_lut_length = 256;
  988. *output_gamma_lut = invert_lut(trc->data, trc->count, *output_gamma_lut_length);
  989. }
  990. }
  991. void qcms_profile_precache_output_transform(qcms_profile *profile)
  992. {
  993. /* we only support precaching on rgb profiles */
  994. if (profile->color_space != RGB_SIGNATURE)
  995. return;
  996. if (!profile->output_table_r) {
  997. profile->output_table_r = precache_create();
  998. if (profile->output_table_r &&
  999. !compute_precache(profile->redTRC, profile->output_table_r->data)) {
  1000. precache_release(profile->output_table_r);
  1001. profile->output_table_r = NULL;
  1002. }
  1003. }
  1004. if (!profile->output_table_g) {
  1005. profile->output_table_g = precache_create();
  1006. if (profile->output_table_g &&
  1007. !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
  1008. precache_release(profile->output_table_g);
  1009. profile->output_table_g = NULL;
  1010. }
  1011. }
  1012. if (!profile->output_table_b) {
  1013. profile->output_table_b = precache_create();
  1014. if (profile->output_table_b &&
  1015. !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
  1016. precache_release(profile->output_table_b);
  1017. profile->output_table_b = NULL;
  1018. }
  1019. }
  1020. }
  1021. #define NO_MEM_TRANSFORM NULL
  1022. qcms_transform* qcms_transform_create(
  1023. qcms_profile *in, qcms_data_type in_type,
  1024. qcms_profile* out, qcms_data_type out_type,
  1025. qcms_intent intent)
  1026. {
  1027. bool precache = false;
  1028. qcms_transform *transform = transform_alloc();
  1029. if (!transform) {
  1030. return NULL;
  1031. }
  1032. if (out_type != QCMS_DATA_RGB_8 &&
  1033. out_type != QCMS_DATA_RGBA_8) {
  1034. assert(0 && "output type");
  1035. transform_free(transform);
  1036. return NULL;
  1037. }
  1038. if (out->output_table_r &&
  1039. out->output_table_g &&
  1040. out->output_table_b) {
  1041. precache = true;
  1042. }
  1043. if (precache) {
  1044. transform->output_table_r = precache_reference(out->output_table_r);
  1045. transform->output_table_g = precache_reference(out->output_table_g);
  1046. transform->output_table_b = precache_reference(out->output_table_b);
  1047. } else {
  1048. build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
  1049. build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
  1050. build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
  1051. if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
  1052. qcms_transform_release(transform);
  1053. return NO_MEM_TRANSFORM;
  1054. }
  1055. }
  1056. if (in->color_space == RGB_SIGNATURE) {
  1057. struct matrix in_matrix, out_matrix, result;
  1058. if (in_type != QCMS_DATA_RGB_8 &&
  1059. in_type != QCMS_DATA_RGBA_8){
  1060. assert(0 && "input type");
  1061. transform_free(transform);
  1062. return NULL;
  1063. }
  1064. if (precache) {
  1065. #ifdef X86
  1066. if (sse_version_available() >= 2) {
  1067. if (in_type == QCMS_DATA_RGB_8)
  1068. transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
  1069. else
  1070. transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
  1071. #if !(defined(_MSC_VER) && defined(_M_AMD64))
  1072. /* Microsoft Compiler for x64 doesn't support MMX.
  1073. * SSE code uses MMX so that we disable on x64 */
  1074. } else
  1075. if (sse_version_available() >= 1) {
  1076. if (in_type == QCMS_DATA_RGB_8)
  1077. transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
  1078. else
  1079. transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
  1080. #endif
  1081. } else
  1082. #endif
  1083. {
  1084. if (in_type == QCMS_DATA_RGB_8)
  1085. transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
  1086. else
  1087. transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
  1088. }
  1089. } else {
  1090. if (in_type == QCMS_DATA_RGB_8)
  1091. transform->transform_fn = qcms_transform_data_rgb_out_lut;
  1092. else
  1093. transform->transform_fn = qcms_transform_data_rgba_out_lut;
  1094. }
  1095. //XXX: avoid duplicating tables if we can
  1096. transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
  1097. transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
  1098. transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
  1099. if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
  1100. qcms_transform_release(transform);
  1101. return NO_MEM_TRANSFORM;
  1102. }
  1103. /* build combined colorant matrix */
  1104. in_matrix = build_colorant_matrix(in);
  1105. out_matrix = build_colorant_matrix(out);
  1106. out_matrix = matrix_invert(out_matrix);
  1107. if (out_matrix.invalid) {
  1108. qcms_transform_release(transform);
  1109. return NULL;
  1110. }
  1111. result = matrix_multiply(out_matrix, in_matrix);
  1112. /* store the results in column major mode
  1113. * this makes doing the multiplication with sse easier */
  1114. transform->matrix[0][0] = result.m[0][0];
  1115. transform->matrix[1][0] = result.m[0][1];
  1116. transform->matrix[2][0] = result.m[0][2];
  1117. transform->matrix[0][1] = result.m[1][0];
  1118. transform->matrix[1][1] = result.m[1][1];
  1119. transform->matrix[2][1] = result.m[1][2];
  1120. transform->matrix[0][2] = result.m[2][0];
  1121. transform->matrix[1][2] = result.m[2][1];
  1122. transform->matrix[2][2] = result.m[2][2];
  1123. } else if (in->color_space == GRAY_SIGNATURE) {
  1124. if (in_type != QCMS_DATA_GRAY_8 &&
  1125. in_type != QCMS_DATA_GRAYA_8){
  1126. assert(0 && "input type");
  1127. transform_free(transform);
  1128. return NULL;
  1129. }
  1130. transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
  1131. if (!transform->input_gamma_table_gray) {
  1132. qcms_transform_release(transform);
  1133. return NO_MEM_TRANSFORM;
  1134. }
  1135. if (precache) {
  1136. if (in_type == QCMS_DATA_GRAY_8) {
  1137. transform->transform_fn = qcms_transform_data_gray_out_precache;
  1138. } else {
  1139. transform->transform_fn = qcms_transform_data_graya_out_precache;
  1140. }
  1141. } else {
  1142. if (in_type == QCMS_DATA_GRAY_8) {
  1143. transform->transform_fn = qcms_transform_data_gray_out_lut;
  1144. } else {
  1145. transform->transform_fn = qcms_transform_data_graya_out_lut;
  1146. }
  1147. }
  1148. } else {
  1149. assert(0 && "unexpected colorspace");
  1150. qcms_transform_release(transform);
  1151. return NO_MEM_TRANSFORM;
  1152. }
  1153. return transform;
  1154. }
  1155. #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
  1156. /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
  1157. __attribute__((__force_align_arg_pointer__))
  1158. #endif
  1159. void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
  1160. {
  1161. transform->transform_fn(transform, src, dest, length);
  1162. }