jsimd_arm.c 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. #ifndef __LP64__ // ESENTHEL CHANGED
  2. /*
  3. * jsimd_arm.c
  4. *
  5. * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
  6. * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander.
  7. * Copyright (C) 2015-2016, Matthieu Darbois.
  8. *
  9. * Based on the x86 SIMD extension for IJG JPEG library,
  10. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  11. * For conditions of distribution and use, see copyright notice in jsimdext.inc
  12. *
  13. * This file contains the interface between the "normal" portions
  14. * of the library and the SIMD implementations when running on a
  15. * 32-bit ARM architecture.
  16. */
  17. #define JPEG_INTERNALS
  18. #include "../jinclude.h"
  19. #include "../jpeglib.h"
  20. #include "../jsimd.h"
  21. #include "../jdct.h"
  22. #include "../jsimddct.h"
  23. #include "jsimd.h"
  24. #include <stdio.h>
  25. #include <string.h>
  26. #include <ctype.h>
  /* Bitmask of detected SIMD features.  The all-ones value is the "not yet
   * probed" sentinel that init_simd() tests for before doing any work. */
  static unsigned int simd_support = ~0U;

  /* Non-zero while SIMD Huffman encoding is allowed; init_simd() clears it
   * when the environment variable JSIMD_NOHUFFENC is set to "1". */
  static unsigned int simd_huffman = 1;
  29. #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  30. #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
  31. LOCAL(int)
  32. check_feature (char *buffer, char *feature)
  33. {
  34. char *p;
  35. if (*feature == 0)
  36. return 0;
  37. if (strncmp(buffer, "Features", 8) != 0)
  38. return 0;
  39. buffer += 8;
  40. while (isspace(*buffer))
  41. buffer++;
  42. /* Check if 'feature' is present in the buffer as a separate word */
  43. while ((p = strstr(buffer, feature))) {
  44. if (p > buffer && !isspace(*(p - 1))) {
  45. buffer++;
  46. continue;
  47. }
  48. p += strlen(feature);
  49. if (*p != 0 && !isspace(*p)) {
  50. buffer++;
  51. continue;
  52. }
  53. return 1;
  54. }
  55. return 0;
  56. }
  57. LOCAL(int)
  58. parse_proc_cpuinfo (int bufsize)
  59. {
  60. char *buffer = (char *)malloc(bufsize);
  61. FILE *fd;
  62. simd_support = 0;
  63. if (!buffer)
  64. return 0;
  65. fd = fopen("/proc/cpuinfo", "r");
  66. if (fd) {
  67. while (fgets(buffer, bufsize, fd)) {
  68. if (!strchr(buffer, '\n') && !feof(fd)) {
  69. /* "impossible" happened - insufficient size of the buffer! */
  70. fclose(fd);
  71. free(buffer);
  72. return 0;
  73. }
  74. if (check_feature(buffer, "neon"))
  75. simd_support |= JSIMD_ARM_NEON;
  76. }
  77. fclose(fd);
  78. }
  79. free(buffer);
  80. return 1;
  81. }
  82. #endif
  83. /*
  84. * Check what SIMD accelerations are supported.
  85. *
  86. * FIXME: This code is racy under a multi-threaded environment.
  87. */
  88. LOCAL(void)
  89. init_simd (void)
  90. {
  91. char *env = NULL;
  92. #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  93. int bufsize = 1024; /* an initial guess for the line buffer size limit */
  94. #endif
  95. if (simd_support != ~0U)
  96. return;
  97. simd_support = 0;
  98. #if defined(__ARM_NEON__)
  99. simd_support |= JSIMD_ARM_NEON;
  100. #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
  101. /* We still have a chance to use NEON regardless of globally used
  102. * -mcpu/-mfpu options passed to gcc by performing runtime detection via
  103. * /proc/cpuinfo parsing on linux/android */
  104. while (!parse_proc_cpuinfo(bufsize)) {
  105. bufsize *= 2;
  106. if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
  107. break;
  108. }
  109. #endif
  110. /* Force different settings through environment variables */
  111. env = getenv("JSIMD_FORCENEON");
  112. if ((env != NULL) && (strcmp(env, "1") == 0))
  113. simd_support = JSIMD_ARM_NEON;
  114. env = getenv("JSIMD_FORCENONE");
  115. if ((env != NULL) && (strcmp(env, "1") == 0))
  116. simd_support = 0;
  117. env = getenv("JSIMD_NOHUFFENC");
  118. if ((env != NULL) && (strcmp(env, "1") == 0))
  119. simd_huffman = 0;
  120. }
  121. GLOBAL(int)
  122. jsimd_can_rgb_ycc (void)
  123. {
  124. init_simd();
  125. /* The code is optimised for these values only */
  126. if (BITS_IN_JSAMPLE != 8)
  127. return 0;
  128. if (sizeof(JDIMENSION) != 4)
  129. return 0;
  130. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  131. return 0;
  132. if (simd_support & JSIMD_ARM_NEON)
  133. return 1;
  134. return 0;
  135. }
  136. GLOBAL(int)
  137. jsimd_can_rgb_gray (void)
  138. {
  139. init_simd();
  140. return 0;
  141. }
  142. GLOBAL(int)
  143. jsimd_can_ycc_rgb (void)
  144. {
  145. init_simd();
  146. /* The code is optimised for these values only */
  147. if (BITS_IN_JSAMPLE != 8)
  148. return 0;
  149. if (sizeof(JDIMENSION) != 4)
  150. return 0;
  151. if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
  152. return 0;
  153. if (simd_support & JSIMD_ARM_NEON)
  154. return 1;
  155. return 0;
  156. }
  157. GLOBAL(int)
  158. jsimd_can_ycc_rgb565 (void)
  159. {
  160. init_simd();
  161. /* The code is optimised for these values only */
  162. if (BITS_IN_JSAMPLE != 8)
  163. return 0;
  164. if (sizeof(JDIMENSION) != 4)
  165. return 0;
  166. if (simd_support & JSIMD_ARM_NEON)
  167. return 1;
  168. return 0;
  169. }
  170. GLOBAL(void)
  171. jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
  172. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  173. JDIMENSION output_row, int num_rows)
  174. {
  175. void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
  176. switch(cinfo->in_color_space) {
  177. case JCS_EXT_RGB:
  178. neonfct=jsimd_extrgb_ycc_convert_neon;
  179. break;
  180. case JCS_EXT_RGBX:
  181. case JCS_EXT_RGBA:
  182. neonfct=jsimd_extrgbx_ycc_convert_neon;
  183. break;
  184. case JCS_EXT_BGR:
  185. neonfct=jsimd_extbgr_ycc_convert_neon;
  186. break;
  187. case JCS_EXT_BGRX:
  188. case JCS_EXT_BGRA:
  189. neonfct=jsimd_extbgrx_ycc_convert_neon;
  190. break;
  191. case JCS_EXT_XBGR:
  192. case JCS_EXT_ABGR:
  193. neonfct=jsimd_extxbgr_ycc_convert_neon;
  194. break;
  195. case JCS_EXT_XRGB:
  196. case JCS_EXT_ARGB:
  197. neonfct=jsimd_extxrgb_ycc_convert_neon;
  198. break;
  199. default:
  200. neonfct=jsimd_extrgb_ycc_convert_neon;
  201. break;
  202. }
  203. neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
  204. }
  205. GLOBAL(void)
  206. jsimd_rgb_gray_convert (j_compress_ptr cinfo,
  207. JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
  208. JDIMENSION output_row, int num_rows)
  209. {
  210. }
  211. GLOBAL(void)
  212. jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
  213. JSAMPIMAGE input_buf, JDIMENSION input_row,
  214. JSAMPARRAY output_buf, int num_rows)
  215. {
  216. void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
  217. switch(cinfo->out_color_space) {
  218. case JCS_EXT_RGB:
  219. neonfct=jsimd_ycc_extrgb_convert_neon;
  220. break;
  221. case JCS_EXT_RGBX:
  222. case JCS_EXT_RGBA:
  223. neonfct=jsimd_ycc_extrgbx_convert_neon;
  224. break;
  225. case JCS_EXT_BGR:
  226. neonfct=jsimd_ycc_extbgr_convert_neon;
  227. break;
  228. case JCS_EXT_BGRX:
  229. case JCS_EXT_BGRA:
  230. neonfct=jsimd_ycc_extbgrx_convert_neon;
  231. break;
  232. case JCS_EXT_XBGR:
  233. case JCS_EXT_ABGR:
  234. neonfct=jsimd_ycc_extxbgr_convert_neon;
  235. break;
  236. case JCS_EXT_XRGB:
  237. case JCS_EXT_ARGB:
  238. neonfct=jsimd_ycc_extxrgb_convert_neon;
  239. break;
  240. default:
  241. neonfct=jsimd_ycc_extrgb_convert_neon;
  242. break;
  243. }
  244. neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
  245. }
  246. GLOBAL(void)
  247. jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
  248. JSAMPIMAGE input_buf, JDIMENSION input_row,
  249. JSAMPARRAY output_buf, int num_rows)
  250. {
  251. jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
  252. output_buf, num_rows);
  253. }
  254. GLOBAL(int)
  255. jsimd_can_h2v2_downsample (void)
  256. {
  257. init_simd();
  258. return 0;
  259. }
  260. GLOBAL(int)
  261. jsimd_can_h2v1_downsample (void)
  262. {
  263. init_simd();
  264. return 0;
  265. }
  266. GLOBAL(void)
  267. jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  268. JSAMPARRAY input_data, JSAMPARRAY output_data)
  269. {
  270. }
  271. GLOBAL(void)
  272. jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
  273. JSAMPARRAY input_data, JSAMPARRAY output_data)
  274. {
  275. }
  276. GLOBAL(int)
  277. jsimd_can_h2v2_upsample (void)
  278. {
  279. init_simd();
  280. return 0;
  281. }
  282. GLOBAL(int)
  283. jsimd_can_h2v1_upsample (void)
  284. {
  285. init_simd();
  286. return 0;
  287. }
  288. GLOBAL(void)
  289. jsimd_h2v2_upsample (j_decompress_ptr cinfo,
  290. jpeg_component_info *compptr,
  291. JSAMPARRAY input_data,
  292. JSAMPARRAY *output_data_ptr)
  293. {
  294. }
  295. GLOBAL(void)
  296. jsimd_h2v1_upsample (j_decompress_ptr cinfo,
  297. jpeg_component_info *compptr,
  298. JSAMPARRAY input_data,
  299. JSAMPARRAY *output_data_ptr)
  300. {
  301. }
  302. GLOBAL(int)
  303. jsimd_can_h2v2_fancy_upsample (void)
  304. {
  305. init_simd();
  306. return 0;
  307. }
  308. GLOBAL(int)
  309. jsimd_can_h2v1_fancy_upsample (void)
  310. {
  311. init_simd();
  312. /* The code is optimised for these values only */
  313. if (BITS_IN_JSAMPLE != 8)
  314. return 0;
  315. if (sizeof(JDIMENSION) != 4)
  316. return 0;
  317. if (simd_support & JSIMD_ARM_NEON)
  318. return 1;
  319. return 0;
  320. }
  321. GLOBAL(void)
  322. jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
  323. jpeg_component_info *compptr,
  324. JSAMPARRAY input_data,
  325. JSAMPARRAY *output_data_ptr)
  326. {
  327. }
  328. GLOBAL(void)
  329. jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
  330. jpeg_component_info *compptr,
  331. JSAMPARRAY input_data,
  332. JSAMPARRAY *output_data_ptr)
  333. {
  334. jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
  335. compptr->downsampled_width, input_data,
  336. output_data_ptr);
  337. }
  338. GLOBAL(int)
  339. jsimd_can_h2v2_merged_upsample (void)
  340. {
  341. init_simd();
  342. return 0;
  343. }
  344. GLOBAL(int)
  345. jsimd_can_h2v1_merged_upsample (void)
  346. {
  347. init_simd();
  348. return 0;
  349. }
  350. GLOBAL(void)
  351. jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
  352. JSAMPIMAGE input_buf,
  353. JDIMENSION in_row_group_ctr,
  354. JSAMPARRAY output_buf)
  355. {
  356. }
  357. GLOBAL(void)
  358. jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
  359. JSAMPIMAGE input_buf,
  360. JDIMENSION in_row_group_ctr,
  361. JSAMPARRAY output_buf)
  362. {
  363. }
  364. GLOBAL(int)
  365. jsimd_can_convsamp (void)
  366. {
  367. init_simd();
  368. /* The code is optimised for these values only */
  369. if (DCTSIZE != 8)
  370. return 0;
  371. if (BITS_IN_JSAMPLE != 8)
  372. return 0;
  373. if (sizeof(JDIMENSION) != 4)
  374. return 0;
  375. if (sizeof(DCTELEM) != 2)
  376. return 0;
  377. if (simd_support & JSIMD_ARM_NEON)
  378. return 1;
  379. return 0;
  380. }
  381. GLOBAL(int)
  382. jsimd_can_convsamp_float (void)
  383. {
  384. init_simd();
  385. return 0;
  386. }
  387. GLOBAL(void)
  388. jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
  389. DCTELEM *workspace)
  390. {
  391. jsimd_convsamp_neon(sample_data, start_col, workspace);
  392. }
  393. GLOBAL(void)
  394. jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
  395. FAST_FLOAT *workspace)
  396. {
  397. }
  398. GLOBAL(int)
  399. jsimd_can_fdct_islow (void)
  400. {
  401. init_simd();
  402. return 0;
  403. }
  404. GLOBAL(int)
  405. jsimd_can_fdct_ifast (void)
  406. {
  407. init_simd();
  408. /* The code is optimised for these values only */
  409. if (DCTSIZE != 8)
  410. return 0;
  411. if (sizeof(DCTELEM) != 2)
  412. return 0;
  413. if (simd_support & JSIMD_ARM_NEON)
  414. return 1;
  415. return 0;
  416. }
  417. GLOBAL(int)
  418. jsimd_can_fdct_float (void)
  419. {
  420. init_simd();
  421. return 0;
  422. }
  423. GLOBAL(void)
  424. jsimd_fdct_islow (DCTELEM *data)
  425. {
  426. }
  427. GLOBAL(void)
  428. jsimd_fdct_ifast (DCTELEM *data)
  429. {
  430. jsimd_fdct_ifast_neon(data);
  431. }
  432. GLOBAL(void)
  433. jsimd_fdct_float (FAST_FLOAT *data)
  434. {
  435. }
  436. GLOBAL(int)
  437. jsimd_can_quantize (void)
  438. {
  439. init_simd();
  440. /* The code is optimised for these values only */
  441. if (DCTSIZE != 8)
  442. return 0;
  443. if (sizeof(JCOEF) != 2)
  444. return 0;
  445. if (sizeof(DCTELEM) != 2)
  446. return 0;
  447. if (simd_support & JSIMD_ARM_NEON)
  448. return 1;
  449. return 0;
  450. }
  451. GLOBAL(int)
  452. jsimd_can_quantize_float (void)
  453. {
  454. init_simd();
  455. return 0;
  456. }
  457. GLOBAL(void)
  458. jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
  459. DCTELEM *workspace)
  460. {
  461. jsimd_quantize_neon(coef_block, divisors, workspace);
  462. }
  463. GLOBAL(void)
  464. jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
  465. FAST_FLOAT *workspace)
  466. {
  467. }
  468. GLOBAL(int)
  469. jsimd_can_idct_2x2 (void)
  470. {
  471. init_simd();
  472. /* The code is optimised for these values only */
  473. if (DCTSIZE != 8)
  474. return 0;
  475. if (sizeof(JCOEF) != 2)
  476. return 0;
  477. if (BITS_IN_JSAMPLE != 8)
  478. return 0;
  479. if (sizeof(JDIMENSION) != 4)
  480. return 0;
  481. if (sizeof(ISLOW_MULT_TYPE) != 2)
  482. return 0;
  483. if (simd_support & JSIMD_ARM_NEON)
  484. return 1;
  485. return 0;
  486. }
  487. GLOBAL(int)
  488. jsimd_can_idct_4x4 (void)
  489. {
  490. init_simd();
  491. /* The code is optimised for these values only */
  492. if (DCTSIZE != 8)
  493. return 0;
  494. if (sizeof(JCOEF) != 2)
  495. return 0;
  496. if (BITS_IN_JSAMPLE != 8)
  497. return 0;
  498. if (sizeof(JDIMENSION) != 4)
  499. return 0;
  500. if (sizeof(ISLOW_MULT_TYPE) != 2)
  501. return 0;
  502. if (simd_support & JSIMD_ARM_NEON)
  503. return 1;
  504. return 0;
  505. }
  506. GLOBAL(void)
  507. jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  508. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  509. JDIMENSION output_col)
  510. {
  511. jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
  512. output_col);
  513. }
  514. GLOBAL(void)
  515. jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  516. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  517. JDIMENSION output_col)
  518. {
  519. jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
  520. output_col);
  521. }
  522. GLOBAL(int)
  523. jsimd_can_idct_islow (void)
  524. {
  525. init_simd();
  526. /* The code is optimised for these values only */
  527. if (DCTSIZE != 8)
  528. return 0;
  529. if (sizeof(JCOEF) != 2)
  530. return 0;
  531. if (BITS_IN_JSAMPLE != 8)
  532. return 0;
  533. if (sizeof(JDIMENSION) != 4)
  534. return 0;
  535. if (sizeof(ISLOW_MULT_TYPE) != 2)
  536. return 0;
  537. if (simd_support & JSIMD_ARM_NEON)
  538. return 1;
  539. return 0;
  540. }
  541. GLOBAL(int)
  542. jsimd_can_idct_ifast (void)
  543. {
  544. init_simd();
  545. /* The code is optimised for these values only */
  546. if (DCTSIZE != 8)
  547. return 0;
  548. if (sizeof(JCOEF) != 2)
  549. return 0;
  550. if (BITS_IN_JSAMPLE != 8)
  551. return 0;
  552. if (sizeof(JDIMENSION) != 4)
  553. return 0;
  554. if (sizeof(IFAST_MULT_TYPE) != 2)
  555. return 0;
  556. if (IFAST_SCALE_BITS != 2)
  557. return 0;
  558. if (simd_support & JSIMD_ARM_NEON)
  559. return 1;
  560. return 0;
  561. }
  562. GLOBAL(int)
  563. jsimd_can_idct_float (void)
  564. {
  565. init_simd();
  566. return 0;
  567. }
  568. GLOBAL(void)
  569. jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  570. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  571. JDIMENSION output_col)
  572. {
  573. jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
  574. output_col);
  575. }
  576. GLOBAL(void)
  577. jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  578. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  579. JDIMENSION output_col)
  580. {
  581. jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
  582. output_col);
  583. }
  584. GLOBAL(void)
  585. jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
  586. JCOEFPTR coef_block, JSAMPARRAY output_buf,
  587. JDIMENSION output_col)
  588. {
  589. }
  590. GLOBAL(int)
  591. jsimd_can_huff_encode_one_block (void)
  592. {
  593. init_simd();
  594. if (DCTSIZE != 8)
  595. return 0;
  596. if (sizeof(JCOEF) != 2)
  597. return 0;
  598. if (simd_support & JSIMD_ARM_NEON && simd_huffman)
  599. return 1;
  600. return 0;
  601. }
  602. GLOBAL(JOCTET*)
  603. jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
  604. int last_dc_val, c_derived_tbl *dctbl,
  605. c_derived_tbl *actbl)
  606. {
  607. return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
  608. dctbl, actbl);
  609. }
  610. #endif // ESENTHEL CHANGED