aesv8-armx-linux64.S 9.1 KB


  1. // This file is generated from a similarly-named Perl script in the BoringSSL
  2. // source tree. Do not edit by hand.
  3. #if !defined(__has_feature)
  4. #define __has_feature(x) 0
  5. #endif
  6. #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
  7. #define OPENSSL_NO_ASM
  8. #endif
  9. #if !defined(OPENSSL_NO_ASM)
  10. #if defined(__aarch64__)
  11. #include <GFp/arm_arch.h>
  12. #if __ARM_MAX_ARCH__>=7
  13. .text
  14. .arch armv8-a+crypto
  // Key-expansion constants for GFp_aes_hw_set_encrypt_key. The routine
  // addresses this table through x3 and loads the first two rows into v1/v2.
  15. .section .rodata
  16. .align 5
  17. .Lrcon:
  18. .long 0x01,0x01,0x01,0x01 // initial round constant (v1); doubled with shl after every round
  19. .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat: tbl index vector (v2)
  20. .long 0x1b,0x1b,0x1b,0x1b // round constant reloaded into v1 once .Loop128 has finished
  21. .text
  // GFp_aes_hw_set_encrypt_key: expand an AES-128 or AES-256 user key into a
  // round-key schedule using the Armv8 Crypto Extensions.
  //
  // In:   x0 = user key (must be non-NULL)
  //       w1 = key length in bits; only 128 and 256 are accepted
  //       x2 = output key schedule (must be non-NULL)
  // Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 for a rejected length.
  //       On success the schedule holds the round keys in 16-byte steps and
  //       the round count (10 or 14) as a 32-bit word at byte offset 240.
  // Regs: x3 = status, later a cursor into .Lrcon; w12 = round count;
  //       v0 = zero, v1 = round constant, v2 = tbl index mask,
  //       v3/v4 = key material being expanded, v5/v6 = temporaries.
  22. .globl GFp_aes_hw_set_encrypt_key
  23. .hidden GFp_aes_hw_set_encrypt_key
  24. .type GFp_aes_hw_set_encrypt_key,%function
  25. .align 5
  26. GFp_aes_hw_set_encrypt_key:
  27. .Lenc_key:
  28. // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
  29. AARCH64_VALID_CALL_TARGET
  30. stp x29,x30,[sp,#-16]!
  31. add x29,sp,#0
  32. mov x3,#-1 // status if either pointer is NULL
  33. cmp x0,#0
  34. b.eq .Lenc_key_abort
  35. cmp x2,#0
  36. b.eq .Lenc_key_abort
  37. mov x3,#-2 // status if the bit length is rejected
  38. cmp w1,#128
  39. b.lt .Lenc_key_abort
  40. cmp w1,#256
  41. b.gt .Lenc_key_abort
  42. tst w1,#0x7f // multiples of 128 only: 192 has no code path below and would read 32 key bytes via .L256
  43. b.ne .Lenc_key_abort
  44. adrp x3,.Lrcon
  45. add x3,x3,:lo12:.Lrcon
  46. cmp w1,#192 // flags are consumed by the b.lt below; nothing in between sets them
  47. eor v0.16b,v0.16b,v0.16b
  48. ld1 {v3.16b},[x0],#16
  49. mov w1,#8 // reuse w1
  50. ld1 {v1.4s,v2.4s},[x3],#32
  51. b.lt .Loop128
  52. // 192-bit key support was removed.
  53. b .L256
  54. .align 4
  // 128-bit path: eight loop passes, then two unrolled rounds using the
  // round constant reloaded from the third .Lrcon row.
  55. .Loop128:
  56. tbl v6.16b,{v3.16b},v2.16b
  57. ext v5.16b,v0.16b,v3.16b,#12
  58. st1 {v3.4s},[x2],#16
  59. aese v6.16b,v0.16b
  60. subs w1,w1,#1
  61. eor v3.16b,v3.16b,v5.16b
  62. ext v5.16b,v0.16b,v5.16b,#12
  63. eor v3.16b,v3.16b,v5.16b
  64. ext v5.16b,v0.16b,v5.16b,#12
  65. eor v6.16b,v6.16b,v1.16b
  66. eor v3.16b,v3.16b,v5.16b
  67. shl v1.16b,v1.16b,#1
  68. eor v3.16b,v3.16b,v6.16b
  69. b.ne .Loop128
  70. ld1 {v1.4s},[x3]
  71. tbl v6.16b,{v3.16b},v2.16b
  72. ext v5.16b,v0.16b,v3.16b,#12
  73. st1 {v3.4s},[x2],#16
  74. aese v6.16b,v0.16b
  75. eor v3.16b,v3.16b,v5.16b
  76. ext v5.16b,v0.16b,v5.16b,#12
  77. eor v3.16b,v3.16b,v5.16b
  78. ext v5.16b,v0.16b,v5.16b,#12
  79. eor v6.16b,v6.16b,v1.16b
  80. eor v3.16b,v3.16b,v5.16b
  81. shl v1.16b,v1.16b,#1
  82. eor v3.16b,v3.16b,v6.16b
  83. tbl v6.16b,{v3.16b},v2.16b
  84. ext v5.16b,v0.16b,v3.16b,#12
  85. st1 {v3.4s},[x2],#16
  86. aese v6.16b,v0.16b
  87. eor v3.16b,v3.16b,v5.16b
  88. ext v5.16b,v0.16b,v5.16b,#12
  89. eor v3.16b,v3.16b,v5.16b
  90. ext v5.16b,v0.16b,v5.16b,#12
  91. eor v6.16b,v6.16b,v1.16b
  92. eor v3.16b,v3.16b,v5.16b
  93. eor v3.16b,v3.16b,v6.16b
  94. st1 {v3.4s},[x2] // last round key; no post-increment here
  95. add x2,x2,#0x50 // step from the last round key (offset 160) to offset 240
  96. mov w12,#10
  97. b .Ldone
  98. // 192-bit key support was removed.
  99. .align 4
  // 256-bit path: v3 holds the first 16 key bytes and v4 the second 16.
  // Seven passes; the final pass leaves through b.eq after storing v3.
  100. .L256:
  101. ld1 {v4.16b},[x0]
  102. mov w1,#7
  103. mov w12,#14
  104. st1 {v3.4s},[x2],#16
  105. .Loop256:
  106. tbl v6.16b,{v4.16b},v2.16b
  107. ext v5.16b,v0.16b,v3.16b,#12
  108. st1 {v4.4s},[x2],#16
  109. aese v6.16b,v0.16b
  110. subs w1,w1,#1
  111. eor v3.16b,v3.16b,v5.16b
  112. ext v5.16b,v0.16b,v5.16b,#12
  113. eor v3.16b,v3.16b,v5.16b
  114. ext v5.16b,v0.16b,v5.16b,#12
  115. eor v6.16b,v6.16b,v1.16b
  116. eor v3.16b,v3.16b,v5.16b
  117. shl v1.16b,v1.16b,#1
  118. eor v3.16b,v3.16b,v6.16b
  119. st1 {v3.4s},[x2],#16
  120. b.eq .Ldone
  121. dup v6.4s,v3.s[3] // just splat
  122. ext v5.16b,v0.16b,v4.16b,#12
  123. aese v6.16b,v0.16b
  124. eor v4.16b,v4.16b,v5.16b
  125. ext v5.16b,v0.16b,v5.16b,#12
  126. eor v4.16b,v4.16b,v5.16b
  127. ext v5.16b,v0.16b,v5.16b,#12
  128. eor v4.16b,v4.16b,v5.16b
  129. eor v4.16b,v4.16b,v6.16b
  130. b .Loop256
  131. .Ldone:
  132. str w12,[x2] // round count, stored right after the round keys
  133. mov x3,#0
  134. .Lenc_key_abort:
  135. mov x0,x3 // return value
  136. ldr x29,[sp],#16 // restore x29 and pop the frame; x30 is not reloaded
  137. ret
  138. .size GFp_aes_hw_set_encrypt_key,.-GFp_aes_hw_set_encrypt_key
  // GFp_aes_hw_encrypt: encrypt a single 16-byte block.
  //
  // In:   x0 = input block, x1 = output block, x2 = key schedule
  //       (the round count is read as a 32-bit word at [x2,#240])
  // Clobbers: x2 (advanced through the schedule), w3, v0-v2, flags.
  // No stack is used and no other routine is called.
  139. .globl GFp_aes_hw_encrypt
  140. .hidden GFp_aes_hw_encrypt
  141. .type GFp_aes_hw_encrypt,%function
  142. .align 5
  143. GFp_aes_hw_encrypt:
  144. AARCH64_VALID_CALL_TARGET
  145. ldr w3,[x2,#240] // round count; read before x2 is post-incremented
  146. ld1 {v0.4s},[x2],#16
  147. ld1 {v2.16b},[x0]
  148. sub w3,w3,#2 // the last two rounds are handled after the loop
  149. ld1 {v1.4s},[x2],#16
  // Two rounds per pass; v0/v1 alternate as the current round key and are
  // reloaded while the other one is in use.
  150. .Loop_enc:
  151. aese v2.16b,v0.16b
  152. aesmc v2.16b,v2.16b
  153. ld1 {v0.4s},[x2],#16
  154. subs w3,w3,#2
  155. aese v2.16b,v1.16b
  156. aesmc v2.16b,v2.16b
  157. ld1 {v1.4s},[x2],#16
  158. b.gt .Loop_enc
  159. aese v2.16b,v0.16b
  160. aesmc v2.16b,v2.16b
  161. ld1 {v0.4s},[x2] // last round key
  162. aese v2.16b,v1.16b // final round: no aesmc
  163. eor v2.16b,v2.16b,v0.16b // xor in the last round key
  164. st1 {v2.16b},[x1]
  165. ret
  166. .size GFp_aes_hw_encrypt,.-GFp_aes_hw_encrypt
  // GFp_aes_hw_decrypt: decrypt a single 16-byte block.
  //
  // In:   x0 = input block, x1 = output block, x2 = key schedule
  //       (the round count is read as a 32-bit word at [x2,#240])
  // Clobbers: x2 (advanced through the schedule), w3, v0-v2, flags.
  // No stack is used and no other routine is called.
  // NOTE(review): the round keys are consumed in ascending address order, so
  // the schedule presumably has to be prepared for decryption by the caller;
  // no routine in this file builds such a schedule - confirm.
  167. .globl GFp_aes_hw_decrypt
  168. .hidden GFp_aes_hw_decrypt
  169. .type GFp_aes_hw_decrypt,%function
  170. .align 5
  171. GFp_aes_hw_decrypt:
  172. AARCH64_VALID_CALL_TARGET
  173. ldr w3,[x2,#240] // round count; read before x2 is post-incremented
  174. ld1 {v0.4s},[x2],#16
  175. ld1 {v2.16b},[x0]
  176. sub w3,w3,#2 // the last two rounds are handled after the loop
  177. ld1 {v1.4s},[x2],#16
  // Two rounds per pass; v0/v1 alternate as the current round key and are
  // reloaded while the other one is in use.
  178. .Loop_dec:
  179. aesd v2.16b,v0.16b
  180. aesimc v2.16b,v2.16b
  181. ld1 {v0.4s},[x2],#16
  182. subs w3,w3,#2
  183. aesd v2.16b,v1.16b
  184. aesimc v2.16b,v2.16b
  185. ld1 {v1.4s},[x2],#16
  186. b.gt .Loop_dec
  187. aesd v2.16b,v0.16b
  188. aesimc v2.16b,v2.16b
  189. ld1 {v0.4s},[x2] // last round key
  190. aesd v2.16b,v1.16b // final round: no aesimc
  191. eor v2.16b,v2.16b,v0.16b // xor in the last round key
  192. st1 {v2.16b},[x1]
  193. ret
  194. .size GFp_aes_hw_decrypt,.-GFp_aes_hw_decrypt
  // GFp_aes_hw_ctr32_encrypt_blocks: AES-CTR over whole 16-byte blocks, with a
  // 32-bit big-endian counter held in the last word of the counter block.
  //
  // In:   x0 = input, x1 = output, x2 = number of 16-byte blocks,
  //       x3 = key schedule (round count read from [x3,#240]),
  //       x4 = 16-byte initial counter block (only read here, never stored to)
  // Regs: w8 = running counter in host byte order; x7 = round-key cursor;
  //       v16/v17 = rolling round keys; v20-v23 and v7 = last five round keys;
  //       w5 = rounds-6 (inner loop count, copied into w6);
  //       x12 = input stride for the tail load (16 or 0);
  //       v6 = staging copy of the counter block (see the errata note below).
  // Main loop handles three blocks per pass (v0, v1, v18); the tail handles
  // the remaining one or two blocks.
  // NOTE(review): x2 == 0 is not special-cased; it takes the .Lctr32_tail path
  // and the cmp x2,#1 there lets both stores execute. Callers presumably
  // guarantee x2 >= 1 - confirm.
  195. .globl GFp_aes_hw_ctr32_encrypt_blocks
  196. .hidden GFp_aes_hw_ctr32_encrypt_blocks
  197. .type GFp_aes_hw_ctr32_encrypt_blocks,%function
  198. .align 5
  199. GFp_aes_hw_ctr32_encrypt_blocks:
  200. // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
  201. AARCH64_VALID_CALL_TARGET
  202. stp x29,x30,[sp,#-16]!
  203. add x29,sp,#0
  204. ldr w5,[x3,#240]
  205. ldr w8, [x4, #12]
  206. ld1 {v0.4s},[x4]
  207. ld1 {v16.4s,v17.4s},[x3] // load key schedule...
  208. sub w5,w5,#4
  209. mov x12,#16
  210. cmp x2,#2 // flags feed both the csel and the b.ls further down
  211. add x7,x3,x5,lsl#4 // pointer to last 5 round keys
  212. sub w5,w5,#2
  213. ld1 {v20.4s,v21.4s},[x7],#32
  214. ld1 {v22.4s,v23.4s},[x7],#32
  215. ld1 {v7.4s},[x7]
  216. add x7,x3,#32
  217. mov w6,w5
  218. csel x12,xzr,x12,lo // fewer than two blocks: stride 0, so the second tail load re-reads the same block
  219. // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
  220. // affected by silicon errata #1742098 [0] and #1655431 [1],
  221. // respectively, where the second instruction of an aese/aesmc
  222. // instruction pair may execute twice if an interrupt is taken right
  223. // after the first instruction consumes an input register of which a
  224. // single 32-bit lane has been updated the last time it was modified.
  225. //
  226. // This function uses a counter in one 32-bit lane. The mov (lane insert) lines
  227. // could write to v1.16b and v18.16b directly, but that trips this bug.
  228. // We write to v6.16b and copy to the final register as a workaround.
  229. //
  230. // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
  231. // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
  232. #ifndef __ARMEB__
  233. rev w8, w8
  234. #endif
  235. add w10, w8, #1
  236. orr v6.16b,v0.16b,v0.16b
  237. rev w10, w10
  238. mov v6.s[3],w10
  239. add w8, w8, #2
  240. orr v1.16b,v6.16b,v6.16b
  241. b.ls .Lctr32_tail // two blocks or fewer (flags from cmp x2,#2 above)
  242. rev w12, w8 // w12 is a scratch register here; x12 is set again before the tail
  243. mov v6.s[3],w12
  244. sub x2,x2,#3 // bias
  245. orr v18.16b,v6.16b,v6.16b
  246. b .Loop3x_ctr32
  247. .align 4
  248. .Loop3x_ctr32:
  249. aese v0.16b,v16.16b
  250. aesmc v0.16b,v0.16b
  251. aese v1.16b,v16.16b
  252. aesmc v1.16b,v1.16b
  253. aese v18.16b,v16.16b
  254. aesmc v18.16b,v18.16b
  255. ld1 {v16.4s},[x7],#16
  256. subs w6,w6,#2
  257. aese v0.16b,v17.16b
  258. aesmc v0.16b,v0.16b
  259. aese v1.16b,v17.16b
  260. aesmc v1.16b,v1.16b
  261. aese v18.16b,v17.16b
  262. aesmc v18.16b,v18.16b
  263. ld1 {v17.4s},[x7],#16
  264. b.gt .Loop3x_ctr32
  // Remaining rounds for the three blocks, interleaved with loading the
  // input and building the next three counter blocks in v0, v1 and v18.
  // The in-flight state moves to v4, v5 and v17.
  265. aese v0.16b,v16.16b
  266. aesmc v4.16b,v0.16b
  267. aese v1.16b,v16.16b
  268. aesmc v5.16b,v1.16b
  269. ld1 {v2.16b},[x0],#16
  270. add w9,w8,#1
  271. aese v18.16b,v16.16b
  272. aesmc v18.16b,v18.16b
  273. ld1 {v3.16b},[x0],#16
  274. rev w9,w9
  275. aese v4.16b,v17.16b
  276. aesmc v4.16b,v4.16b
  277. aese v5.16b,v17.16b
  278. aesmc v5.16b,v5.16b
  279. ld1 {v19.16b},[x0],#16
  280. mov x7,x3
  281. aese v18.16b,v17.16b
  282. aesmc v17.16b,v18.16b
  283. aese v4.16b,v20.16b
  284. aesmc v4.16b,v4.16b
  285. aese v5.16b,v20.16b
  286. aesmc v5.16b,v5.16b
  287. eor v2.16b,v2.16b,v7.16b // fold the last round key into the input early
  288. add w10,w8,#2
  289. aese v17.16b,v20.16b
  290. aesmc v17.16b,v17.16b
  291. eor v3.16b,v3.16b,v7.16b
  292. add w8,w8,#3
  293. aese v4.16b,v21.16b
  294. aesmc v4.16b,v4.16b
  295. aese v5.16b,v21.16b
  296. aesmc v5.16b,v5.16b
  297. // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
  298. // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
  299. // 32-bit mode. See the comment above.
  300. eor v19.16b,v19.16b,v7.16b
  301. mov v6.s[3], w9
  302. aese v17.16b,v21.16b
  303. aesmc v17.16b,v17.16b
  304. orr v0.16b,v6.16b,v6.16b
  305. rev w10,w10
  306. aese v4.16b,v22.16b
  307. aesmc v4.16b,v4.16b
  308. mov v6.s[3], w10
  309. rev w12,w8
  310. aese v5.16b,v22.16b
  311. aesmc v5.16b,v5.16b
  312. orr v1.16b,v6.16b,v6.16b
  313. mov v6.s[3], w12
  314. aese v17.16b,v22.16b
  315. aesmc v17.16b,v17.16b
  316. orr v18.16b,v6.16b,v6.16b
  317. subs x2,x2,#3 // flags are consumed by the b.hs at the end of this pass
  318. aese v4.16b,v23.16b
  319. aese v5.16b,v23.16b
  320. aese v17.16b,v23.16b
  321. eor v2.16b,v2.16b,v4.16b
  322. ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
  323. st1 {v2.16b},[x1],#16
  324. eor v3.16b,v3.16b,v5.16b
  325. mov w6,w5
  326. st1 {v3.16b},[x1],#16
  327. eor v19.16b,v19.16b,v17.16b
  328. ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
  329. st1 {v19.16b},[x1],#16
  330. b.hs .Loop3x_ctr32
  331. adds x2,x2,#3 // undo the bias: x2 = blocks still to do (0, 1 or 2)
  332. b.eq .Lctr32_done
  333. cmp x2,#1
  334. mov x12,#16
  335. csel x12,xzr,x12,eq // one block left: stride 0 for the second tail load
  // Tail: always runs two blocks' worth of rounds on v0/v1, but stores the
  // second result only when x2 != 1.
  336. .Lctr32_tail:
  337. aese v0.16b,v16.16b
  338. aesmc v0.16b,v0.16b
  339. aese v1.16b,v16.16b
  340. aesmc v1.16b,v1.16b
  341. ld1 {v16.4s},[x7],#16
  342. subs w6,w6,#2
  343. aese v0.16b,v17.16b
  344. aesmc v0.16b,v0.16b
  345. aese v1.16b,v17.16b
  346. aesmc v1.16b,v1.16b
  347. ld1 {v17.4s},[x7],#16
  348. b.gt .Lctr32_tail
  349. aese v0.16b,v16.16b
  350. aesmc v0.16b,v0.16b
  351. aese v1.16b,v16.16b
  352. aesmc v1.16b,v1.16b
  353. aese v0.16b,v17.16b
  354. aesmc v0.16b,v0.16b
  355. aese v1.16b,v17.16b
  356. aesmc v1.16b,v1.16b
  357. ld1 {v2.16b},[x0],x12 // advance by 16 or by 0 (see x12 above)
  358. aese v0.16b,v20.16b
  359. aesmc v0.16b,v0.16b
  360. aese v1.16b,v20.16b
  361. aesmc v1.16b,v1.16b
  362. ld1 {v3.16b},[x0]
  363. aese v0.16b,v21.16b
  364. aesmc v0.16b,v0.16b
  365. aese v1.16b,v21.16b
  366. aesmc v1.16b,v1.16b
  367. eor v2.16b,v2.16b,v7.16b
  368. aese v0.16b,v22.16b
  369. aesmc v0.16b,v0.16b
  370. aese v1.16b,v22.16b
  371. aesmc v1.16b,v1.16b
  372. eor v3.16b,v3.16b,v7.16b
  373. aese v0.16b,v23.16b
  374. aese v1.16b,v23.16b
  375. cmp x2,#1
  376. eor v2.16b,v2.16b,v0.16b
  377. eor v3.16b,v3.16b,v1.16b
  378. st1 {v2.16b},[x1],#16
  379. b.eq .Lctr32_done // exactly one block: skip the second store
  380. st1 {v3.16b},[x1]
  381. .Lctr32_done:
  382. ldr x29,[sp],#16 // restore x29 and pop the frame; x30 is not reloaded
  383. ret
  384. .size GFp_aes_hw_ctr32_encrypt_blocks,.-GFp_aes_hw_ctr32_encrypt_blocks
  385. #endif
  386. #endif
  387. #endif // !OPENSSL_NO_ASM
  388. .section .note.GNU-stack,"",%progbits