des_enc.m4 48 KB


  1. ! des_enc.m4
  2. ! des_enc.S (generated from des_enc.m4)
  3. !
  4. ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
  5. !
  6. ! Version 1.0. 32-bit version.
  7. !
  8. ! June 8, 2000.
  9. !
  10. ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
  11. ! by Andy Polyakov.
  12. !
  13. ! January 1, 2003.
  14. !
  15. ! Assembler version: Copyright Svend Olaf Mikkelsen.
  16. !
  17. ! Original C code: Copyright Eric A. Young.
  18. !
  19. ! This code can be freely used by LibDES/SSLeay/OpenSSL users.
  20. !
  21. ! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
  22. !
  23. ! This version can be redistributed.
  24. !
  25. ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
  26. !
  27. ! Global registers 1 to 5 are used. This is the same as done by the
  28. ! cc compiler. The UltraSPARC load/store little endian feature is used.
  29. !
  30. ! Instruction grouping often refers to one CPU cycle.
  31. !
  32. ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
  33. !
  34. ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
  35. !
  36. ! Performance improvement according to './apps/openssl speed des'
  37. !
  38. ! 32-bit build:
  39. ! 23% faster than cc-5.2 -xarch=v8plus -xO5
  40. ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
  41. ! 64-bit build:
  42. ! 50% faster than cc-5.2 -xarch=v9 -xO5
  43. ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
  44. !
  45. .ident "des_enc.m4 2.1"
  46. .file "des_enc-sparc.S"
  47. #if defined(__SUNPRO_C) && defined(__sparcv9)
  48. # define ABI64 /* They've said -xarch=v9 at command line */
  49. #elif defined(__GNUC__) && defined(__arch64__)
  50. # define ABI64 /* They've said -m64 at command line */
  51. #endif
  52. #ifdef ABI64
  53. .register %g2,#scratch
  54. .register %g3,#scratch
  55. # define FRAME -192 /* stack adjustment used by save, 64-bit build */
  56. # define BIAS 2047 /* added to %sp when addressing the frame, 64-bit build */
  57. # define LDPTR ldx /* pointer-sized load */
  58. # define STPTR stx /* pointer-sized store */
  59. # define ARG0 128 /* frame offset of slot 0 used to spill a pointer */
  60. # define ARGSZ 8 /* size of one such slot */
  61. # ifndef OPENSSL_SYSNAME_ULTRASPARC
  62. # define OPENSSL_SYSNAME_ULTRASPARC /* 64-bit build always enables the UltraSPARC paths */
  63. # endif
  64. #else
  65. # define FRAME -96 /* stack adjustment used by save, 32-bit build */
  66. # define BIAS 0 /* no stack bias in the 32-bit build */
  67. # define LDPTR ld /* pointer-sized load */
  68. # define STPTR st /* pointer-sized store */
  69. # define ARG0 68 /* frame offset of slot 0 used to spill a pointer */
  70. # define ARGSZ 4 /* size of one such slot */
  71. #endif
  72. #define LOOPS 7 /* iterations of the two-round loop; two more rounds follow it */
  73. #define global0 %g0
  74. #define global1 %g1
  75. #define global2 %g2
  76. #define global3 %g3
  77. #define global4 %g4
  78. #define global5 %g5
  79. #define local0 %l0
  80. #define local1 %l1
  81. #define local2 %l2
  82. #define local3 %l3
  83. #define local4 %l4
  84. #define local5 %l5
  85. #define local7 %l6 /* note: local7 maps to %l6, not %l7 */
  86. #define local6 %l7 /* note: local6 maps to %l7, not %l6 */
  87. #define in0 %i0
  88. #define in1 %i1
  89. #define in2 %i2
  90. #define in3 %i3
  91. #define in4 %i4
  92. #define in5 %i5
  93. #define in6 %i6
  94. #define in7 %i7
  95. #define out0 %o0
  96. #define out1 %o1
  97. #define out2 %o2
  98. #define out3 %o3
  99. #define out4 %o4
  100. #define out5 %o5
  101. #define out6 %o6
  102. #define out7 %o7
  103. #define stub stb /* store byte under the name used by the store macros */
  104. changequote({,}) ! from here on m4 quoting uses braces
  105. ! Macro definitions:
  106. ! {ip_macro}
  107. !
  108. ! The logic used in initial and final permutations is the same as in
  109. ! the C code. The permutations are done with a clever shift, xor and
  110. ! AND-mask technique.
  111. !
  112. ! The macro also derives address sbox 2 to 5 into global 2 to 5, address
  113. ! sbox 6 to local6, and address sbox 8 to out3, all from global1.
  ! On entry global1 must already hold the address of sbox 1 and out2 the
  ! base of the constant table read at offsets 256 to 284 (the table itself
  ! is not visible in this part of the file).
  114. !
  115. ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
  116. !
  117. ! Loads key first round from address in parameter 5 to out0, out1.
  118. !
  119. ! After the original LibDES initial permutation, the resulting left
  120. ! is in the variable initially used for right and vice versa. The macro
  121. ! implements the possibility to keep the halves in the original registers.
  122. !
  123. ! parameter 1 left
  124. ! parameter 2 right
  125. ! parameter 3 result left (modify in first round)
  126. ! parameter 4 result right (use in first round)
  127. ! parameter 5 key address
  128. ! parameter 6 1/2 for include encryption/decryption
  129. ! parameter 7 1 for move in1 to in3
  130. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  131. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  132. define(ip_macro, {
  133. ! {ip_macro}
  134. ! $1 $2 $4 $3 $5 $6 $7 $8 $9
  135. ld [out2+256], local1 ! mask for the 4-bit swap step
  136. srl $2, 4, local4
  137. xor local4, $1, local4
  138. ifelse($7,1,{mov in1, in3},{nop})
  139. ld [out2+260], local2 ! mask for the 16-bit swap step
  140. and local4, local1, local4
  141. ifelse($8,1,{mov in3, in4},{})
  142. ifelse($8,2,{mov in4, in3},{})
  143. ld [out2+280], out4 ! loop counter
  144. sll local4, 4, local1
  145. xor $1, local4, $1
  146. ld [out2+264], local3 ! mask for the 2-bit swap step
  147. srl $1, 16, local4
  148. xor $2, local1, $2
  149. ifelse($9,1,{LDPTR KS3, in4},{})
  150. xor local4, $2, local4
  151. nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
  152. ifelse($9,1,{LDPTR KS2, in3},{})
  153. and local4, local2, local4
  154. nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
  155. sll local4, 16, local1
  156. xor $2, local4, $2
  157. srl $2, 2, local4
  158. xor $1, local1, $1
  159. sethi %hi(16711680), local5 ! upper part of 0x00ff00ff
  160. xor local4, $1, local4
  161. and local4, local3, local4
  162. or local5, 255, local5 ! local5 = 0x00ff00ff, mask for the 8-bit swap step
  163. sll local4, 2, local2
  164. xor $1, local4, $1
  165. srl $1, 8, local4
  166. xor $2, local2, $2
  167. xor local4, $2, local4
  168. add global1, 768, global4 ! address sbox 4
  169. and local4, local5, local4
  170. add global1, 1024, global5 ! address sbox 5
  171. ld [out2+272], local7 ! mask for the 1-bit swap step
  172. sll local4, 8, local1
  173. xor $2, local4, $2
  174. srl $2, 1, local4
  175. xor $1, local1, $1
  176. ld [$5], out0 ! key 7531
  177. xor local4, $1, local4
  178. add global1, 256, global2 ! address sbox 2
  179. ld [$5+4], out1 ! key 8642
  180. and local4, local7, local4
  181. add global1, 512, global3 ! address sbox 3
  182. sll local4, 1, local1
  183. xor $1, local4, $1
  184. sll $1, 3, local3
  185. xor $2, local1, $2
  186. sll $2, 3, local2
  187. add global1, 1280, local6 ! address sbox 6
  188. srl $1, 29, local4
  189. add global1, 1792, out3 ! address sbox 8
  190. srl $2, 29, local1
  191. or local4, local3, $4
  192. or local2, local1, $3
  193. ifelse($6, 1, {
  194. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  195. or local2, local1, $3 ! same or as just above, repeated
  196. xor $4, out0, local1
  197. call .des_enc.1 ! enter the encryption rounds at their loop label
  198. and local1, 252, local1 ! delay slot: sbox 1 bits for the first round
  199. },{})
  200. ifelse($6, 2, {
  201. ld [out2+284], local5 ! 0x0000FC00 used in the rounds
  202. or local2, local1, $3 ! same or as just above, repeated
  203. xor $4, out0, local1
  204. call .des_dec.1 ! enter the decryption rounds at their loop label
  205. and local1, 252, local1 ! delay slot: sbox 1 bits for the first round
  206. },{})
  207. })
  208. ! {rounds_macro}
  209. !
  210. ! The logic used in the DES rounds is the same as in the C code,
  211. ! except that calculations for sbox 1 and sbox 5 begin before
  212. ! the previous round is finished.
  213. !
  214. ! In each round one half (work) is modified based on key and the
  215. ! other half (use).
  216. !
  217. ! In this version we do two rounds in a loop repeated 7 times
  218. ! and two rounds separately.
  219. !
  220. ! One half has the bits for the sboxes in the following positions:
  221. !
  222. ! 777777xx555555xx333333xx111111xx
  223. !
  224. ! 88xx666666xx444444xx222222xx8888
  225. !
  226. ! The bits for each sbox are xor-ed with the key bits for that box.
  227. ! The above xx bits are cleared, and the result used for lookup in
  228. ! the sbox table. Each sbox entry contains the 4 output bits permuted
  229. ! into 32 bits according to the P permutation.
  230. !
  231. ! In the description of DES, left and right are switched after
  232. ! each round, except after last round. In this code the original
  233. ! left and right are kept in the same register in all rounds, meaning
  234. ! that after the 16 rounds the result for right is in the register
  235. ! originally used for left.
  236. !
  237. ! parameter 1 first work (left in first round)
  238. ! parameter 2 first use (right in first round)
  239. ! parameter 3 enc/dec 1/-1
  240. ! parameter 4 loop label
  241. ! parameter 5 key address register
  242. ! parameter 6 optional address for key next encryption/decryption
  243. ! parameter 7 not empty for include retl
  244. !
  245. ! also compares in2 to 8
  ! On entry out0/out1 hold the first round key, out4 the loop counter,
  ! global1 to global5, local6 and out3 the sbox addresses and out2 the
  ! base of the constant table.
  246. define(rounds_macro, {
  247. ! {rounds_macro}
  248. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  249. xor $2, out0, local1
  250. ld [out2+284], local5 ! 0x0000FC00
  251. ba $4 ! jump to the aligned loop label
  252. and local1, 252, local1 ! delay slot: sbox 1 bits for the first round
  253. .align 32
  254. $4:
  255. ! local6 is address sbox 6
  256. ! out3 is address sbox 8
  257. ! out4 is loop counter
  258. ld [global1+local1], local1
  259. xor $2, out1, out1 ! 8642
  260. xor $2, out0, out0 ! 7531
  261. ! fmovs %f0, %f0 ! fxor used for alignment
  262. srl out1, 4, local0 ! rotate 4 right
  263. and out0, local5, local3 ! 3
  264. ! fmovs %f0, %f0
  265. ld [$5+$3*8], local7 ! key 7531 next round
  266. srl local3, 8, local3 ! 3
  267. and local0, 252, local2 ! 2
  268. ! fmovs %f0, %f0
  269. ld [global3+local3],local3 ! 3
  270. sll out1, 28, out1 ! rotate
  271. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  272. ld [global2+local2], local2 ! 2
  273. srl out0, 24, local1 ! 7
  274. or out1, local0, out1 ! rotate
  275. ldub [out2+local1], local1 ! 7 (and 0xFC)
  276. srl out1, 24, local0 ! 8
  277. and out1, local5, local4 ! 4
  278. ldub [out2+local0], local0 ! 8 (and 0xFC)
  279. srl local4, 8, local4 ! 4
  280. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  281. ld [global4+local4],local4 ! 4
  282. srl out1, 16, local2 ! 6
  283. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  284. ld [out3+local0],local0 ! 8
  285. and local2, 252, local2 ! 6
  286. add global1, 1536, local5 ! address sbox 7
  287. ld [local6+local2], local2 ! 6
  288. srl out0, 16, local3 ! 5
  289. xor $1, local4, $1 ! 4 finished
  290. ld [local5+local1],local1 ! 7
  291. and local3, 252, local3 ! 5
  292. xor $1, local0, $1 ! 8 finished
  293. ld [global5+local3],local3 ! 5
  294. xor $1, local2, $1 ! 6 finished
  295. subcc out4, 1, out4 ! count down; sets the condition codes tested by the bne below
  296. ld [$5+$3*8+4], out0 ! key 8642 next round
  297. xor $1, local7, local2 ! sbox 5 next round
  298. xor $1, local1, $1 ! 7 finished
  299. srl local2, 16, local2 ! sbox 5 next round
  300. xor $1, local3, $1 ! 5 finished
  301. ld [$5+$3*16+4], out1 ! key 8642 next round again
  302. and local2, 252, local2 ! sbox5 next round
  303. ! next round
  304. xor $1, local7, local7 ! 7531
  305. ld [global5+local2], local2 ! 5
  306. srl local7, 24, local3 ! 7
  307. xor $1, out0, out0 ! 8642
  308. ldub [out2+local3], local3 ! 7 (and 0xFC)
  309. srl out0, 4, local0 ! rotate 4 right
  310. and local7, 252, local1 ! 1
  311. sll out0, 28, out0 ! rotate
  312. xor $2, local2, $2 ! 5 finished local2 used
  313. srl local0, 8, local4 ! 4
  314. and local0, 252, local2 ! 2
  315. ld [local5+local3], local3 ! 7
  316. srl local0, 16, local5 ! 6
  317. or out0, local0, out0 ! rotate
  318. ld [global2+local2], local2 ! 2
  319. srl out0, 24, local0
  320. ld [$5+$3*16], out0 ! key 7531 next round
  321. and local4, 252, local4 ! 4
  322. and local5, 252, local5 ! 6
  323. ld [global4+local4], local4 ! 4
  324. xor $2, local3, $2 ! 7 finished local3 used
  325. and local0, 252, local0 ! 8
  326. ld [local6+local5], local5 ! 6
  327. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  328. srl local7, 8, local2 ! 3 start
  329. ld [out3+local0], local0 ! 8
  330. xor $2, local4, $2 ! 4 finished
  331. and local2, 252, local2 ! 3
  332. ld [global1+local1], local1 ! 1
  333. xor $2, local5, $2 ! 6 finished local5 used
  334. ld [global3+local2], local2 ! 3
  335. xor $2, local0, $2 ! 8 finished
  336. add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two rounds done)
  337. ld [out2+284], local5 ! 0x0000FC00
  338. xor $2, out0, local4 ! sbox 1 next round
  339. xor $2, local1, $2 ! 1 finished
  340. xor $2, local2, $2 ! 3 finished
  341. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  342. bne,pt %icc, $4
  343. #else
  344. bne $4
  345. #endif
  346. and local4, 252, local1 ! sbox 1 next round (branch delay slot)
  347. ! two rounds more:
  348. ld [global1+local1], local1
  349. xor $2, out1, out1
  350. xor $2, out0, out0
  351. srl out1, 4, local0 ! rotate
  352. and out0, local5, local3
  353. ld [$5+$3*8], local7 ! key 7531
  354. srl local3, 8, local3
  355. and local0, 252, local2
  356. ld [global3+local3],local3
  357. sll out1, 28, out1 ! rotate
  358. xor $1, local1, $1 ! 1 finished, local1 now sbox 7
  359. ld [global2+local2], local2
  360. srl out0, 24, local1
  361. or out1, local0, out1 ! rotate
  362. ldub [out2+local1], local1
  363. srl out1, 24, local0
  364. and out1, local5, local4
  365. ldub [out2+local0], local0
  366. srl local4, 8, local4
  367. xor $1, local2, $1 ! 2 finished local2 now sbox 6
  368. ld [global4+local4],local4
  369. srl out1, 16, local2
  370. xor $1, local3, $1 ! 3 finished local3 now sbox 5
  371. ld [out3+local0],local0
  372. and local2, 252, local2
  373. add global1, 1536, local5 ! address sbox 7
  374. ld [local6+local2], local2
  375. srl out0, 16, local3
  376. xor $1, local4, $1 ! 4 finished
  377. ld [local5+local1],local1
  378. and local3, 252, local3
  379. xor $1, local0, $1
  380. ld [global5+local3],local3
  381. xor $1, local2, $1 ! 6 finished
  382. cmp in2, 8 ! result is not used in this macro; presumably tested by the code that follows it
  383. ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
  384. xor $1, local7, local2 ! sbox 5 next round
  385. xor $1, local1, $1 ! 7 finished
  386. ld [$5+$3*8+4], out0
  387. srl local2, 16, local2 ! sbox 5 next round
  388. xor $1, local3, $1 ! 5 finished
  389. and local2, 252, local2
  390. ! next round (two rounds more)
  391. xor $1, local7, local7 ! 7531
  392. ld [global5+local2], local2
  393. srl local7, 24, local3
  394. xor $1, out0, out0 ! 8642
  395. ldub [out2+local3], local3
  396. srl out0, 4, local0 ! rotate
  397. and local7, 252, local1
  398. sll out0, 28, out0 ! rotate
  399. xor $2, local2, $2 ! 5 finished local2 used
  400. srl local0, 8, local4
  401. and local0, 252, local2
  402. ld [local5+local3], local3
  403. srl local0, 16, local5
  404. or out0, local0, out0 ! rotate
  405. ld [global2+local2], local2
  406. srl out0, 24, local0
  407. ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
  408. and local4, 252, local4
  409. and local5, 252, local5
  410. ld [global4+local4], local4
  411. xor $2, local3, $2 ! 7 finished local3 used
  412. and local0, 252, local0
  413. ld [local6+local5], local5
  414. xor $2, local2, $2 ! 2 finished local2 now sbox 3
  415. srl local7, 8, local2 ! 3 start
  416. ld [out3+local0], local0
  417. xor $2, local4, $2
  418. and local2, 252, local2
  419. ld [global1+local1], local1
  420. xor $2, local5, $2 ! 6 finished local5 used
  421. ld [global3+local2], local2
  422. srl $1, 3, local3 ! first work half shifted 3 right, for the final permutation
  423. xor $2, local0, $2
  424. ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
  425. sll $1, 29, local4 ! first work half shifted 29 left, for the final permutation
  426. xor $2, local1, $2
  427. ifelse($7,{}, {}, {retl}) ! optional leaf return; the xor below is then its delay slot
  428. xor $2, local2, $2
  429. })
  430. ! {fp_macro}
  431. !
  432. ! parameter 1 right (original left)
  433. ! parameter 2 left (original right)
  434. ! parameter 3 1 for optional store to [in0]
  435. ! parameter 4 1 for load input/output address to local5/7
  436. !
  437. ! The final permutation logic switches the halves, meaning that
  438. ! left and right end up in the registers originally used.
  ! Expects local3/local4 as left by the rounds code. INPUT and OUTPUT
  ! used with parameter 4 are defined outside this part of the file.
  439. define(fp_macro, {
  440. ! {fp_macro}
  441. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  442. ! initially undo the rotate 3 left done after initial permutation
  443. ! original left is received shifted 3 right and 29 left in local3/4
  444. sll $2, 29, local1
  445. or local3, local4, $1
  446. srl $2, 3, $2
  447. sethi %hi(0x55555555), local2
  448. or $2, local1, $2
  449. or local2, %lo(0x55555555), local2 ! mask for the 1-bit swap step
  450. srl $2, 1, local3
  451. sethi %hi(0x00ff00ff), local1
  452. xor local3, $1, local3
  453. or local1, %lo(0x00ff00ff), local1 ! mask for the 8-bit swap step
  454. and local3, local2, local3
  455. sethi %hi(0x33333333), local4
  456. sll local3, 1, local2
  457. xor $1, local3, $1
  458. srl $1, 8, local3
  459. xor $2, local2, $2
  460. xor local3, $2, local3
  461. or local4, %lo(0x33333333), local4 ! mask for the 2-bit swap step
  462. and local3, local1, local3
  463. sethi %hi(0x0000ffff), local1
  464. sll local3, 8, local2
  465. xor $2, local3, $2
  466. srl $2, 2, local3
  467. xor $1, local2, $1
  468. xor local3, $1, local3
  469. or local1, %lo(0x0000ffff), local1 ! mask for the 16-bit swap step
  470. and local3, local4, local3
  471. sethi %hi(0x0f0f0f0f), local4
  472. sll local3, 2, local2
  473. ifelse($4,1, {LDPTR INPUT, local5})
  474. xor $1, local3, $1
  475. ifelse($4,1, {LDPTR OUTPUT, local7})
  476. srl $1, 16, local3
  477. xor $2, local2, $2
  478. xor local3, $2, local3
  479. or local4, %lo(0x0f0f0f0f), local4 ! mask for the 4-bit swap step
  480. and local3, local1, local3
  481. sll local3, 16, local2
  482. xor $2, local3, local1 ! intermediate kept in local1 until the last xor
  483. srl local1, 4, local3
  484. xor $1, local2, $1
  485. xor local3, $1, local3
  486. and local3, local4, local3
  487. sll local3, 4, local2
  488. xor $1, local3, $1
  489. ! optional store:
  490. ifelse($3,1, {st $1, [in0]})
  491. xor local1, local2, $2
  492. ifelse($3,1, {st $2, [in0+4]})
  493. })
  494. ! {fp_ip_macro}
  495. !
  496. ! Does initial permutation for next block mixed with
  497. ! final permutation for current block.
  498. !
  499. ! parameter 1 original left
  500. ! parameter 2 original right
  501. ! parameter 3 left ip
  502. ! parameter 4 right ip
  503. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  504. ! 2: mov in4 to in3
  505. !
  506. ! also adds -8 to length in2 and loads loop counter to out4
  ! The current block is taken from out5 and local3/local4. The masks are
  ! read from the constant table at out2 offsets 256 to 272.
  507. define(fp_ip_macro, {
  508. ! {fp_ip_macro}
  509. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  ! register aliases used below; they are m4 definitions made each time
  ! this macro is expanded
  510. define({temp1},{out4})
  511. define({temp2},{local3})
  512. define({ip1},{local1})
  513. define({ip2},{local2})
  514. define({ip4},{local4})
  515. define({ip5},{local5})
  516. ! $1 in local3, local4
  517. ld [out2+256], ip1
  518. sll out5, 29, temp1
  519. or local3, local4, $1
  520. srl out5, 3, $2
  521. ifelse($5,2,{mov in4, in3})
  522. ld [out2+272], ip5
  523. srl $4, 4, local0
  524. or $2, temp1, $2
  525. srl $2, 1, temp1
  526. xor temp1, $1, temp1
  527. and temp1, ip5, temp1
  528. xor local0, $3, local0
  529. sll temp1, 1, temp2
  530. xor $1, temp1, $1
  531. and local0, ip1, local0
  532. add in2, -8, in2 ! length minus 8
  533. sll local0, 4, local7
  534. xor $3, local0, $3
  535. ld [out2+268], ip4
  536. srl $1, 8, temp1
  537. xor $2, temp2, $2
  538. ld [out2+260], ip2
  539. srl $3, 16, local0
  540. xor $4, local7, $4
  541. xor temp1, $2, temp1
  542. xor local0, $4, local0
  543. and temp1, ip4, temp1
  544. and local0, ip2, local0
  545. sll temp1, 8, temp2
  546. xor $2, temp1, $2
  547. sll local0, 16, local7
  548. xor $4, local0, $4
  549. srl $2, 2, temp1
  550. xor $1, temp2, $1
  551. ld [out2+264], temp2 ! ip3
  552. srl $4, 2, local0
  553. xor $3, local7, $3
  554. xor temp1, $1, temp1
  555. xor local0, $3, local0
  556. and temp1, temp2, temp1
  557. and local0, temp2, local0
  558. sll temp1, 2, temp2
  559. xor $1, temp1, $1
  560. sll local0, 2, local7
  561. xor $3, local0, $3
  562. srl $1, 16, temp1
  563. xor $2, temp2, $2
  564. srl $3, 8, local0
  565. xor $4, local7, $4
  566. xor temp1, $2, temp1
  567. xor local0, $4, local0
  568. and temp1, ip2, temp1
  569. and local0, ip4, local0
  570. sll temp1, 16, temp2
  571. xor $2, temp1, local4
  572. sll local0, 8, local7
  573. xor $4, local0, $4
  574. srl $4, 1, local0
  575. xor $3, local7, $3
  576. srl local4, 4, temp1
  577. xor local0, $3, local0
  578. xor $1, temp2, $1
  579. and local0, ip5, local0
  580. sll local0, 1, local7
  581. xor temp1, $1, temp1
  582. xor $3, local0, $3
  583. xor $4, local7, $4
  584. sll $3, 3, local5
  585. and temp1, ip1, temp1
  586. sll temp1, 4, temp2
  587. xor $1, temp1, $1
  588. ifelse($5,1,{LDPTR KS2, in4})
  589. sll $4, 3, local2
  590. xor local4, temp2, $2
  591. ! reload since used as temporary:
  592. ld [out2+280], out4 ! loop counter
  593. srl $3, 29, local0
  594. ifelse($5,1,{add in4, 120, in4})
  595. ifelse($5,1,{LDPTR KS1, in3})
  596. srl $4, 29, local7
  597. or local0, local5, $4
  598. or local2, local7, $3
  599. })
  600. ! {load_little_endian}
  601. !
  ! Loads 8 bytes as two little endian 32-bit words. With
  ! OPENSSL_SYSNAME_ULTRASPARC defined, an address with the two low bits
  ! clear takes a fast path using lda with ASI 0x88; any other address
  ! falls to the byte by byte code at the label.
  !
  602. ! parameter 1 address
  603. ! parameter 2 destination left
  604. ! parameter 3 destination right
  605. ! parameter 4 temporary
  606. ! parameter 5 label
  607. define(load_little_endian, {
  608. ! {load_little_endian}
  609. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  610. ! first in memory to rightmost in register
  611. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  612. andcc $1, 3, global0
  613. bne,pn %icc, $5
  614. nop
  615. lda [$1] 0x88, $2
  616. add $1, 4, $4
  617. ba,pt %icc, $5a
  618. lda [$4] 0x88, $3
  619. #endif
  620. $5:
  621. ldub [$1+3], $2
  622. ldub [$1+2], $4
  623. sll $2, 8, $2
  624. or $2, $4, $2
  625. ldub [$1+1], $4
  626. sll $2, 8, $2
  627. or $2, $4, $2
  628. ldub [$1+0], $4
  629. sll $2, 8, $2
  630. or $2, $4, $2
  631. ldub [$1+3+4], $3
  632. ldub [$1+2+4], $4
  633. sll $3, 8, $3
  634. or $3, $4, $3
  635. ldub [$1+1+4], $4
  636. sll $3, 8, $3
  637. or $3, $4, $3
  638. ldub [$1+0+4], $4
  639. sll $3, 8, $3
  640. or $3, $4, $3
  641. $5a:
  642. })
  643. ! {load_little_endian_inc}
  644. !
  ! Same load as the macro above but the address register is advanced
  ! by 8 on both the fast and the byte by byte path.
  !
  645. ! parameter 1 address
  646. ! parameter 2 destination left
  647. ! parameter 3 destination right
  648. ! parameter 4 temporary
  649. ! parameter 5 label
  650. !
  651. ! adds 8 to address
  652. define(load_little_endian_inc, {
  653. ! {load_little_endian_inc}
  654. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  655. ! first in memory to rightmost in register
  656. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  657. andcc $1, 3, global0
  658. bne,pn %icc, $5
  659. nop
  660. lda [$1] 0x88, $2
  661. add $1, 4, $1
  662. lda [$1] 0x88, $3
  663. ba,pt %icc, $5a
  664. add $1, 4, $1
  665. #endif
  666. $5:
  667. ldub [$1+3], $2
  668. ldub [$1+2], $4
  669. sll $2, 8, $2
  670. or $2, $4, $2
  671. ldub [$1+1], $4
  672. sll $2, 8, $2
  673. or $2, $4, $2
  674. ldub [$1+0], $4
  675. sll $2, 8, $2
  676. or $2, $4, $2
  677. ldub [$1+3+4], $3
  678. add $1, 8, $1
  679. ldub [$1+2+4-8], $4
  680. sll $3, 8, $3
  681. or $3, $4, $3
  682. ldub [$1+1+4-8], $4
  683. sll $3, 8, $3
  684. or $3, $4, $3
  685. ldub [$1+0+4-8], $4
  686. sll $3, 8, $3
  687. or $3, $4, $3
  688. $5a:
  689. })
  690. ! {load_n_bytes}
  691. !
  692. ! Loads 1 to 7 bytes little endian
  693. ! Remaining bytes are zeroed.
  ! Dispatches through a table of offsets relative to the call at label
  ! suffix .0 and falls through the byte loads from the chosen entry down.
  ! Bytes 4 to 6 go to parameter 3 and bytes 0 to 3 to parameter 4.
  ! NOTE(review): table entry 0 is 0, so a length of 0 would jump back to
  ! the call itself; callers presumably never pass 0 - confirm.
  694. !
  695. ! parameter 1 address
  696. ! parameter 2 length
  697. ! parameter 3 destination register left
  698. ! parameter 4 destination register right
  699. ! parameter 5 temp
  700. ! parameter 6 temp2
  701. ! parameter 7 label
  702. ! parameter 8 return label
  703. define(load_n_bytes, {
  704. ! {load_n_bytes}
  705. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  706. $7.0: call .+8
  707. sll $2, 2, $6
  708. add %o7,$7.jmp.table-$7.0,$5
  709. add $5, $6, $5
  710. mov 0, $4
  711. ld [$5], $5
  712. jmp %o7+$5
  713. mov 0, $3
  714. $7.7:
  715. ldub [$1+6], $5
  716. sll $5, 16, $5
  717. or $3, $5, $3
  718. $7.6:
  719. ldub [$1+5], $5
  720. sll $5, 8, $5
  721. or $3, $5, $3
  722. $7.5:
  723. ldub [$1+4], $5
  724. or $3, $5, $3
  725. $7.4:
  726. ldub [$1+3], $5
  727. sll $5, 24, $5
  728. or $4, $5, $4
  729. $7.3:
  730. ldub [$1+2], $5
  731. sll $5, 16, $5
  732. or $4, $5, $4
  733. $7.2:
  734. ldub [$1+1], $5
  735. sll $5, 8, $5
  736. or $4, $5, $4
  737. $7.1:
  738. ldub [$1+0], $5
  739. ba $8
  740. or $4, $5, $4
  741. .align 4
  742. $7.jmp.table:
  743. .word 0
  744. .word $7.1-$7.0
  745. .word $7.2-$7.0
  746. .word $7.3-$7.0
  747. .word $7.4-$7.0
  748. .word $7.5-$7.0
  749. .word $7.6-$7.0
  750. .word $7.7-$7.0
  751. })
  752. ! {store_little_endian}
  753. !
  ! Stores two 32-bit words as 8 little endian bytes. With
  ! OPENSSL_SYSNAME_ULTRASPARC defined, an address with the two low bits
  ! clear takes a fast path using sta with ASI 0x88; any other address
  ! falls to the byte by byte code at the label.
  !
  754. ! parameter 1 address
  755. ! parameter 2 source left
  756. ! parameter 3 source right
  757. ! parameter 4 temporary
  ! parameter 5 label
  758. define(store_little_endian, {
  759. ! {store_little_endian}
  760. ! $1 $2 $3 $4 $5 $6 $7 $8 $9
  761. ! rightmost in register to first in memory
  762. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  763. andcc $1, 3, global0
  764. bne,pn %icc, $5
  765. nop
  766. sta $2, [$1] 0x88
  767. add $1, 4, $4
  768. ba,pt %icc, $5a
  769. sta $3, [$4] 0x88
  770. #endif
  771. $5:
  772. and $2, 255, $4
  773. stub $4, [$1+0]
  774. srl $2, 8, $4
  775. and $4, 255, $4
  776. stub $4, [$1+1]
  777. srl $2, 16, $4
  778. and $4, 255, $4
  779. stub $4, [$1+2]
  780. srl $2, 24, $4
  781. stub $4, [$1+3]
  782. and $3, 255, $4
  783. stub $4, [$1+0+4]
  784. srl $3, 8, $4
  785. and $4, 255, $4
  786. stub $4, [$1+1+4]
  787. srl $3, 16, $4
  788. and $4, 255, $4
  789. stub $4, [$1+2+4]
  790. srl $3, 24, $4
  791. stub $4, [$1+3+4]
  792. $5a:
  793. })
  794. ! {store_n_bytes}
  795. !
  796. ! Stores 1 to 7 bytes little endian
  ! Dispatches through a table of offsets relative to the call at label
  ! suffix .0 and falls through the byte stores from the chosen entry down.
  ! Bytes 4 to 6 come from parameter 3 and bytes 0 to 3 from parameter 4.
  ! NOTE(review): table entry 0 is 0, so a length of 0 would jump back to
  ! the call itself; callers presumably never pass 0 - confirm.
  797. !
  798. ! parameter 1 address
  799. ! parameter 2 length
  800. ! parameter 3 source register left
  801. ! parameter 4 source register right
  802. ! parameter 5 temp
  803. ! parameter 6 temp2
  804. ! parameter 7 label
  805. ! parameter 8 return label
  806. define(store_n_bytes, {
  807. ! {store_n_bytes}
  808. ! $1 $2 $5 $6 $7 $8 $7 $8 $9
  809. $7.0: call .+8
  810. sll $2, 2, $6
  811. add %o7,$7.jmp.table-$7.0,$5
  812. add $5, $6, $5
  813. ld [$5], $5
  814. jmp %o7+$5
  815. nop
  816. $7.7:
  817. srl $3, 16, $5
  818. and $5, 0xff, $5
  819. stub $5, [$1+6]
  820. $7.6:
  821. srl $3, 8, $5
  822. and $5, 0xff, $5
  823. stub $5, [$1+5]
  824. $7.5:
  825. and $3, 0xff, $5
  826. stub $5, [$1+4]
  827. $7.4:
  828. srl $4, 24, $5
  829. stub $5, [$1+3]
  830. $7.3:
  831. srl $4, 16, $5
  832. and $5, 0xff, $5
  833. stub $5, [$1+2]
  834. $7.2:
  835. srl $4, 8, $5
  836. and $5, 0xff, $5
  837. stub $5, [$1+1]
  838. $7.1:
  839. and $4, 0xff, $5
  840. ba $8
  841. stub $5, [$1]
  842. .align 4
  843. $7.jmp.table:
  844. .word 0
  845. .word $7.1-$7.0
  846. .word $7.2-$7.0
  847. .word $7.3-$7.0
  848. .word $7.4-$7.0
  849. .word $7.5-$7.0
  850. .word $7.6-$7.0
  851. .word $7.7-$7.0
  852. })
  ! Test helper: the first definition is the value, the second is a macro
  ! that copies that value into up to eight registers named by its
  ! parameters and into the fixed list of working registers below.
  853. define(testvalue,{1})
  854. define(register_init, {
  855. ! For test purposes:
  856. sethi %hi(testvalue), local0
  857. or local0, %lo(testvalue), local0
  ! optional registers named by the caller, skipped when a parameter is empty
  858. ifelse($1,{},{}, {mov local0, $1})
  859. ifelse($2,{},{}, {mov local0, $2})
  860. ifelse($3,{},{}, {mov local0, $3})
  861. ifelse($4,{},{}, {mov local0, $4})
  862. ifelse($5,{},{}, {mov local0, $5})
  863. ifelse($6,{},{}, {mov local0, $6})
  864. ifelse($7,{},{}, {mov local0, $7})
  865. ifelse($8,{},{}, {mov local0, $8})
  ! fixed set of working registers
  866. mov local0, local1
  867. mov local0, local2
  868. mov local0, local3
  869. mov local0, local4
  870. mov local0, local5
  871. mov local0, local7
  872. mov local0, local6
  873. mov local0, out0
  874. mov local0, out1
  875. mov local0, out2
  876. mov local0, out3
  877. mov local0, out4
  878. mov local0, out5
  879. mov local0, global1
  880. mov local0, global2
  881. mov local0, global3
  882. mov local0, global4
  883. mov local0, global5
  884. })
  885. .section ".text"
  886. .align 32
  887. .des_enc:
  ! internal 16-round encryption routine, reached with call and left with
  ! retl; it does no save or restore of its own
  ! works on in5 and out5 in place
  888. ! key address in3
  889. ! loads key next encryption/decryption first round from [in4]
  890. rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
  891. .align 32
  892. .des_dec:
  ! internal 16-round decryption routine, reached with call and left with
  ! retl; it does no save or restore of its own
  ! the key pointer moves backwards (step parameter -1)
  893. ! implemented with out5 as first parameter to avoid
  894. ! register exchange in ede modes
  895. ! key address in4
  896. ! loads key next encryption/decryption first round from [in3]
  897. rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
  898. ! void DES_encrypt1(data, ks, enc)
  899. ! *******************************
  ! in0 = data, two 32-bit words read and written in place
  ! in1 = ks, key schedule address
  ! in2 = enc, zero selects the decryption path
  900. .align 32
  901. .global DES_encrypt1
  902. .type DES_encrypt1,#function
  903. DES_encrypt1:
  904. save %sp, FRAME, %sp
  905. sethi %hi(.PIC.DES_SPtrans-1f),global1
  906. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the sbox table
  907. 1: call .+8 ! %o7 = address of label 1
  908. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  909. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  910. ld [in0], in5 ! left
  911. cmp in2, 0 ! enc
  912. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  913. be,pn %icc, .encrypt.dec ! enc/dec
  914. #else
  915. be .encrypt.dec
  916. #endif
  917. ld [in0+4], out5 ! right
  918. ! parameter 6 1/2 for include encryption/decryption
  919. ! parameter 7 1 for move in1 to in3
  920. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  921. ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
  922. rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
  923. fp_macro(in5, out5, 1) ! 1 for store to [in0]
  924. ret
  925. restore
  926. .encrypt.dec:
  927. add in1, 120, in3 ! use last subkey for first round
  928. ! parameter 6 1/2 for include encryption/decryption
  929. ! parameter 7 1 for move in1 to in3
  930. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  931. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
  932. fp_macro(out5, in5, 1) ! 1 for store to [in0]
  933. ret
  934. restore
  935. .DES_encrypt1.end:
  936. .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
  937. ! void DES_encrypt2(data, ks, enc)
  938. !*********************************
  939. ! encrypts/decrypts without initial/final permutation
  ! in0 = data, two 32-bit words read and written in place
  ! in1 = ks, key schedule address
  ! in2 = enc, zero selects the decryption path
  940. .align 32
  941. .global DES_encrypt2
  942. .type DES_encrypt2,#function
  943. DES_encrypt2:
  944. save %sp, FRAME, %sp
  945. sethi %hi(.PIC.DES_SPtrans-1f),global1
  946. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the sbox table
  947. 1: call .+8 ! %o7 = address of label 1
  948. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  949. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  950. ! Set sbox address 1 to 6 and rotate halfs 3 left
  951. ! Errors caught by destest? Yes. Still? *NO*
  952. !sethi %hi(DES_SPtrans), global1 ! address sbox 1
  953. !or global1, %lo(DES_SPtrans), global1 ! sbox 1
  954. add global1, 256, global2 ! sbox 2
  955. add global1, 512, global3 ! sbox 3
  956. ld [in0], out5 ! right
  957. add global1, 768, global4 ! sbox 4
  958. add global1, 1024, global5 ! sbox 5
  959. ld [in0+4], in5 ! left
  960. add global1, 1280, local6 ! sbox 6
  961. add global1, 1792, out3 ! sbox 8
  962. ! rotate
  963. sll in5, 3, local5
  964. mov in1, in3 ! key address to in3
  965. sll out5, 3, local7
  966. srl in5, 29, in5
  967. srl out5, 29, out5
  968. add in5, local5, in5
  969. add out5, local7, out5
  970. cmp in2, 0
  971. ! we use our own stackframe
  972. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  973. be,pn %icc, .encrypt2.dec ! decryption
  974. #else
  975. be .encrypt2.dec
  976. #endif
  977. STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save data pointer, in0 is reused below
  978. ld [in3], out0 ! key 7531 first round
  979. mov LOOPS, out4 ! loop counter
  980. ld [in3+4], out1 ! key 8642 first round
  981. sethi %hi(0x0000FC00), local5 ! the rounds code also reloads this mask itself
  982. call .des_enc
  983. mov in3, in4 ! delay slot: the rounds code loads a next key from [in4]
  984. ! rotate
  985. sll in5, 29, in0
  986. srl in5, 3, in5
  987. sll out5, 29, in1
  988. add in5, in0, in5
  989. srl out5, 3, out5
  990. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data pointer
  991. add out5, in1, out5
  992. st in5, [in0]
  993. st out5, [in0+4]
  994. ret
  995. restore
  996. .encrypt2.dec:
  997. add in3, 120, in4 ! start from the last subkey
  998. ld [in4], out0 ! key 7531 first round
  999. mov LOOPS, out4 ! loop counter
  1000. ld [in4+4], out1 ! key 8642 first round
  1001. sethi %hi(0x0000FC00), local5 ! the rounds code also reloads this mask itself
  1002. mov in5, local1 ! left expected in out5
  1003. mov out5, in5
  1004. call .des_dec
  1005. mov local1, out5 ! delay slot: completes the swap of the halves
  1006. .encrypt2.finish:
  1007. ! rotate
  1008. sll in5, 29, in0
  1009. srl in5, 3, in5
  1010. sll out5, 29, in1
  1011. add in5, in0, in5
  1012. srl out5, 3, out5
  1013. LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data pointer
  1014. add out5, in1, out5
  1015. st out5, [in0]
  1016. st in5, [in0+4]
  1017. ret
  1018. restore
  1019. .DES_encrypt2.end:
  1020. .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
  1021. ! void DES_encrypt3(data, ks1, ks2, ks3)
  1022. ! **************************************
  ! in0 = data, two 32-bit words read and written in place
  ! in1, in2, in3 = ks1, ks2, ks3
  ! runs encrypt with ks1, decrypt with ks2, encrypt with ks3
  1023. .align 32
  1024. .global DES_encrypt3
  1025. .type DES_encrypt3,#function
  1026. DES_encrypt3:
  1027. save %sp, FRAME, %sp
  1028. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1029. or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the sbox table
  1030. 1: call .+8 ! %o7 = address of label 1
  1031. add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
  1032. sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
  1033. ld [in0], in5 ! left
  1034. add in2, 120, in4 ! ks2
  1035. ld [in0+4], out5 ! right
  1036. mov in3, in2 ! save ks3
  1037. ! parameter 6 1/2 for include encryption/decryption
  1038. ! parameter 7 1 for mov in1 to in3
  1039. ! parameter 8 1 for mov in3 to in4
  1040. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  ! the macro call below ends by calling the encryption rounds (ks1 in in3)
  1041. ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
  1042. call .des_dec
  1043. mov in2, in3 ! preload ks3
  1044. call .des_enc
  1045. nop
  1046. fp_macro(in5, out5, 1)
  1047. ret
  1048. restore
  1049. .DES_encrypt3.end:
  1050. .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
  1051. ! void DES_decrypt3(data, ks1, ks2, ks3)
  1052. ! **************************************
  !
  ! Reverse of DES_encrypt3: three DES passes over the two 32-bit
  ! words at data, in the order decrypt with ks3, encrypt with ks2,
  ! decrypt with ks1.
  !
  ! After the save below: in0 = data, in1 = ks1, in2 = ks2, in3 = ks3.
  ! Working registers set up here:
  !   global1  address of the .PIC.DES_SPtrans tables
  !   out2     address of .des_and
  !   in5      first word of the block, out5 second word
  !   in4      key pointer for .des_dec, always a schedule + 120
  !   in3      key pointer for .des_enc
  !
  ! The instruction that follows each call sits in its delay slot and
  ! executes before the called routine is entered.
  1053. .align 32
  1054. .global DES_decrypt3
  1055. .type DES_decrypt3,#function
  1056. DES_decrypt3:
  1057. save %sp, FRAME, %sp
  ! Position-independent address of the tables: the call at label 1
  ! leaves its own address in %o7, and the link-time constant
  ! .PIC.DES_SPtrans minus label 1 is added to it.
  1058. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1059. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1060. 1: call .+8
  1061. add %o7,global1,global1
  1062. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1063. ld [in0], in5 ! left
  1064. add in3, 120, in4 ! ks3
  1065. ld [in0+4], out5 ! right
  1066. mov in2, in3 ! ks2
  ! Initial permutation follows.  Parameter 6 = 2 makes it include the
  ! first decryption pass, which uses the ks3 pointer in in4.  Note the
  ! left and right result registers are swapped relative to the inputs.
  1067. ! parameter 6 1/2 for include encryption/decryption
  1068. ! parameter 7 1 for mov in1 to in3
  1069. ! parameter 8 1 for mov in3 to in4
  1070. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1071. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
  ! Second pass: encrypt with ks2, key pointer already in in3.
  1072. call .des_enc
  1073. add in1, 120, in4 ! preload ks1
  ! Third pass: decrypt with ks1, pointer prepared in the delay slot
  ! above.
  1074. call .des_dec
  1075. nop
  ! Final permutation.  NOTE(review): the last argument 1 presumably
  ! makes the macro store the result back through in0, since nothing
  ! else here writes the block - confirm against the macro definition,
  ! which is outside this section.
  1076. fp_macro(out5, in5, 1)
  1077. ret
  1078. restore
  1079. .DES_decrypt3.end:
  1080. .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
  1081. ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
  1082. ! *****************************************************************
  !
  ! DES in CBC mode over length bytes from input to output, updating
  ! the 8 bytes at ivec on the way out.  A zero enc selects the
  ! decryption path at .ncbc.dec, anything else encrypts.
  !
  ! After the save below: in0 = input, in1 = output, in2 = length,
  ! in3 = schedule, in4 = ivec, in5 = enc.
  !
  ! Encryption path register use:
  !   in5, out5       chaining value - the iv first, then each
  !                   ciphertext block - written to ivec at the end
  !   out4, global4   plaintext block just loaded
  !   in2             byte count, kept 8 below the bytes still to do
  ! Decryption path register use:
  !   in0, in1        previous ciphertext block, initially the iv
  !   local5, local7  running input and output addresses
  !   out4, global4   plaintext after the iv xor
  !
  ! Branch delay slots are used throughout: the instruction after a
  ! branch or call executes before control arrives at the target.
  1083. .align 32
  1084. .global DES_ncbc_encrypt
  1085. .type DES_ncbc_encrypt,#function
  1086. DES_ncbc_encrypt:
  1087. save %sp, FRAME, %sp
  ! m4 names for three slots in the argument area of this frame.
  ! They stay set for the rest of the file once this point has
  ! been processed.
  1088. define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
  1089. define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
  1090. define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  ! Position-independent address of the tables: the call at label 1
  ! leaves its own address in %o7, and the link-time constant
  ! .PIC.DES_SPtrans minus label 1 is added to it.  out2 then gets
  ! the address of .des_and.
  1091. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1092. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1093. 1: call .+8
  1094. add %o7,global1,global1
  1095. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1096. cmp in5, 0 ! enc
  1097. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1098. be,pn %icc, .ncbc.dec
  1099. #else
  1100. be .ncbc.dec
  1101. #endif
  1102. STPTR in4, IVEC ! delay slot: ivec pointer saved on both paths
  ! ---- encryption ----
  1103. ! addr left right temp label
  1104. load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
  1105. addcc in2, -8, in2 ! bytes missing when first block done
  1106. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1107. bl,pn %icc, .ncbc.enc.seven.or.less
  1108. #else
  1109. bl .ncbc.enc.seven.or.less
  1110. #endif
  1111. mov in3, in4 ! schedule
  1112. .ncbc.enc.next.block:
  1113. load_little_endian(in0, out4, global4, local3, .LLE2) ! block
  ! Entry point also used by the partial-block loader further down,
  ! which arrives with the bytes already in out4 and global4.
  1114. .ncbc.enc.next.block_1:
  1115. xor in5, out4, in5 ! iv xor
  1116. xor out5, global4, out5 ! iv xor
  1117. ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
  1118. ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
  ! Re-entry point for the fused path below, which has already done
  ! the initial permutation of the next block.
  1119. .ncbc.enc.next.block_2:
  1120. !// call .des_enc ! compares in2 to 8
  1121. ! rounds inlined for alignment purposes
  1122. add global1, 768, global4 ! address sbox 4 since register used below
  1123. rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
  ! The branch below uses condition codes left by the rounds, which
  ! per the note above compare in2 to 8.  Less than 8 means no full
  ! block follows, so the plain final permutation path is taken.
  1124. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1125. bl,pn %icc, .ncbc.enc.next.block_fp
  1126. #else
  1127. bl .ncbc.enc.next.block_fp
  1128. #endif
  1129. add in0, 8, in0 ! input address
  1130. ! If 8 or more bytes are to be encrypted after this block,
  1131. ! we combine final permutation for this block with initial
  1132. ! permutation for next block. Load next block:
  1133. load_little_endian(in0, global3, global4, local5, .LLE12)
  1134. ! parameter 1 original left
  1135. ! parameter 2 original right
  1136. ! parameter 3 left ip
  1137. ! parameter 4 right ip
  1138. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1139. ! 2: mov in4 to in3
  1140. !
  1141. ! also adds -8 to length in2 and loads loop counter to out4
  1142. fp_ip_macro(out0, out1, global3, global4, 2)
  1143. store_little_endian(in1, out0, out1, local3, .SLE10) ! block
  ! The ciphertext just stored is the chaining value for the next
  ! block.  global3 and global4 are reloaded with sbox addresses
  ! because the block load above reused them as data registers.
  1144. ld [in3], out0 ! key 7531 first round next block
  1145. mov in5, local1
  1146. xor global3, out5, in5 ! iv xor next block
  1147. ld [in3+4], out1 ! key 8642
  1148. add global1, 512, global3 ! address sbox 3 since register used
  1149. xor global4, local1, out5 ! iv xor next block
  1150. ba .ncbc.enc.next.block_2
  1151. add in1, 8, in1 ! output address
  ! Unfused path: final permutation, store, then decide whether a
  ! full block remains.
  1152. .ncbc.enc.next.block_fp:
  1153. fp_macro(in5, out5)
  1154. store_little_endian(in1, in5, out5, local3, .SLE1) ! block
  1155. addcc in2, -8, in2 ! bytes missing when next block done
  1156. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1157. bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
  1158. #else
  1159. bpos .ncbc.enc.next.block
  1160. #endif
  1161. add in1, 8, in1
  ! Here in2 is between -8 and -1.  A value of -8 means nothing is
  ! left, anything above that leaves in2 + 8 trailing bytes.
  1162. .ncbc.enc.seven.or.less:
  1163. cmp in2, -8
  1164. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1165. ble,pt %icc, .ncbc.enc.finish
  1166. #else
  1167. ble .ncbc.enc.finish
  1168. #endif
  1169. nop
  1170. add in2, 8, local1 ! bytes to load
  1171. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1172. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
  1173. ! Loads 1 to 7 bytes little endian to global4, out4
  ! Write the last chaining value back through the saved ivec pointer.
  1174. .ncbc.enc.finish:
  1175. LDPTR IVEC, local4
  1176. store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
  1177. ret
  1178. restore
  ! ---- decryption ----
  ! A length of zero or less returns at once and leaves ivec
  ! untouched.
  1179. .ncbc.dec:
  1180. STPTR in0, INPUT
  1181. cmp in2, 0 ! length
  1182. add in3, 120, in3
  1183. LDPTR IVEC, local7 ! ivec
  1184. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1185. ble,pn %icc, .ncbc.dec.finish
  1186. #else
  1187. ble .ncbc.dec.finish
  1188. #endif
  1189. mov in3, in4 ! schedule
  1190. STPTR in1, OUTPUT
  1191. mov in0, local5 ! input
  1192. load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
  1193. .ncbc.dec.next.block:
  1194. load_little_endian(local5, in5, out5, local3, .LLE4) ! block
  1195. ! parameter 6 1/2 for include encryption/decryption
  1196. ! parameter 7 1 for mov in1 to in3
  1197. ! parameter 8 1 for mov in3 to in4
  1198. ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
  1199. fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
  1200. ! in2 is bytes left to be stored
  1201. ! in2 is compared to 8 in the rounds
  1202. xor out5, in0, out4 ! iv xor
  1203. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1204. bl,pn %icc, .ncbc.dec.seven.or.less
  1205. #else
  1206. bl .ncbc.dec.seven.or.less
  1207. #endif
  1208. xor in5, in1, global4 ! iv xor
  1209. ! Load ivec next block now, since input and output address might be the same.
  1210. load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
  1211. store_little_endian(local7, out4, global4, local3, .SLE3)
  ! Save the advanced addresses back to their stack slots.
  1212. STPTR local5, INPUT
  1213. add local7, 8, local7
  1214. addcc in2, -8, in2
  1215. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1216. bg,pt %icc, .ncbc.dec.next.block
  1217. #else
  1218. bg .ncbc.dec.next.block
  1219. #endif
  1220. STPTR local7, OUTPUT
  ! in0 and in1 hold the last ciphertext block read, which becomes
  ! the new iv.
  1221. .ncbc.dec.store.iv:
  1222. LDPTR IVEC, local4 ! ivec
  1223. store_little_endian(local4, in0, in1, local5, .SLE4)
  1224. .ncbc.dec.finish:
  1225. ret
  1226. restore
  ! Fewer than 8 bytes remain to be stored: pick up the final
  ! ciphertext block as the new iv, write only in2 bytes of
  ! plaintext, then continue at .ncbc.dec.store.iv.
  1227. .ncbc.dec.seven.or.less:
  1228. load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
  1229. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
  1230. .DES_ncbc_encrypt.end:
  1231. .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
  1232. ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
  1233. ! **************************************************************************
  !
  ! Three-key DES in CBC mode over length bytes from input to output,
  ! updating the 8 bytes at ivec on the way out.  A zero enc selects
  ! the decryption path at .ede3.dec, anything else encrypts.
  !
  ! After the save below: in0 = input, in1 = output, in2 = length,
  ! in3 = ks1, in4 = ks2, in5 = ks3.  The seventh and eighth
  ! arguments, ivec and enc, are read from the caller argument area
  ! through %fp.
  !
  ! Encryption path register use:
  !   in5, out5       chaining value - the iv first, then each
  !                   ciphertext block - written to ivec at the end
  !   out4, global4   plaintext block just loaded
  !   in2             byte count, kept 8 below the bytes still to do
  ! Decryption path register use:
  !   in0, in1        previous ciphertext block, initially the iv
  !   local5, local7  running input and output addresses
  !   out4, global4   plaintext after the iv xor
  !
  ! Branch delay slots are used throughout: the instruction after a
  ! branch or call executes before control arrives at the target.
  1234. .align 32
  1235. .global DES_ede3_cbc_encrypt
  1236. .type DES_ede3_cbc_encrypt,#function
  1237. DES_ede3_cbc_encrypt:
  1238. save %sp, FRAME, %sp
  ! m4 names for the three key schedule slots in the argument area
  ! of this frame.
  1239. define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
  1240. define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
  1241. define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
  ! Position-independent address of the tables: the call at label 1
  ! leaves its own address in %o7, and the link-time constant
  ! .PIC.DES_SPtrans minus label 1 is added to it.  out2 then gets
  ! the address of .des_and.
  1242. sethi %hi(.PIC.DES_SPtrans-1f),global1
  1243. or global1,%lo(.PIC.DES_SPtrans-1f),global1
  1244. 1: call .+8
  1245. add %o7,global1,global1
  1246. sub global1,.PIC.DES_SPtrans-.des_and,out2
  1247. LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
  1248. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1249. cmp local3, 0 ! enc
  1250. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1251. be,pn %icc, .ede3.dec
  1252. #else
  1253. be .ede3.dec
  1254. #endif
  1255. STPTR in4, KS2 ! delay slot: ks2 saved unmodified on both paths
  ! ---- encryption ----
  1256. STPTR in5, KS3
  1257. load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
  1258. addcc in2, -8, in2 ! bytes missing after next block
  1259. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1260. bl,pn %icc, .ede3.enc.seven.or.less
  1261. #else
  1262. bl .ede3.enc.seven.or.less
  1263. #endif
  1264. STPTR in3, KS1
  1265. .ede3.enc.next.block:
  1266. load_little_endian(in0, out4, global4, local3, .LLE7)
  ! Entry point also used by the partial-block loader further down,
  ! which arrives with the bytes already in out4 and global4.
  1267. .ede3.enc.next.block_1:
  1268. LDPTR KS2, in4
  1269. xor in5, out4, in5 ! iv xor
  1270. xor out5, global4, out5 ! iv xor
  1271. LDPTR KS1, in3
  1272. add in4, 120, in4 ! for decryption we use last subkey first
  1273. nop
  1274. ip_macro(in5, out5, in5, out5, in3)
  ! Re-entry point for the fused path below, which has already done
  ! the initial permutation of the next block.
  1275. .ede3.enc.next.block_2:
  1276. call .des_enc ! ks1 in3
  1277. nop
  1278. call .des_dec ! ks2 in4
  1279. LDPTR KS3, in3
  1280. call .des_enc ! ks3 in3 compares in2 to 8
  1281. nop
  ! Condition codes from the compare in the last pass: less than 8
  ! means no full block follows.
  1282. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1283. bl,pn %icc, .ede3.enc.next.block_fp
  1284. #else
  1285. bl .ede3.enc.next.block_fp
  1286. #endif
  1287. add in0, 8, in0
  1288. ! If 8 or more bytes are to be encrypted after this block,
  1289. ! we combine final permutation for this block with initial
  1290. ! permutation for next block. Load next block:
  1291. load_little_endian(in0, global3, global4, local5, .LLE11)
  1292. ! parameter 1 original left
  1293. ! parameter 2 original right
  1294. ! parameter 3 left ip
  1295. ! parameter 4 right ip
  1296. ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
  1297. ! 2: mov in4 to in3
  1298. !
  1299. ! also adds -8 to length in2 and loads loop counter to out4
  1300. fp_ip_macro(out0, out1, global3, global4, 1)
  1301. store_little_endian(in1, out0, out1, local3, .SLE9) ! block
  ! The ciphertext just stored is the chaining value for the next
  ! block.  global3 and global4 are reloaded with sbox addresses
  ! because the block load above reused them as data registers.
  1302. mov in5, local1
  1303. xor global3, out5, in5 ! iv xor next block
  1304. ld [in3], out0 ! key 7531
  1305. add global1, 512, global3 ! address sbox 3
  1306. xor global4, local1, out5 ! iv xor next block
  1307. ld [in3+4], out1 ! key 8642
  1308. add global1, 768, global4 ! address sbox 4
  1309. ba .ede3.enc.next.block_2
  1310. add in1, 8, in1
  ! Unfused path: final permutation, store, then decide whether a
  ! full block remains.
  1311. .ede3.enc.next.block_fp:
  1312. fp_macro(in5, out5)
  1313. store_little_endian(in1, in5, out5, local3, .SLE5) ! block
  1314. addcc in2, -8, in2 ! bytes missing when next block done
  1315. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1316. bpos,pt %icc, .ede3.enc.next.block
  1317. #else
  1318. bpos .ede3.enc.next.block
  1319. #endif
  1320. add in1, 8, in1
  ! Here in2 is between -8 and -1.  A value of -8 means nothing is
  ! left, anything above that leaves in2 + 8 trailing bytes.
  1321. .ede3.enc.seven.or.less:
  1322. cmp in2, -8
  1323. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1324. ble,pt %icc, .ede3.enc.finish
  1325. #else
  1326. ble .ede3.enc.finish
  1327. #endif
  1328. nop
  1329. add in2, 8, local1 ! bytes to load
  1330. ! addr, length, dest left, dest right, temp, temp2, label, ret label
  1331. load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
  ! Write the last chaining value back to ivec.
  1332. .ede3.enc.finish:
  1333. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1334. store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
  1335. ret
  1336. restore
  ! ---- decryption ----
  ! Uses the input and output slot names set up in DES_ncbc_encrypt
  ! earlier in this file.  ks1 and ks3 are stored advanced by 120
  ! because the decryption passes start from the last subkey.  ks2
  ! is used for the encryption pass and was stored as passed.
  1337. .ede3.dec:
  1338. STPTR in0, INPUT
  1339. add in5, 120, in5
  1340. STPTR in1, OUTPUT
  1341. mov in0, local5
  1342. add in3, 120, in3
  1343. STPTR in3, KS1
  1344. cmp in2, 0
  ! A length of zero or less returns at once and leaves ivec
  ! untouched.  That exit is the rare case, so the hinted form is
  ! marked predict-not-taken like the matching branch in
  ! DES_ncbc_encrypt.  With no hint the assembler would default to
  ! predict-taken.
  1345. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1346. ble,pn %icc, .ede3.dec.finish
  1347. #else
  1348. ble .ede3.dec.finish
  1349. #endif
  1350. STPTR in5, KS3
  1351. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
  1352. load_little_endian(local7, in0, in1, local3, .LLE8)
  1353. .ede3.dec.next.block:
  1354. load_little_endian(local5, in5, out5, local3, .LLE9)
  1355. ! parameter 6 1/2 for include encryption/decryption
  1356. ! parameter 7 1 for mov in1 to in3
  1357. ! parameter 8 1 for mov in3 to in4
  1358. ! parameter 9 1 for load ks3 and ks2 to in4 and in3
  1359. ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
  1360. call .des_enc ! ks2 in3
  1361. LDPTR KS1, in4
  1362. call .des_dec ! ks1 in4
  1363. nop
  1364. fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
  1365. ! in2 is bytes left to be stored
  1366. ! in2 is compared to 8 in the rounds
  1367. xor out5, in0, out4
  1368. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1369. bl,pn %icc, .ede3.dec.seven.or.less
  1370. #else
  1371. bl .ede3.dec.seven.or.less
  1372. #endif
  1373. xor in5, in1, global4
  ! The next iv is read before the plaintext is stored, so that it
  ! survives when input and output are the same buffer.
  1374. load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
  1375. store_little_endian(local7, out4, global4, local3, .SLE7) ! block
  ! Save the advanced addresses back to their stack slots.
  1376. STPTR local5, INPUT
  1377. addcc in2, -8, in2
  1378. add local7, 8, local7
  1379. #ifdef OPENSSL_SYSNAME_ULTRASPARC
  1380. bg,pt %icc, .ede3.dec.next.block
  1381. #else
  1382. bg .ede3.dec.next.block
  1383. #endif
  1384. STPTR local7, OUTPUT
  ! in0 and in1 hold the last ciphertext block read, which becomes
  ! the new iv.
  1385. .ede3.dec.store.iv:
  1386. LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
  1387. store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
  1388. .ede3.dec.finish:
  1389. ret
  1390. restore
  ! Fewer than 8 bytes remain to be stored: pick up the final
  ! ciphertext block as the new iv, write only in2 bytes of
  ! plaintext, then continue at .ede3.dec.store.iv.
  1391. .ede3.dec.seven.or.less:
  1392. load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
  1393. store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
  1394. .DES_ede3_cbc_encrypt.end:
  1395. .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
  1396. .align 256
  1397. .type .des_and,#object
  ! Object size: 256 table bytes plus the 8 constant words that
  ! follow, 288 bytes in all.  The last word starts at offset 284.
  1398. .size .des_and,288
  1399. .des_and:
  1400. ! This table is used for AND 0xFC when it is known that register
  1401. ! bits 8-31 are zero. Makes it possible to do three arithmetic
  1402. ! operations in one cycle.
  ! Byte k of the table holds k with its two low bits cleared, so a
  ! byte load at .des_and + k yields k AND 0xFC.
  1403. .byte 0, 0, 0, 0, 4, 4, 4, 4
  1404. .byte 8, 8, 8, 8, 12, 12, 12, 12
  1405. .byte 16, 16, 16, 16, 20, 20, 20, 20
  1406. .byte 24, 24, 24, 24, 28, 28, 28, 28
  1407. .byte 32, 32, 32, 32, 36, 36, 36, 36
  1408. .byte 40, 40, 40, 40, 44, 44, 44, 44
  1409. .byte 48, 48, 48, 48, 52, 52, 52, 52
  1410. .byte 56, 56, 56, 56, 60, 60, 60, 60
  1411. .byte 64, 64, 64, 64, 68, 68, 68, 68
  1412. .byte 72, 72, 72, 72, 76, 76, 76, 76
  1413. .byte 80, 80, 80, 80, 84, 84, 84, 84
  1414. .byte 88, 88, 88, 88, 92, 92, 92, 92
  1415. .byte 96, 96, 96, 96, 100, 100, 100, 100
  1416. .byte 104, 104, 104, 104, 108, 108, 108, 108
  1417. .byte 112, 112, 112, 112, 116, 116, 116, 116
  1418. .byte 120, 120, 120, 120, 124, 124, 124, 124
  1419. .byte 128, 128, 128, 128, 132, 132, 132, 132
  1420. .byte 136, 136, 136, 136, 140, 140, 140, 140
  1421. .byte 144, 144, 144, 144, 148, 148, 148, 148
  1422. .byte 152, 152, 152, 152, 156, 156, 156, 156
  1423. .byte 160, 160, 160, 160, 164, 164, 164, 164
  1424. .byte 168, 168, 168, 168, 172, 172, 172, 172
  1425. .byte 176, 176, 176, 176, 180, 180, 180, 180
  1426. .byte 184, 184, 184, 184, 188, 188, 188, 188
  1427. .byte 192, 192, 192, 192, 196, 196, 196, 196
  1428. .byte 200, 200, 200, 200, 204, 204, 204, 204
  1429. .byte 208, 208, 208, 208, 212, 212, 212, 212
  1430. .byte 216, 216, 216, 216, 220, 220, 220, 220
  1431. .byte 224, 224, 224, 224, 228, 228, 228, 228
  1432. .byte 232, 232, 232, 232, 236, 236, 236, 236
  1433. .byte 240, 240, 240, 240, 244, 244, 244, 244
  1434. .byte 248, 248, 248, 248, 252, 252, 252, 252
  1435. ! 5 numbers for initial/final permutation, followed by 3 more
  ! constant words.  The trailing comment on each line is its byte
  ! offset from .des_and.
  1436. .word 0x0f0f0f0f ! offset 256
  1437. .word 0x0000ffff ! 260
  1438. .word 0x33333333 ! 264
  1439. .word 0x00ff00ff ! 268
  1440. .word 0x55555555 ! 272
  1441. .word 0 ! 276
  1442. .word LOOPS ! 280
  1443. .word 0x0000FC00 ! 284
  1444. .global DES_SPtrans
  1445. .type DES_SPtrans,#object
  1446. .size DES_SPtrans,2048
  1447. .align 64
  ! Eight lookup tables of 64 words each, 256 bytes per table and
  ! 2048 bytes in total, matching the size declared above.
  !
  ! The same address carries two labels: the exported DES_SPtrans and
  ! .PIC.DES_SPtrans, which the functions in this file use in their
  ! pc-relative address computation.
  !
  ! The code reaches table k at global1 + 256*k and numbers the
  ! tables from one, so the table marked nibble 2 below is what the
  ! instruction comments call sbox 3, and nibble 7 is sbox 8.
  1448. DES_SPtrans:
  1449. .PIC.DES_SPtrans:
  1450. ! nibble 0
  1451. .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
  1452. .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
  1453. .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
  1454. .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
  1455. .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
  1456. .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
  1457. .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
  1458. .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
  1459. .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
  1460. .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
  1461. .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
  1462. .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
  1463. .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
  1464. .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
  1465. .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
  1466. .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
  1467. ! nibble 1
  1468. .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
  1469. .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
  1470. .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
  1471. .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
  1472. .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
  1473. .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
  1474. .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
  1475. .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
  1476. .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
  1477. .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
  1478. .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
  1479. .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
  1480. .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
  1481. .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
  1482. .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
  1483. .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
  1484. ! nibble 2
  1485. .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
  1486. .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
  1487. .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
  1488. .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
  1489. .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
  1490. .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
  1491. .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
  1492. .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
  1493. .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
  1494. .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
  1495. .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
  1496. .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
  1497. .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
  1498. .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
  1499. .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
  1500. .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
  1501. ! nibble 3
  1502. .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
  1503. .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
  1504. .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
  1505. .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
  1506. .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
  1507. .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
  1508. .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
  1509. .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
  1510. .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
  1511. .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
  1512. .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
  1513. .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
  1514. .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
  1515. .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
  1516. .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
  1517. .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
  1518. ! nibble 4
  1519. .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
  1520. .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
  1521. .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
  1522. .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
  1523. .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
  1524. .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
  1525. .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
  1526. .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
  1527. .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
  1528. .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
  1529. .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
  1530. .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
  1531. .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
  1532. .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
  1533. .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
  1534. .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
  1535. ! nibble 5
  1536. .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
  1537. .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
  1538. .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
  1539. .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
  1540. .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
  1541. .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
  1542. .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
  1543. .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
  1544. .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
  1545. .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
  1546. .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
  1547. .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
  1548. .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
  1549. .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
  1550. .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
  1551. .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
  1552. ! nibble 6
  1553. .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
  1554. .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
  1555. .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
  1556. .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
  1557. .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
  1558. .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
  1559. .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
  1560. .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
  1561. .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
  1562. .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
  1563. .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
  1564. .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
  1565. .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
  1566. .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
  1567. .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
  1568. .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
  1569. ! nibble 7
  1570. .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
  1571. .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
  1572. .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
  1573. .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
  1574. .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
  1575. .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
  1576. .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
  1577. .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
  1578. .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
  1579. .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
  1580. .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
  1581. .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
  1582. .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
  1583. .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
  1584. .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
  1585. .word 0x20000000, 0x20800080, 0x00020000, 0x00820080