Adam Ierymenko 6 years ago
parent
commit
900ec143a8
1 changed file with 22 additions and 4 deletions
  1. 22 4
      node/AES.hpp

+ 22 - 4
node/AES.hpp

@@ -193,7 +193,7 @@ private:
 	/**************************************************************************/
 	/**************************************************************************/
 
 
 #ifdef ZT_AES_ARMNEON /******************************************************/
 #ifdef ZT_AES_ARMNEON /******************************************************/
-	static inline uint32x4_t *_aes_256_expAssist_armneon(uint32x4_t prev1,uint32x4_t prev2,uint32_t rcon)
+	static inline void _aes_256_expAssist_armneon(uint32x4_t prev1,uint32x4_t prev2,uint32_t rcon,uint32x4_t *e1,uint32x4_t *e2)
 	{
 	{
 		uint32_t round1[4], round2[4], prv1[4], prv2[4];
 		uint32_t round1[4], round2[4], prv1[4], prv2[4];
 		vst1q_u32(prv1, prev1);
 		vst1q_u32(prv1, prev1);
@@ -206,15 +206,32 @@ private:
 		round2[1] = sub_word(rot_word(round2[0])) ^ rcon ^ prv2[1];
 		round2[1] = sub_word(rot_word(round2[0])) ^ rcon ^ prv2[1];
 		round2[2] = sub_word(rot_word(round2[1])) ^ rcon ^ prv2[2];
 		round2[2] = sub_word(rot_word(round2[1])) ^ rcon ^ prv2[2];
 		round2[3] = sub_word(rot_word(round2[2])) ^ rcon ^ prv2[3];
 		round2[3] = sub_word(rot_word(round2[2])) ^ rcon ^ prv2[3];
-		uint32x4_t expansion[2] = {vld1q_u3(round1), vld1q_u3(round2)};
-		return expansion;
+		*e1 = vld1q_u3(round1);
+		*e2 = vld1q_u3(round2);
+		//uint32x4_t expansion[2] = {vld1q_u3(round1), vld1q_u3(round2)};
+		//return expansion;
 	}
 	}
 	inline void _init_armneon(uint8x16_t encKey)
 	inline void _init_armneon(uint8x16_t encKey)
 	{
 	{
 		uint32x4_t *schedule = _k.neon.k;
 		uint32x4_t *schedule = _k.neon.k;
-		uint32x4_t *doubleRound = nullptr;
+		uint32x4_t e1,e2;
 		(*schedule)[0] = vld1q_u32(encKey);
 		(*schedule)[0] = vld1q_u32(encKey);
 		(*schedule)[1] = vld1q_u32(encKey + 16);
 		(*schedule)[1] = vld1q_u32(encKey + 16);
+		_aes_256_expAssist_armneon((*schedule)[0],(*schedule)[1],0x01,&e1,&e2);
+		(*schedule)[2] = e1; (*schedule)[3] = e2;
+		_aes_256_expAssist_armneon((*schedule)[2],(*schedule)[3],0x01,&e1,&e2);
+		(*schedule)[4] = e1; (*schedule)[5] = e2;
+		_aes_256_expAssist_armneon((*schedule)[4],(*schedule)[5],0x01,&e1,&e2);
+		(*schedule)[6] = e1; (*schedule)[7] = e2;
+		_aes_256_expAssist_armneon((*schedule)[6],(*schedule)[7],0x01,&e1,&e2);
+		(*schedule)[8] = e1; (*schedule)[9] = e2;
+		_aes_256_expAssist_armneon((*schedule)[8],(*schedule)[9],0x01,&e1,&e2);
+		(*schedule)[10] = e1; (*schedule)[11] = e2;
+		_aes_256_expAssist_armneon((*schedule)[10],(*schedule)[11],0x01,&e1,&e2);
+		(*schedule)[12] = e1; (*schedule)[13] = e2;
+		_aes_256_expAssist_armneon((*schedule)[12],(*schedule)[13],0x01,&e1,&e2);
+		(*schedule)[14] = e1;
+		/*
 		doubleRound = _aes_256_expAssist_armneon((*schedule)[0], (*schedule)[1], 0x01);
 		doubleRound = _aes_256_expAssist_armneon((*schedule)[0], (*schedule)[1], 0x01);
 		(*schedule)[2] = doubleRound[0];
 		(*schedule)[2] = doubleRound[0];
 		(*schedule)[3] = doubleRound[1];
 		(*schedule)[3] = doubleRound[1];
@@ -235,6 +252,7 @@ private:
 		(*schedule)[13] = doubleRound[1];
 		(*schedule)[13] = doubleRound[1];
 		doubleRound = _aes_256_expAssist_armneon((*schedule)[12], (*schedule)[13], 0x40);
 		doubleRound = _aes_256_expAssist_armneon((*schedule)[12], (*schedule)[13], 0x40);
 		(*schedule)[14] = doubleRound[0];
 		(*schedule)[14] = doubleRound[0];
+		*/
 	}
 	}
 
 
 	inline void _encrypt_armneon(uint8x16_t *data) const
 	inline void _encrypt_armneon(uint8x16_t *data) const