summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
ed0f8fb)
This results in a ~20% throughput improvement for the reference implementation
on my Xeon E5630.
`openssl speed -evp streebog256` results (first row: before, second row: after):
16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
3815.25k 12874.41k 31497.81k 49823.06k 59932.67k 57021.78k
4361.29k 14933.06k 37091.67k 59001.86k 71360.51k 72488.28k
z->QWORD[7] = x->QWORD[7] ^ y->QWORD[7]; \
}
z->QWORD[7] = x->QWORD[7] ^ y->QWORD[7]; \
}
-#ifndef __GOST3411_BIG_ENDIAN__
# define __XLPS_FOR for (_i = 0; _i <= 7; _i++)
# define __XLPS_FOR for (_i = 0; _i <= 7; _i++)
+#ifndef __GOST3411_BIG_ENDIAN__
-# define __XLPS_FOR for (_i = 7; _i >= 0; _i--)
# define _datai 7 - _i
#endif
# define _datai 7 - _i
#endif
- data->QWORD[_datai] = Ax[0][(r0 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[1][(r1 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[2][(r2 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[3][(r3 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[4][(r4 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[5][(r5 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[6][(r6 >> (_i << 3)) & 0xFF]; \
- data->QWORD[_datai] ^= Ax[7][(r7 >> (_i << 3)) & 0xFF]; \
+ data->QWORD[_datai] = Ax[0][r0 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[1][r1 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[2][r2 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[3][r3 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[4][r4 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[5][r5 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[6][r6 & 0xFF]; \
+ data->QWORD[_datai] ^= Ax[7][r7 & 0xFF]; \
+ r0 >>= 8; \
+ r1 >>= 8; \
+ r2 >>= 8; \
+ r3 >>= 8; \
+ r4 >>= 8; \
+ r5 >>= 8; \
+ r6 >>= 8; \
+ r7 >>= 8; \