From 099f14c2e4410ac5628599ff2e7181637a558959 Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Tue, 4 Feb 2020 05:47:15 +0300 Subject: [PATCH] gosthash2012_ref: Optimize XLPS for speed This results in a ~20% improvement for the reference implementation on my Xeon E5630. `openssl speed -evp streebog256` results (before and after): 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes 3815.25k 12874.41k 31497.81k 49823.06k 59932.67k 57021.78k 4361.29k 14933.06k 37091.67k 59001.86k 71360.51k 72488.28k --- gosthash2012_ref.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/gosthash2012_ref.h b/gosthash2012_ref.h index 25f0ed8..c113e15 100644 --- a/gosthash2012_ref.h +++ b/gosthash2012_ref.h @@ -25,11 +25,10 @@ z->QWORD[7] = x->QWORD[7] ^ y->QWORD[7]; \ } -#ifndef __GOST3411_BIG_ENDIAN__ # define __XLPS_FOR for (_i = 0; _i <= 7; _i++) +#ifndef __GOST3411_BIG_ENDIAN__ # define _datai _i #else -# define __XLPS_FOR for (_i = 7; _i >= 0; _i--) # define _datai 7 - _i #endif @@ -48,14 +47,22 @@ \ \ __XLPS_FOR {\ - data->QWORD[_datai] = Ax[0][(r0 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[1][(r1 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[2][(r2 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[3][(r3 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[4][(r4 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[5][(r5 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[6][(r6 >> (_i << 3)) & 0xFF]; \ - data->QWORD[_datai] ^= Ax[7][(r7 >> (_i << 3)) & 0xFF]; \ + data->QWORD[_datai] = Ax[0][r0 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[1][r1 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[2][r2 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[3][r3 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[4][r4 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[5][r5 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[6][r6 & 0xFF]; \ + data->QWORD[_datai] ^= Ax[7][r7 & 0xFF]; \ + r0 >>= 8; \ + r1 >>= 8; \ + r2 >>= 8; \ + r3 >>= 8; \ + r4 >>= 8; \ + r5 >>= 8; \ + r6 
>>= 8; \ + r7 >>= 8; \ }\ } -- 2.39.2