*/
#include "gosthash2012.h"
+#ifdef __x86_64__
+# ifdef _MSC_VER
+# include <intrin.h>
+# else
+# include <x86intrin.h>
+# endif
+#endif
#if defined(_WIN32) || defined(_WINDOWS)
# define INLINE __inline
const union uint512_u * RESTRICT y)
{
#ifndef __GOST3411_BIG_ENDIAN__
- unsigned int CF;
+ unsigned int CF = 0;
unsigned int i;
- CF = 0;
+# ifdef HAVE_ADDCARRY_U64
+ for (i = 0; i < 8; i++)
+ CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i]));
+# else
for (i = 0; i < 8; i++) {
const unsigned long long left = x->QWORD[i];
unsigned long long sum;
CF = (sum < left);
x->QWORD[i] = sum;
}
-#else
+# endif /* !__x86_64__ */
+#else /* __GOST3411_BIG_ENDIAN__ */
const unsigned char *yp;
unsigned char *xp;
unsigned int i;
buf = xp[i] + yp[i] + (buf >> 8);
xp[i] = (unsigned char)buf & 0xFF;
}
-#endif
+#endif /* __GOST3411_BIG_ENDIAN__ */
}
static void g(union uint512_u *h, const union uint512_u * RESTRICT N,
LOAD(N, xmm0, xmm2, xmm4, xmm6);
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
- LOAD(m, xmm1, xmm3, xmm5, xmm7);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
for (i = 0; i < 11; i++)
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
X128M(h, xmm0, xmm2, xmm4, xmm6);
- X128M(m, xmm0, xmm2, xmm4, xmm6);
-
- UNLOAD(h, xmm0, xmm2, xmm4, xmm6);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
+ X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
- /* Restore the Floating-point status on the CPU */
- _mm_empty();
+ STORE(h, xmm0, xmm2, xmm4, xmm6);
#else
union uint512_u Ki, data;
unsigned int i;
static INLINE void stage3(gost2012_hash_ctx * CTX)
{
- ALIGN(16) union uint512_u buf;
-
- memset(&buf, 0x00, sizeof buf);
- memcpy(&buf, &(CTX->buffer), CTX->bufsize);
- memcpy(&(CTX->buffer), &buf, sizeof(uint512_u));
+ pad(CTX);
+ g(&(CTX->h), &(CTX->N), &(CTX->buffer));
+ add512(&(CTX->Sigma), &CTX->buffer);
- memset(&buf, 0x00, sizeof buf);
+ memset(&(CTX->buffer.B[0]), 0, sizeof(uint512_u));
#ifndef __GOST3411_BIG_ENDIAN__
- buf.QWORD[0] = CTX->bufsize << 3;
+ CTX->buffer.QWORD[0] = CTX->bufsize << 3;
#else
- buf.QWORD[0] = BSWAP64(CTX->bufsize << 3);
+ CTX->buffer.QWORD[0] = BSWAP64(CTX->bufsize << 3);
#endif
-
- pad(CTX);
-
- g(&(CTX->h), &(CTX->N), &(CTX->buffer));
-
- add512(&(CTX->N), &buf);
- add512(&(CTX->Sigma), &CTX->buffer);
+ add512(&(CTX->N), &(CTX->buffer));
g(&(CTX->h), &buffer0, &(CTX->N));
-
g(&(CTX->h), &buffer0, &(CTX->Sigma));
}