*/
#include "gosthash2012.h"
-#ifdef __x86_64__
-# include <immintrin.h>
-# include <x86intrin.h>
+#if defined(__x86_64__) || defined(__e2k__)
+# ifdef _MSC_VER
+# include <intrin.h>
+# else
+# include <x86intrin.h>
+# endif
#endif
#if defined(_WIN32) || defined(_WINDOWS)
unsigned int CF = 0;
unsigned int i;
-# ifdef __x86_64__
+# ifdef HAVE_ADDCARRY_U64
for (i = 0; i < 8; i++)
- CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i]));
+ CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i]));
# else
for (i = 0; i < 8; i++) {
- const unsigned long long left = x->QWORD[i];
- unsigned long long sum;
-
- sum = left + y->QWORD[i] + CF;
- /*
- * (sum == left): is noop, because it's possible only
- * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1',
- * in that case `CF' (carry) retain previous value, which is correct,
- * because when `left + 0 + 0' there was no overflow (thus no carry),
- * and when `left + ULLONG_MAX + 1' value is wrapped back to
- * itself with overflow, thus creating carry.
- *
- * (sum != left):
- * if `sum' is not wrapped (sum > left) there should not be carry,
- * if `sum' is wrapped (sum < left) there should be carry.
- */
- if (sum != left)
- CF = (sum < left);
- x->QWORD[i] = sum;
+ const unsigned long long left = x->QWORD[i];
+ unsigned long long sum;
+
+ sum = left + y->QWORD[i] + CF;
+ /*
+ * (sum == left): nothing to do. This can happen only when `left'
+ * is added to `0 + 0' or to `ULLONG_MAX + 1'. In both cases `CF'
+ * (carry) must retain its previous value: `left + 0 + 0' does not
+ * overflow (CF was 0 and stays 0), while `left + ULLONG_MAX + 1'
+ * wraps back to `left' with overflow (CF was 1 and stays 1).
+ *
+ * (sum != left):
+ * if `sum' did not wrap (sum > left) there is no carry,
+ * if `sum' wrapped (sum < left) there is a carry.
+ */
+ if (sum != left)
+ CF = (sum < left);
+ x->QWORD[i] = sum;
}
-# endif /* !__x86_64__ */
+# endif /* !HAVE_ADDCARRY_U64 */
#else /* __GOST3411_BIG_ENDIAN__ */
LOAD(N, xmm0, xmm2, xmm4, xmm6);
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
- LOAD(m, xmm1, xmm3, xmm5, xmm7);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
for (i = 0; i < 11; i++)
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
X128M(h, xmm0, xmm2, xmm4, xmm6);
- X128M(m, xmm0, xmm2, xmm4, xmm6);
-
- UNLOAD(h, xmm0, xmm2, xmm4, xmm6);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
+ X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
+ STORE(h, xmm0, xmm2, xmm4, xmm6);
+# ifndef __i386__
/* Restore the Floating-point status on the CPU */
+ /* This is only required on MMX, but EXTRACT32 is using MMX */
_mm_empty();
+# endif
#else
union uint512_u Ki, data;
unsigned int i;
void gost2012_hash_block(gost2012_hash_ctx * CTX,
const unsigned char *data, size_t len)
{
- register size_t chunksize;
register size_t bufsize = CTX->bufsize;
if (bufsize == 0) {
}
while (len) {
- chunksize = 64 - bufsize;
+ register size_t chunksize = 64 - bufsize;
if (chunksize > len)
chunksize = len;