- memset(&(CTX->buffer[CTX->bufsize]), 0, sizeof(CTX->buffer) - CTX->bufsize);
- CTX->buffer[CTX->bufsize] = 1;
+ memset(&(CTX->buffer.B[CTX->bufsize]), 0, sizeof(CTX->buffer) - CTX->bufsize);
+ CTX->buffer.B[CTX->bufsize] = 1;
+# ifdef HAVE_ADDCARRY_U64
+ for (i = 0; i < 8; i++)
+ CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i]));
+# else
- const unsigned long long left = x->QWORD[i];
- unsigned long long sum;
-
- sum = left + y->QWORD[i] + CF;
- /*
- * (sum == left): is noop, because it's possible only
- * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1',
- * in that case `CF' (carry) retain previous value, which is correct,
- * because when `left + 0 + 0' there was no overflow (thus no carry),
- * and when `left + ULLONG_MAX + 1' value is wrapped back to
- * itself with overflow, thus creating carry.
- *
- * (sum != left):
- * if `sum' is not wrapped (sum > left) there should not be carry,
- * if `sum' is wrapped (sum < left) there should be carry.
- */
- if (sum != left)
- CF = (sum < left);
- x->QWORD[i] = sum;
+ const unsigned long long left = x->QWORD[i];
+ unsigned long long sum;
+
+ sum = left + y->QWORD[i] + CF;
+ /*
+ * (sum == left): nothing to do, because this is possible only
+ * when `left' is added to `0 + 0' or to `ULLONG_MAX + 1'.
+ * In both cases `CF' (carry) retains its previous value, which is
+ * correct: for `left + 0 + 0' there was no overflow (thus no carry),
+ * and for `left + ULLONG_MAX + 1' the value wraps back to itself
+ * with overflow, thus producing a carry.
+ *
+ * (sum != left):
+ * if `sum' did not wrap (sum > left) there must be no carry,
+ * if `sum' wrapped (sum < left) there must be a carry.
+ */
+ if (sum != left)
+ CF = (sum < left);
+ x->QWORD[i] = sum;
-static void g(union uint512_u *h, const union uint512_u *N,
- const unsigned char *m)
+static void g(union uint512_u *h, const union uint512_u * RESTRICT N,
+ const union uint512_u * RESTRICT m)
{
#ifdef __GOST3411_HAS_SSE2__
__m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */
{
#ifdef __GOST3411_HAS_SSE2__
__m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */
LOAD(N, xmm0, xmm2, xmm4, xmm6);
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
LOAD(N, xmm0, xmm2, xmm4, xmm6);
XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
- LOAD(m, xmm1, xmm3, xmm5, xmm7);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
for (i = 0; i < 11; i++)
XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
for (i = 0; i < 11; i++)
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
X128M(h, xmm0, xmm2, xmm4, xmm6);
X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
X128M(h, xmm0, xmm2, xmm4, xmm6);
- X128M(m, xmm0, xmm2, xmm4, xmm6);
-
- UNLOAD(h, xmm0, xmm2, xmm4, xmm6);
+ ULOAD(m, xmm1, xmm3, xmm5, xmm7);
+ X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
{
g(&(CTX->h), &(CTX->N), data);
add512(&(CTX->N), &buffer512);
{
g(&(CTX->h), &(CTX->N), data);
add512(&(CTX->N), &buffer512);
- ALIGN(16) union uint512_u buf;
-
- memset(&buf, 0x00, sizeof buf);
- memcpy(&buf, &(CTX->buffer), CTX->bufsize);
- memcpy(&(CTX->buffer), &buf, sizeof(uint512_u));
+ pad(CTX);
+ g(&(CTX->h), &(CTX->N), &(CTX->buffer));
+ add512(&(CTX->Sigma), &CTX->buffer);
- pad(CTX);
-
- g(&(CTX->h), &(CTX->N), (const unsigned char *)&(CTX->buffer));
-
- add512(&(CTX->N), &buf);
- add512(&(CTX->Sigma), (const union uint512_u *)&CTX->buffer[0]);
-
- g(&(CTX->h), &buffer0, (const unsigned char *)&(CTX->N));
-
- g(&(CTX->h), &buffer0, (const unsigned char *)&(CTX->Sigma));
+ g(&(CTX->h), &buffer0, &(CTX->N));
+ g(&(CTX->h), &buffer0, &(CTX->Sigma));
void gost2012_hash_block(gost2012_hash_ctx * CTX,
const unsigned char *data, size_t len)
{
void gost2012_hash_block(gost2012_hash_ctx * CTX,
const unsigned char *data, size_t len)
{
- size_t chunksize;
-
- while (len > 63 && CTX->bufsize == 0) {
- memcpy(&CTX->buffer[0], data, 64);
- stage2(CTX, &CTX->buffer[0]);
-
- data += 64;
- len -= 64;
+ register size_t bufsize = CTX->bufsize;
+
+ if (bufsize == 0) {
+ while (len >= 64) {
+ memcpy(&CTX->buffer.B[0], data, 64);
+ stage2(CTX, &(CTX->buffer));
+ data += 64;
+ len -= 64;
+ }