2 * GOST R 34.11-2012 core functions.
4 * Copyright (c) 2013 Cryptocom LTD.
5 * This file is distributed under the same license as OpenSSL.
7 * Author: Alexey Degtyarev <alexey@renatasystems.org>
11 #include "gosthash2012.h"
12 #if defined(__x86_64__) || defined(__e2k__)
16 # include <x86intrin.h>
/*
 * INLINE: spelling of the `inline' keyword for the current toolchain.
 * Windows builds use the `__inline' spelling, everything else uses the
 * standard C99 `inline'.
 */
#if defined(_WIN32) || defined(_WINDOWS)
# define INLINE __inline
#else
# define INLINE inline
#endif
/*
 * BSWAP64(x): reverse the byte order of a 64-bit value.
 *
 * The argument is parenthesised at every use so that compound
 * expressions (e.g. `a | b') are swapped as a whole.  Note that `x' is
 * evaluated eight times, so it must not have side effects.
 */
#define BSWAP64(x) \
    ((((x) & 0xFF00000000000000ULL) >> 56) | \
     (((x) & 0x00FF000000000000ULL) >> 40) | \
     (((x) & 0x0000FF0000000000ULL) >> 24) | \
     (((x) & 0x000000FF00000000ULL) >>  8) | \
     (((x) & 0x00000000FF000000ULL) <<  8) | \
     (((x) & 0x0000000000FF0000ULL) << 24) | \
     (((x) & 0x000000000000FF00ULL) << 40) | \
     (((x) & 0x00000000000000FFULL) << 56))
37 * Initialize gost2012 hash context structure
39 void init_gost2012_hash_ctx(gost2012_hash_ctx * CTX,
40 const unsigned int digest_size)
42 memset(CTX, 0, sizeof(gost2012_hash_ctx));
44 CTX->digest_size = digest_size;
46 * IV for 512-bit hash should be 0^512
47 * IV for 256-bit hash should be (00000001)^64
49 * It's already zeroed when CTX is cleared above, so we only
50 * need to set it to 0x01-s for 256-bit hash.
52 if (digest_size == 256)
53 memset(&CTX->h, 0x01, sizeof(uint512_u));
56 static INLINE void pad(gost2012_hash_ctx * CTX)
58 memset(&(CTX->buffer.B[CTX->bufsize]), 0, sizeof(CTX->buffer) - CTX->bufsize);
59 CTX->buffer.B[CTX->bufsize] = 1;
63 static INLINE void add512(union uint512_u * RESTRICT x,
64 const union uint512_u * RESTRICT y)
66 #ifndef __GOST3411_BIG_ENDIAN__
70 # ifdef HAVE_ADDCARRY_U64
71 for (i = 0; i < 8; i++)
72 CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i]));
74 for (i = 0; i < 8; i++) {
75 const unsigned long long left = x->QWORD[i];
76 unsigned long long sum;
78 sum = left + y->QWORD[i] + CF;
80 * (sum == left): is noop, because it's possible only
81 * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1',
82 * in that case `CF' (carry) retain previous value, which is correct,
83 * because when `left + 0 + 0' there was no overflow (thus no carry),
84 * and when `left + ULLONG_MAX + 1' value is wrapped back to
85 * itself with overflow, thus creating carry.
88 * if `sum' is not wrapped (sum > left) there should not be carry,
89 * if `sum' is wrapped (sum < left) there should be carry.
95 # endif /* !__x86_64__ */
96 #else /* __GOST3411_BIG_ENDIAN__ */
97 const unsigned char *yp;
102 xp = (unsigned char *)&x[0];
103 yp = (const unsigned char *)&y[0];
106 for (i = 0; i < 64; i++) {
107 buf = xp[i] + yp[i] + (buf >> 8);
108 xp[i] = (unsigned char)buf & 0xFF;
110 #endif /* __GOST3411_BIG_ENDIAN__ */
113 static void g(union uint512_u *h, const union uint512_u * RESTRICT N,
114 const union uint512_u * RESTRICT m)
116 #ifdef __GOST3411_HAS_SSE2__
117 __m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */
118 __m128i xmm1, xmm3, xmm5, xmm7; /* XMMR1-quadruple */
121 LOAD(N, xmm0, xmm2, xmm4, xmm6);
122 XLPS128M(h, xmm0, xmm2, xmm4, xmm6);
124 ULOAD(m, xmm1, xmm3, xmm5, xmm7);
125 XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
127 for (i = 0; i < 11; i++)
128 ROUND128(i, xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
130 XLPS128M((&C[11]), xmm0, xmm2, xmm4, xmm6);
131 X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
133 X128M(h, xmm0, xmm2, xmm4, xmm6);
134 ULOAD(m, xmm1, xmm3, xmm5, xmm7);
135 X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7);
137 STORE(h, xmm0, xmm2, xmm4, xmm6);
139 /* Restore the Floating-point status on the CPU */
140 /* This is only required on MMX, but EXTRACT32 is using MMX */
144 union uint512_u Ki, data;
151 XLPS((&Ki), ((const union uint512_u *)&m[0]), (&data));
153 for (i = 0; i < 11; i++)
154 ROUND(i, (&Ki), (&data));
156 XLPS((&Ki), (&C[11]), (&Ki));
157 X((&Ki), (&data), (&data));
160 X((&data), h, (&data));
165 static INLINE void stage2(gost2012_hash_ctx * CTX, const union uint512_u *data)
167 g(&(CTX->h), &(CTX->N), data);
169 add512(&(CTX->N), &buffer512);
170 add512(&(CTX->Sigma), data);
173 static INLINE void stage3(gost2012_hash_ctx * CTX)
176 g(&(CTX->h), &(CTX->N), &(CTX->buffer));
177 add512(&(CTX->Sigma), &CTX->buffer);
179 memset(&(CTX->buffer.B[0]), 0, sizeof(uint512_u));
180 #ifndef __GOST3411_BIG_ENDIAN__
181 CTX->buffer.QWORD[0] = CTX->bufsize << 3;
183 CTX->buffer.QWORD[0] = BSWAP64(CTX->bufsize << 3);
185 add512(&(CTX->N), &(CTX->buffer));
187 g(&(CTX->h), &buffer0, &(CTX->N));
188 g(&(CTX->h), &buffer0, &(CTX->Sigma));
192 * Hash block of arbitrary length
195 void gost2012_hash_block(gost2012_hash_ctx * CTX,
196 const unsigned char *data, size_t len)
198 register size_t chunksize;
199 register size_t bufsize = CTX->bufsize;
203 memcpy(&CTX->buffer.B[0], data, 64);
204 stage2(CTX, &(CTX->buffer));
211 chunksize = 64 - bufsize;
215 memcpy(&CTX->buffer.B[bufsize], data, chunksize);
217 bufsize += chunksize;
222 stage2(CTX, &(CTX->buffer) );
226 CTX->bufsize = bufsize;
230 * Compute hash value from current state of ctx
231 * state of hash ctx becomes invalid and cannot be used for further
234 void gost2012_finish_hash(gost2012_hash_ctx * CTX, unsigned char *digest)
240 if (CTX->digest_size == 256)
241 memcpy(digest, &(CTX->h.QWORD[4]), 32);
243 memcpy(digest, &(CTX->h.QWORD[0]), 64);