sha512: inline rotr64
function                                             old     new   delta
sha1_process_block64                                 461     446     -15

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
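A minimal standalone sketch (not part of the commit) of the two styles this patch switches between; sigma1_macro and sigma1_inline are hypothetical names used only for this illustration, plain static inline stands in for busybox's ALWAYS_INLINE, and the rotation amounts mirror the R1(x) define for sha512 in the diff below:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Old style: the macro pastes the rotate expression into every use, so a
 * sigma function expands the shifts of x several times and gcc has to
 * rediscover the shared work via common subexpression elimination. */
#define rotr64_macro(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
#define sigma1_macro(x) \
	(rotr64_macro(x, 19) ^ rotr64_macro(x, 61) ^ ((x) >> 6))

/* New style: a tiny inline helper; per the commit's comment, gcc 4.2.1
 * optimizes rotr64 better in this form than as a macro. */
static inline uint64_t rotr64(uint64_t x, unsigned n)
{
	return (x >> n) | (x << (64 - n));
}
static inline uint64_t sigma1_inline(uint64_t x)
{
	return rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6);
}

int main(void)
{
	uint64_t v = 0x0123456789abcdefULL;

	/* Both forms compute the same value; only the generated code differs. */
	printf("macro:  %016" PRIx64 "\n", sigma1_macro(v));
	printf("inline: %016" PRIx64 "\n", sigma1_inline(v));
	return 0;
}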
parent 273abcbf66
commit 4bc3b85894

--- a/libbb/sha1.c
+++ b/libbb/sha1.c
@@ -30,11 +30,29 @@
 #include "libbb.h"
 
-#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
-#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
-/* for sha512: */
-#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
+ * (for rotX32, there is no difference). Why? My guess is that
+ * macro requires clever common subexpression elimination heuristics
+ * in gcc, while inline basically forces it to happen.
+ */
+//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
+static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
+{
+	return (x << n) | (x >> (32 - n));
+}
+//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
+static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
+{
+	return (x >> n) | (x << (32 - n));
+}
+/* rotr64 in needed for sha512 only: */
+//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
+static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
+{
+	return (x >> n) | (x << (64 - n));
+}
 
 #if BB_LITTLE_ENDIAN
+/* ALWAYS_INLINE below would hurt code size, using plain inline: */
 static inline uint64_t hton64(uint64_t v)
 {
 	return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32);
@@ -44,14 +62,6 @@ static inline uint64_t hton64(uint64_t v)
 #endif
 #define ntoh64(v) hton64(v)
 
-/* To check alignment gcc has an appropriate operator. Other
-   compilers don't. */
-#if defined(__GNUC__) && __GNUC__ >= 2
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0)
-#else
-# define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0)
-#endif
-
 
 /* Some arch headers have conflicting defines */
 #undef ch
@@ -65,11 +75,8 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 	uint32_t W[80], a, b, c, d, e;
 	const uint32_t *words = (uint32_t*) ctx->wbuffer;
 
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntohl(*words);
-		words++;
-	}
-
+	for (t = 0; t < 16; ++t)
+		W[t] = ntohl(words[t]);
 	for (/*t = 16*/; t < 80; ++t) {
 		uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
 		W[t] = rotl32(T, 1);
@@ -190,11 +197,8 @@ static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 
 	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntohl(*words);
-		words++;
-	}
-
+	for (t = 0; t < 16; ++t)
+		W[t] = ntohl(words[t]);
 	for (/*t = 16*/; t < 64; ++t)
 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
@@ -269,10 +273,8 @@ static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 
 	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
-	for (t = 0; t < 16; ++t) {
-		W[t] = ntoh64(*words);
-		words++;
-	}
+	for (t = 0; t < 16; ++t)
+		W[t] = ntoh64(words[t]);
 	for (/*t = 16*/; t < 80; ++t)
 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 
@@ -363,18 +365,19 @@ void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 /* Used also for sha256 */
 void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 {
-#if 0
 	unsigned bufpos = ctx->total64 & 63;
-	unsigned add = 64 - bufpos;
+	unsigned remaining;
 
 	ctx->total64 += len;
+#if 0
+	remaining = 64 - bufpos;
 
 	/* Hash whole blocks */
-	while (len >= add) {
-		memcpy(ctx->wbuffer + bufpos, buffer, add);
-		buffer = (const char *)buffer + add;
-		len -= add;
-		add = 64;
+	while (len >= remaining) {
+		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+		buffer = (const char *)buffer + remaining;
+		len -= remaining;
+		remaining = 64;
 		bufpos = 0;
 		ctx->process_block(ctx);
 	}
@@ -383,12 +386,8 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 	memcpy(ctx->wbuffer + bufpos, buffer, len);
 #else
 	/* Tiny bit smaller code */
-	unsigned bufpos = ctx->total64 & 63;
-
-	ctx->total64 += len;
-
 	while (1) {
-		unsigned remaining = 64 - bufpos;
+		remaining = 64 - bufpos;
 		if (remaining > len)
 			remaining = len;
 		/* Copy data into aligned buffer */
@@ -409,28 +408,8 @@ void FAST_FUNC sha1_hash(sha1_ctx_t *ctx, const void *buffer, size_t len)
 
 void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 {
-#if 0
-	unsigned bufpos = ctx->total64[0] & 127;
-	unsigned add = 128 - bufpos;
-
-	ctx->total64[0] += len;
-	if (ctx->total64[0] < len)
-		ctx->total64[1]++;
-
-	/* Hash whole blocks */
-	while (len >= add) {
-		memcpy(ctx->wbuffer + bufpos, buffer, add);
-		buffer = (const char *)buffer + add;
-		len -= add;
-		add = 128;
-		bufpos = 0;
-		sha512_process_block128(ctx);
-	}
-
-	/* Save last, partial blosk */
-	memcpy(ctx->wbuffer + bufpos, buffer, len);
-#else
 	unsigned bufpos = ctx->total64[0] & 127;
+	unsigned remaining;
 
 	/* First increment the byte count. FIPS 180-2 specifies the possible
 	   length of the file up to 2^128 _bits_.
@@ -438,12 +417,27 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 	ctx->total64[0] += len;
 	if (ctx->total64[0] < len)
 		ctx->total64[1]++;
+#if 0
+	remaining = 128 - bufpos;
 
+	/* Hash whole blocks */
+	while (len >= remaining) {
+		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
+		buffer = (const char *)buffer + remaining;
+		len -= remaining;
+		remaining = 128;
+		bufpos = 0;
+		sha512_process_block128(ctx);
+	}
+
+	/* Save last, partial blosk */
+	memcpy(ctx->wbuffer + bufpos, buffer, len);
+#else
 	while (1) {
-		unsigned remaining = 128 - bufpos;
+		remaining = 128 - bufpos;
 		if (remaining > len)
 			remaining = len;
-		/* Copy data into aligned buffer. */
+		/* Copy data into aligned buffer */
 		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 		len -= remaining;
 		buffer = (const char *)buffer + remaining;
@@ -452,7 +446,7 @@ void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 		bufpos -= 128;
 		if (bufpos != 0)
 			break;
-		/* Buffer is filled up, process it. */
+		/* Buffer is filled up, process it */
 		sha512_process_block128(ctx);
 		/*bufpos = 0; - already is */
 	}
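For reference, a self-contained sketch of the buffering pattern that the kept "#else" branch uses (the "#if 0" branch copies whole blocks instead). toy_ctx, toy_hash and process_block64 are invented names for this illustration only, and the 64-byte block size matches sha1/sha256 rather than sha512's 128:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct toy_ctx {
	uint64_t total;        /* total bytes hashed so far */
	uint8_t  wbuffer[64];  /* one block of pending input */
};

static void process_block64(struct toy_ctx *ctx)
{
	(void)ctx; /* real code would mix wbuffer into the hash state here */
}

static void toy_hash(struct toy_ctx *ctx, const void *buffer, size_t len)
{
	unsigned bufpos = ctx->total & 63;
	unsigned remaining;

	ctx->total += len;

	while (1) {
		remaining = 64 - bufpos;
		if (remaining > len)
			remaining = len;
		/* Copy data into the aligned work buffer */
		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
		len -= remaining;
		buffer = (const char *)buffer + remaining;
		bufpos += remaining;
		/* bufpos wrapping to exactly 64 means the buffer is full */
		bufpos -= 64;
		if (bufpos != 0)
			break;
		/* Buffer is filled up, process it */
		process_block64(ctx);
		/*bufpos = 0; - already is */
	}
}

int main(void)
{
	struct toy_ctx ctx = { 0 };
	static const char msg[100] = "example input";

	/* Feeding 100 bytes crosses one 64-byte block boundary. */
	toy_hash(&ctx, msg, sizeof(msg));
	return (int)(ctx.total != 100);
}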