libbb/sha1: shrink x86 hardware accelerated hashing
function                                     old     new   delta
sha1_process_block64_shaNI 32-bit            524     517      -7
sha1_process_block64_shaNI 64-bit            510     508      -2

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
4923f74e58
commit
71a1cccaad
@ -32,14 +32,10 @@
|
|||||||
#define MSG1 %xmm4
|
#define MSG1 %xmm4
|
||||||
#define MSG2 %xmm5
|
#define MSG2 %xmm5
|
||||||
#define MSG3 %xmm6
|
#define MSG3 %xmm6
|
||||||
#define SHUF_MASK %xmm7
|
|
||||||
|
|
||||||
.balign 8 # allow decoders to fetch at least 3 first insns
|
.balign 8 # allow decoders to fetch at least 2 first insns
|
||||||
sha1_process_block64_shaNI:
|
sha1_process_block64_shaNI:
|
||||||
pushl %ebp
|
subl $16, %esp
|
||||||
movl %esp, %ebp
|
|
||||||
subl $32, %esp
|
|
||||||
andl $~0xF, %esp # paddd needs aligned memory operand
|
|
||||||
|
|
||||||
/* load initial hash values */
|
/* load initial hash values */
|
||||||
xor128 E0, E0
|
xor128 E0, E0
|
||||||
@ -47,30 +43,33 @@ sha1_process_block64_shaNI:
|
|||||||
pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word
|
pinsrd $3, 76+4*4(%eax), E0 # load to uppermost 32-bit word
|
||||||
shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
|
shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
|
||||||
|
|
||||||
mova128 PSHUFFLE_BYTE_FLIP_MASK, SHUF_MASK
|
mova128 PSHUFFLE_BYTE_FLIP_MASK, %xmm7
|
||||||
|
|
||||||
|
movu128 0*16(%eax), MSG0
|
||||||
|
pshufb %xmm7, MSG0
|
||||||
|
movu128 1*16(%eax), MSG1
|
||||||
|
pshufb %xmm7, MSG1
|
||||||
|
movu128 2*16(%eax), MSG2
|
||||||
|
pshufb %xmm7, MSG2
|
||||||
|
movu128 3*16(%eax), MSG3
|
||||||
|
pshufb %xmm7, MSG3
|
||||||
|
|
||||||
/* Save hash values for addition after rounds */
|
/* Save hash values for addition after rounds */
|
||||||
movu128 E0, 16(%esp)
|
movu128 E0, %xmm7
|
||||||
movu128 ABCD, (%esp)
|
movu128 ABCD, (%esp)
|
||||||
|
|
||||||
/* Rounds 0-3 */
|
/* Rounds 0-3 */
|
||||||
movu128 0*16(%eax), MSG0
|
|
||||||
pshufb SHUF_MASK, MSG0
|
|
||||||
paddd MSG0, E0
|
paddd MSG0, E0
|
||||||
mova128 ABCD, E1
|
mova128 ABCD, E1
|
||||||
sha1rnds4 $0, E0, ABCD
|
sha1rnds4 $0, E0, ABCD
|
||||||
|
|
||||||
/* Rounds 4-7 */
|
/* Rounds 4-7 */
|
||||||
movu128 1*16(%eax), MSG1
|
|
||||||
pshufb SHUF_MASK, MSG1
|
|
||||||
sha1nexte MSG1, E1
|
sha1nexte MSG1, E1
|
||||||
mova128 ABCD, E0
|
mova128 ABCD, E0
|
||||||
sha1rnds4 $0, E1, ABCD
|
sha1rnds4 $0, E1, ABCD
|
||||||
sha1msg1 MSG1, MSG0
|
sha1msg1 MSG1, MSG0
|
||||||
|
|
||||||
/* Rounds 8-11 */
|
/* Rounds 8-11 */
|
||||||
movu128 2*16(%eax), MSG2
|
|
||||||
pshufb SHUF_MASK, MSG2
|
|
||||||
sha1nexte MSG2, E0
|
sha1nexte MSG2, E0
|
||||||
mova128 ABCD, E1
|
mova128 ABCD, E1
|
||||||
sha1rnds4 $0, E0, ABCD
|
sha1rnds4 $0, E0, ABCD
|
||||||
@ -78,8 +77,6 @@ sha1_process_block64_shaNI:
|
|||||||
xor128 MSG2, MSG0
|
xor128 MSG2, MSG0
|
||||||
|
|
||||||
/* Rounds 12-15 */
|
/* Rounds 12-15 */
|
||||||
movu128 3*16(%eax), MSG3
|
|
||||||
pshufb SHUF_MASK, MSG3
|
|
||||||
sha1nexte MSG3, E1
|
sha1nexte MSG3, E1
|
||||||
mova128 ABCD, E0
|
mova128 ABCD, E0
|
||||||
sha1msg2 MSG3, MSG0
|
sha1msg2 MSG3, MSG0
|
||||||
@ -210,16 +207,16 @@ sha1_process_block64_shaNI:
|
|||||||
sha1rnds4 $3, E1, ABCD
|
sha1rnds4 $3, E1, ABCD
|
||||||
|
|
||||||
/* Add current hash values with previously saved */
|
/* Add current hash values with previously saved */
|
||||||
sha1nexte 16(%esp), E0
|
sha1nexte %xmm7, E0
|
||||||
paddd (%esp), ABCD
|
movu128 (%esp), %xmm7
|
||||||
|
paddd %xmm7, ABCD
|
||||||
|
|
||||||
/* Write hash values back in the correct order */
|
/* Write hash values back in the correct order */
|
||||||
shuf128_32 $0x1B, ABCD, ABCD
|
shuf128_32 $0x1B, ABCD, ABCD
|
||||||
movu128 ABCD, 76(%eax)
|
movu128 ABCD, 76(%eax)
|
||||||
extr128_32 $3, E0, 76+4*4(%eax)
|
extr128_32 $3, E0, 76+4*4(%eax)
|
||||||
|
|
||||||
movl %ebp, %esp
|
addl $16, %esp
|
||||||
popl %ebp
|
|
||||||
ret
|
ret
|
||||||
.size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
|
.size sha1_process_block64_shaNI, .-sha1_process_block64_shaNI
|
||||||
|
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
#define MSG1 %xmm4
|
#define MSG1 %xmm4
|
||||||
#define MSG2 %xmm5
|
#define MSG2 %xmm5
|
||||||
#define MSG3 %xmm6
|
#define MSG3 %xmm6
|
||||||
#define SHUF_MASK %xmm7
|
|
||||||
|
|
||||||
.balign 8 # allow decoders to fetch at least 2 first insns
|
.balign 8 # allow decoders to fetch at least 2 first insns
|
||||||
sha1_process_block64_shaNI:
|
sha1_process_block64_shaNI:
|
||||||
@ -43,30 +42,33 @@ sha1_process_block64_shaNI:
|
|||||||
pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word
|
pinsrd $3, 80+4*4(%rdi), E0 # load to uppermost 32-bit word
|
||||||
shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
|
shuf128_32 $0x1B, ABCD, ABCD # DCBA -> ABCD
|
||||||
|
|
||||||
mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
|
mova128 PSHUFFLE_BYTE_FLIP_MASK(%rip), %xmm7
|
||||||
|
|
||||||
|
movu128 0*16(%rdi), MSG0
|
||||||
|
pshufb %xmm7, MSG0
|
||||||
|
movu128 1*16(%rdi), MSG1
|
||||||
|
pshufb %xmm7, MSG1
|
||||||
|
movu128 2*16(%rdi), MSG2
|
||||||
|
pshufb %xmm7, MSG2
|
||||||
|
movu128 3*16(%rdi), MSG3
|
||||||
|
pshufb %xmm7, MSG3
|
||||||
|
|
||||||
/* Save hash values for addition after rounds */
|
/* Save hash values for addition after rounds */
|
||||||
mova128 E0, %xmm9
|
mova128 E0, %xmm7
|
||||||
mova128 ABCD, %xmm8
|
mova128 ABCD, %xmm8
|
||||||
|
|
||||||
/* Rounds 0-3 */
|
/* Rounds 0-3 */
|
||||||
movu128 0*16(%rdi), MSG0
|
|
||||||
pshufb SHUF_MASK, MSG0
|
|
||||||
paddd MSG0, E0
|
paddd MSG0, E0
|
||||||
mova128 ABCD, E1
|
mova128 ABCD, E1
|
||||||
sha1rnds4 $0, E0, ABCD
|
sha1rnds4 $0, E0, ABCD
|
||||||
|
|
||||||
/* Rounds 4-7 */
|
/* Rounds 4-7 */
|
||||||
movu128 1*16(%rdi), MSG1
|
|
||||||
pshufb SHUF_MASK, MSG1
|
|
||||||
sha1nexte MSG1, E1
|
sha1nexte MSG1, E1
|
||||||
mova128 ABCD, E0
|
mova128 ABCD, E0
|
||||||
sha1rnds4 $0, E1, ABCD
|
sha1rnds4 $0, E1, ABCD
|
||||||
sha1msg1 MSG1, MSG0
|
sha1msg1 MSG1, MSG0
|
||||||
|
|
||||||
/* Rounds 8-11 */
|
/* Rounds 8-11 */
|
||||||
movu128 2*16(%rdi), MSG2
|
|
||||||
pshufb SHUF_MASK, MSG2
|
|
||||||
sha1nexte MSG2, E0
|
sha1nexte MSG2, E0
|
||||||
mova128 ABCD, E1
|
mova128 ABCD, E1
|
||||||
sha1rnds4 $0, E0, ABCD
|
sha1rnds4 $0, E0, ABCD
|
||||||
@ -74,8 +76,6 @@ sha1_process_block64_shaNI:
|
|||||||
xor128 MSG2, MSG0
|
xor128 MSG2, MSG0
|
||||||
|
|
||||||
/* Rounds 12-15 */
|
/* Rounds 12-15 */
|
||||||
movu128 3*16(%rdi), MSG3
|
|
||||||
pshufb SHUF_MASK, MSG3
|
|
||||||
sha1nexte MSG3, E1
|
sha1nexte MSG3, E1
|
||||||
mova128 ABCD, E0
|
mova128 ABCD, E0
|
||||||
sha1msg2 MSG3, MSG0
|
sha1msg2 MSG3, MSG0
|
||||||
@ -206,7 +206,7 @@ sha1_process_block64_shaNI:
|
|||||||
sha1rnds4 $3, E1, ABCD
|
sha1rnds4 $3, E1, ABCD
|
||||||
|
|
||||||
/* Add current hash values with previously saved */
|
/* Add current hash values with previously saved */
|
||||||
sha1nexte %xmm9, E0
|
sha1nexte %xmm7, E0
|
||||||
paddd %xmm8, ABCD
|
paddd %xmm8, ABCD
|
||||||
|
|
||||||
/* Write hash values back in the correct order */
|
/* Write hash values back in the correct order */
|
||||||
|
Loading…
Reference in New Issue
Block a user