libbb/sha256: code shrink in 32-bit x86

function                                             old     new   delta
sha256_process_block64_shaNI                         713     697     -16

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent eb8d5f3b8f
commit c0ff0d4528
@@ -31,35 +31,27 @@
|
|||||||
#define MSGTMP1 %xmm4
|
#define MSGTMP1 %xmm4
|
||||||
#define MSGTMP2 %xmm5
|
#define MSGTMP2 %xmm5
|
||||||
#define MSGTMP3 %xmm6
|
#define MSGTMP3 %xmm6
|
||||||
#define XMMTMP4 %xmm7
|
|
||||||
|
|
||||||
.balign 8 # allow decoders to fetch at least 3 first insns
|
#define XMMTMP %xmm7
|
||||||
|
|
||||||
|
.balign 8 # allow decoders to fetch at least 2 first insns
|
||||||
sha256_process_block64_shaNI:
|
sha256_process_block64_shaNI:
|
||||||
pushl %ebp
|
|
||||||
movl %esp, %ebp
|
|
||||||
subl $32, %esp
|
|
||||||
andl $~0xF, %esp # paddd needs aligned memory operand
|
|
||||||
|
|
||||||
movu128 76+0*16(%eax), STATE0
|
movu128 76+0*16(%eax), STATE0
|
||||||
movu128 76+1*16(%eax), STATE1
|
movu128 76+1*16(%eax), STATE1
|
||||||
|
|
||||||
shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
|
shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
|
||||||
shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
|
shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
|
||||||
mova128 STATE0, XMMTMP4
|
mova128 STATE0, XMMTMP
|
||||||
palignr $8, STATE1, STATE0 /* ABEF */
|
palignr $8, STATE1, STATE0 /* ABEF */
|
||||||
pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */
|
pblendw $0xF0, XMMTMP, STATE1 /* CDGH */
|
||||||
|
|
||||||
/* XMMTMP4 holds flip mask from here... */
|
/* XMMTMP holds flip mask from here... */
|
||||||
mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP4
|
mova128 PSHUFFLE_BSWAP32_FLIP_MASK, XMMTMP
|
||||||
movl $K256+8*16, SHA256CONSTANTS
|
movl $K256+8*16, SHA256CONSTANTS
|
||||||
|
|
||||||
/* Save hash values for addition after rounds */
|
|
||||||
mova128 STATE0, 0*16(%esp)
|
|
||||||
mova128 STATE1, 1*16(%esp)
|
|
||||||
|
|
||||||
/* Rounds 0-3 */
|
/* Rounds 0-3 */
|
||||||
movu128 0*16(DATA_PTR), MSG
|
movu128 0*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP0
|
mova128 MSG, MSGTMP0
|
||||||
paddd 0*16-8*16(SHA256CONSTANTS), MSG
|
paddd 0*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -68,7 +60,7 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 4-7 */
|
/* Rounds 4-7 */
|
||||||
movu128 1*16(DATA_PTR), MSG
|
movu128 1*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP1
|
mova128 MSG, MSGTMP1
|
||||||
paddd 1*16-8*16(SHA256CONSTANTS), MSG
|
paddd 1*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -78,7 +70,7 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 8-11 */
|
/* Rounds 8-11 */
|
||||||
movu128 2*16(DATA_PTR), MSG
|
movu128 2*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP2
|
mova128 MSG, MSGTMP2
|
||||||
paddd 2*16-8*16(SHA256CONSTANTS), MSG
|
paddd 2*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -88,14 +80,14 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 12-15 */
|
/* Rounds 12-15 */
|
||||||
movu128 3*16(DATA_PTR), MSG
|
movu128 3*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
/* ...to here */
|
/* ...to here */
|
||||||
mova128 MSG, MSGTMP3
|
mova128 MSG, MSGTMP3
|
||||||
paddd 3*16-8*16(SHA256CONSTANTS), MSG
|
paddd 3*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -105,9 +97,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 4*16-8*16(SHA256CONSTANTS), MSG
|
paddd 4*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -117,9 +109,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 5*16-8*16(SHA256CONSTANTS), MSG
|
paddd 5*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -129,9 +121,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 6*16-8*16(SHA256CONSTANTS), MSG
|
paddd 6*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -141,9 +133,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP3, MSG
|
mova128 MSGTMP3, MSG
|
||||||
paddd 7*16-8*16(SHA256CONSTANTS), MSG
|
paddd 7*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -153,9 +145,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 8*16-8*16(SHA256CONSTANTS), MSG
|
paddd 8*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -165,9 +157,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 9*16-8*16(SHA256CONSTANTS), MSG
|
paddd 9*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -177,9 +169,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 10*16-8*16(SHA256CONSTANTS), MSG
|
paddd 10*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -189,9 +181,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP3, MSG
|
mova128 MSGTMP3, MSG
|
||||||
paddd 11*16-8*16(SHA256CONSTANTS), MSG
|
paddd 11*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -201,9 +193,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 12*16-8*16(SHA256CONSTANTS), MSG
|
paddd 12*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -213,9 +205,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 13*16-8*16(SHA256CONSTANTS), MSG
|
paddd 13*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -224,9 +216,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 14*16-8*16(SHA256CONSTANTS), MSG
|
paddd 14*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -238,22 +230,20 @@ sha256_process_block64_shaNI:
|
|||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
|
|
||||||
/* Add current hash values with previously saved */
|
|
||||||
paddd 0*16(%esp), STATE0
|
|
||||||
paddd 1*16(%esp), STATE1
|
|
||||||
|
|
||||||
/* Write hash values back in the correct order */
|
/* Write hash values back in the correct order */
|
||||||
shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
|
shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
|
||||||
shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
|
shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
|
||||||
mova128 STATE0, XMMTMP4
|
mova128 STATE0, XMMTMP
|
||||||
pblendw $0xF0, STATE1, STATE0 /* DCBA */
|
pblendw $0xF0, STATE1, STATE0 /* DCBA */
|
||||||
palignr $8, XMMTMP4, STATE1 /* HGFE */
|
palignr $8, XMMTMP, STATE1 /* HGFE */
|
||||||
|
/* add current hash values to previous ones */
|
||||||
|
movu128 76+0*16(%eax), XMMTMP
|
||||||
|
paddd XMMTMP, STATE0
|
||||||
|
movu128 76+1*16(%eax), XMMTMP
|
||||||
movu128 STATE0, 76+0*16(%eax)
|
movu128 STATE0, 76+0*16(%eax)
|
||||||
|
paddd XMMTMP, STATE1
|
||||||
movu128 STATE1, 76+1*16(%eax)
|
movu128 STATE1, 76+1*16(%eax)
|
||||||
|
|
||||||
movl %ebp, %esp
|
|
||||||
popl %ebp
|
|
||||||
ret
|
ret
|
||||||
.size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI
|
.size sha256_process_block64_shaNI, .-sha256_process_block64_shaNI
|
||||||
|
|
||||||
|
@@ -31,7 +31,8 @@
|
|||||||
#define MSGTMP1 %xmm4
|
#define MSGTMP1 %xmm4
|
||||||
#define MSGTMP2 %xmm5
|
#define MSGTMP2 %xmm5
|
||||||
#define MSGTMP3 %xmm6
|
#define MSGTMP3 %xmm6
|
||||||
#define XMMTMP4 %xmm7
|
|
||||||
|
#define XMMTMP %xmm7
|
||||||
|
|
||||||
#define ABEF_SAVE %xmm9
|
#define ABEF_SAVE %xmm9
|
||||||
#define CDGH_SAVE %xmm10
|
#define CDGH_SAVE %xmm10
|
||||||
@@ -41,14 +42,14 @@ sha256_process_block64_shaNI:
|
|||||||
movu128 80+0*16(%rdi), STATE0
|
movu128 80+0*16(%rdi), STATE0
|
||||||
movu128 80+1*16(%rdi), STATE1
|
movu128 80+1*16(%rdi), STATE1
|
||||||
|
|
||||||
shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
|
shuf128_32 $0xB1, STATE0, STATE0 /* CDAB */
|
||||||
shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
|
shuf128_32 $0x1B, STATE1, STATE1 /* EFGH */
|
||||||
mova128 STATE0, XMMTMP4
|
mova128 STATE0, XMMTMP
|
||||||
palignr $8, STATE1, STATE0 /* ABEF */
|
palignr $8, STATE1, STATE0 /* ABEF */
|
||||||
pblendw $0xF0, XMMTMP4, STATE1 /* CDGH */
|
pblendw $0xF0, XMMTMP, STATE1 /* CDGH */
|
||||||
|
|
||||||
/* XMMTMP4 holds flip mask from here... */
|
/* XMMTMP holds flip mask from here... */
|
||||||
mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP4
|
mova128 PSHUFFLE_BSWAP32_FLIP_MASK(%rip), XMMTMP
|
||||||
leaq K256+8*16(%rip), SHA256CONSTANTS
|
leaq K256+8*16(%rip), SHA256CONSTANTS
|
||||||
|
|
||||||
/* Save hash values for addition after rounds */
|
/* Save hash values for addition after rounds */
|
||||||
@@ -57,7 +58,7 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 0-3 */
|
/* Rounds 0-3 */
|
||||||
movu128 0*16(DATA_PTR), MSG
|
movu128 0*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP0
|
mova128 MSG, MSGTMP0
|
||||||
paddd 0*16-8*16(SHA256CONSTANTS), MSG
|
paddd 0*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -66,7 +67,7 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 4-7 */
|
/* Rounds 4-7 */
|
||||||
movu128 1*16(DATA_PTR), MSG
|
movu128 1*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP1
|
mova128 MSG, MSGTMP1
|
||||||
paddd 1*16-8*16(SHA256CONSTANTS), MSG
|
paddd 1*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -76,7 +77,7 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 8-11 */
|
/* Rounds 8-11 */
|
||||||
movu128 2*16(DATA_PTR), MSG
|
movu128 2*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
mova128 MSG, MSGTMP2
|
mova128 MSG, MSGTMP2
|
||||||
paddd 2*16-8*16(SHA256CONSTANTS), MSG
|
paddd 2*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
@@ -86,14 +87,14 @@ sha256_process_block64_shaNI:
|
|||||||
|
|
||||||
/* Rounds 12-15 */
|
/* Rounds 12-15 */
|
||||||
movu128 3*16(DATA_PTR), MSG
|
movu128 3*16(DATA_PTR), MSG
|
||||||
pshufb XMMTMP4, MSG
|
pshufb XMMTMP, MSG
|
||||||
/* ...to here */
|
/* ...to here */
|
||||||
mova128 MSG, MSGTMP3
|
mova128 MSG, MSGTMP3
|
||||||
paddd 3*16-8*16(SHA256CONSTANTS), MSG
|
paddd 3*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -103,9 +104,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 4*16-8*16(SHA256CONSTANTS), MSG
|
paddd 4*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -115,9 +116,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 5*16-8*16(SHA256CONSTANTS), MSG
|
paddd 5*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -127,9 +128,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 6*16-8*16(SHA256CONSTANTS), MSG
|
paddd 6*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -139,9 +140,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP3, MSG
|
mova128 MSGTMP3, MSG
|
||||||
paddd 7*16-8*16(SHA256CONSTANTS), MSG
|
paddd 7*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -151,9 +152,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 8*16-8*16(SHA256CONSTANTS), MSG
|
paddd 8*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -163,9 +164,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 9*16-8*16(SHA256CONSTANTS), MSG
|
paddd 9*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -175,9 +176,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 10*16-8*16(SHA256CONSTANTS), MSG
|
paddd 10*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -187,9 +188,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP3, MSG
|
mova128 MSGTMP3, MSG
|
||||||
paddd 11*16-8*16(SHA256CONSTANTS), MSG
|
paddd 11*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP3, XMMTMP4
|
mova128 MSGTMP3, XMMTMP
|
||||||
palignr $4, MSGTMP2, XMMTMP4
|
palignr $4, MSGTMP2, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP0
|
paddd XMMTMP, MSGTMP0
|
||||||
sha256msg2 MSGTMP3, MSGTMP0
|
sha256msg2 MSGTMP3, MSGTMP0
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -199,9 +200,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP0, MSG
|
mova128 MSGTMP0, MSG
|
||||||
paddd 12*16-8*16(SHA256CONSTANTS), MSG
|
paddd 12*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP0, XMMTMP4
|
mova128 MSGTMP0, XMMTMP
|
||||||
palignr $4, MSGTMP3, XMMTMP4
|
palignr $4, MSGTMP3, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP1
|
paddd XMMTMP, MSGTMP1
|
||||||
sha256msg2 MSGTMP0, MSGTMP1
|
sha256msg2 MSGTMP0, MSGTMP1
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -211,9 +212,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP1, MSG
|
mova128 MSGTMP1, MSG
|
||||||
paddd 13*16-8*16(SHA256CONSTANTS), MSG
|
paddd 13*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP1, XMMTMP4
|
mova128 MSGTMP1, XMMTMP
|
||||||
palignr $4, MSGTMP0, XMMTMP4
|
palignr $4, MSGTMP0, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP2
|
paddd XMMTMP, MSGTMP2
|
||||||
sha256msg2 MSGTMP1, MSGTMP2
|
sha256msg2 MSGTMP1, MSGTMP2
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -222,9 +223,9 @@ sha256_process_block64_shaNI:
|
|||||||
mova128 MSGTMP2, MSG
|
mova128 MSGTMP2, MSG
|
||||||
paddd 14*16-8*16(SHA256CONSTANTS), MSG
|
paddd 14*16-8*16(SHA256CONSTANTS), MSG
|
||||||
sha256rnds2 STATE0, STATE1
|
sha256rnds2 STATE0, STATE1
|
||||||
mova128 MSGTMP2, XMMTMP4
|
mova128 MSGTMP2, XMMTMP
|
||||||
palignr $4, MSGTMP1, XMMTMP4
|
palignr $4, MSGTMP1, XMMTMP
|
||||||
paddd XMMTMP4, MSGTMP3
|
paddd XMMTMP, MSGTMP3
|
||||||
sha256msg2 MSGTMP2, MSGTMP3
|
sha256msg2 MSGTMP2, MSGTMP3
|
||||||
shuf128_32 $0x0E, MSG, MSG
|
shuf128_32 $0x0E, MSG, MSG
|
||||||
sha256rnds2 STATE1, STATE0
|
sha256rnds2 STATE1, STATE0
|
||||||
@@ -241,11 +242,11 @@ sha256_process_block64_shaNI:
|
|||||||
paddd CDGH_SAVE, STATE1
|
paddd CDGH_SAVE, STATE1
|
||||||
|
|
||||||
/* Write hash values back in the correct order */
|
/* Write hash values back in the correct order */
|
||||||
shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
|
shuf128_32 $0x1B, STATE0, STATE0 /* FEBA */
|
||||||
shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
|
shuf128_32 $0xB1, STATE1, STATE1 /* DCHG */
|
||||||
mova128 STATE0, XMMTMP4
|
mova128 STATE0, XMMTMP
|
||||||
pblendw $0xF0, STATE1, STATE0 /* DCBA */
|
pblendw $0xF0, STATE1, STATE0 /* DCBA */
|
||||||
palignr $8, XMMTMP4, STATE1 /* HGFE */
|
palignr $8, XMMTMP, STATE1 /* HGFE */
|
||||||
|
|
||||||
movu128 STATE0, 80+0*16(%rdi)
|
movu128 STATE0, 80+0*16(%rdi)
|
||||||
movu128 STATE1, 80+1*16(%rdi)
|
movu128 STATE1, 80+1*16(%rdi)
|
||||||
|
Loading…
Reference in New Issue
Block a user