Denys Vlasenko 39369ff460 libbb/sha1: use SSE2 in unrolled x86-64 code. ~10% faster
function                                             old     new   delta
.rodata                                           108241  108305     +64
sha1_process_block64                                3502    3495      -7
(add/remove: 5/0 grow/shrink: 1/1 up/down: 64/-7)              Total: 57 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2022-01-23 12:57:27 +01:00

1414 lines
47 KiB

### Generated by hash_md5_sha_x86-64.S.sh ###
#if CONFIG_SHA1_SMALL == 0 && defined(__GNUC__) && defined(__x86_64__)
# sha1_process_block64: unrolled SHA-1 block transform (x86-64, AT&T syntax).
# In: %rdi = ctx; the code that follows reads sixteen 32-bit message
#     words at 0..63(%rdi) and the hash state a..e at 80..96(%rdi).
# The .globl/.hidden/.type directives only describe the symbol by name;
# the label placed after .balign is what defines its address.
.section .text.sha1_process_block64,"ax",@progbits
.globl sha1_process_block64
.hidden sha1_process_block64
.type sha1_process_block64, @function
.balign 8 # allow decoders to fetch at least 5 first insns
sha1_process_block64:
# rbp, rbx and r12..r15 are overwritten by the code below, save them first
pushq %rbp # 1 byte insn
pushq %rbx # 1 byte insn
pushq %r15 # 2 byte insn
pushq %r14 # 2 byte insn
pushq %r13 # 2 byte insn
pushq %r12 # 2 byte insn
pushq %rdi # we need ctx at the end
#Register and stack use:
# eax..edx: a..d
# ebp: e
# esi,edi: temps
# xmm0..xmm3: W[]
# xmm4,xmm5: temps
# xmm6: current round constant
# -64(%rsp): area for passing RCONST + W[] from vector to integer units
movl 80(%rdi), %eax # a = ctx->hash[0]
movl 84(%rdi), %ebx # b = ctx->hash[1]
movl 88(%rdi), %ecx # c = ctx->hash[2]
movl 92(%rdi), %edx # d = ctx->hash[3]
movl 96(%rdi), %ebp # e = ctx->hash[4]
movaps rconst0x5A827999(%rip), %xmm6
# For round 1, steps 0 and 8..15, we pass W[0,8..15] in esi,r8..r15
# instead of spilling them to stack.
# (We lose parallelized addition of RCONST, but LEA
# can do two additions at once, so...)
movq 4*0(%rdi), %rsi
movq 4*2(%rdi), %r10
bswapq %rsi
bswapq %r10
rolq $32, %rsi # rsi = W[1]:W[0]
rolq $32, %r10
movq %rsi, %xmm0
movq %r10, %xmm4
punpcklqdq %xmm4, %xmm0 # xmm0 = r10:rsi = (W[0],W[1],W[2],W[3])
movaps %xmm0, %xmm4
paddd %xmm6, %xmm4
movups %xmm4, -64+4*0(%rsp)
movq 4*4(%rdi), %r8
movq 4*6(%rdi), %r10
bswapq %r8
bswapq %r10
rolq $32, %r8
rolq $32, %r10
movq %r8, %xmm1
movq %r10, %xmm4
punpcklqdq %xmm4, %xmm1 # xmm1 = r10:r8 = (W[4],W[5],W[6],W[7])
movaps %xmm1, %xmm4
paddd %xmm6, %xmm4
movups %xmm4, -64+4*4(%rsp)
movq 4*8(%rdi), %r8
movq 4*10(%rdi), %r10
bswapq %r8
bswapq %r10
movl %r8d, %r9d # r9d = W[9]
rolq $32, %r8 # r8 = W[9]:W[8]
movl %r10d, %r11d # r11d = W[11]
rolq $32, %r10 # r10 = W[11]:W[10]
movq %r8, %xmm2
movq %r10, %xmm4
punpcklqdq %xmm4, %xmm2 # xmm2 = r10:r8 = (W[8],W[9],W[10],W[11])
movq 4*12(%rdi), %r12
movq 4*14(%rdi), %r14
bswapq %r12
bswapq %r14
movl %r12d, %r13d # r13d = W[13]
rolq $32, %r12 # r12 = W[13]:W[12]
movl %r14d, %r15d # r15d = W[15]
rolq $32, %r14 # r14 = W[15]:W[14]
movq %r12, %xmm3
movq %r14, %xmm4
punpcklqdq %xmm4, %xmm3 # xmm3 = r14:r12 = (W[12],W[13],W[14],W[15])
# 0
leal 0x5A827999(%rbp,%rsi), %ebp # e += RCONST + W[n]
movl %ecx, %edi # c
xorl %edx, %edi # ^d
andl %ebx, %edi # &b
xorl %edx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 1
addl -64+4*1(%rsp), %edx # e += RCONST + W[n]
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
andl %eax, %edi # &b
xorl %ecx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %edx # e += (((c ^ d) & b) ^ d)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 2
addl -64+4*2(%rsp), %ecx # e += RCONST + W[n]
movl %eax, %edi # c
xorl %ebx, %edi # ^d
andl %ebp, %edi # &b
xorl %ebx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 3
addl -64+4*3(%rsp), %ebx # e += RCONST + W[n]
movl %ebp, %edi # c
xorl %eax, %edi # ^d
andl %edx, %edi # &b
xorl %eax, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 4
addl -64+4*4(%rsp), %eax # e += RCONST + W[n]
movl %edx, %edi # c
xorl %ebp, %edi # ^d
andl %ecx, %edi # &b
xorl %ebp, %edi # (((c ^ d) & b) ^ d)
addl %edi, %eax # e += (((c ^ d) & b) ^ d)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 5
addl -64+4*5(%rsp), %ebp # e += RCONST + W[n]
movl %ecx, %edi # c
xorl %edx, %edi # ^d
andl %ebx, %edi # &b
xorl %edx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 6
addl -64+4*6(%rsp), %edx # e += RCONST + W[n]
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
andl %eax, %edi # &b
xorl %ecx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %edx # e += (((c ^ d) & b) ^ d)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 7
addl -64+4*7(%rsp), %ecx # e += RCONST + W[n]
movl %eax, %edi # c
xorl %ebx, %edi # ^d
andl %ebp, %edi # &b
xorl %ebx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# PREP %xmm0 %xmm1 %xmm2 %xmm3 -64+16*0(%rsp)
movaps %xmm3, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm0, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm1, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm2, %xmm0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm0 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm0, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm0, %xmm0 # shift left by 1
psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm0 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm0 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm0, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*0(%rsp)
# 8
leal 0x5A827999(%rbx,%r8), %ebx # e += RCONST + W[n]
movl %ebp, %edi # c
xorl %eax, %edi # ^d
andl %edx, %edi # &b
xorl %eax, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 9
leal 0x5A827999(%rax,%r9), %eax # e += RCONST + W[n]
movl %edx, %edi # c
xorl %ebp, %edi # ^d
andl %ecx, %edi # &b
xorl %ebp, %edi # (((c ^ d) & b) ^ d)
addl %edi, %eax # e += (((c ^ d) & b) ^ d)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 10
leal 0x5A827999(%rbp,%r10), %ebp # e += RCONST + W[n]
movl %ecx, %edi # c
xorl %edx, %edi # ^d
andl %ebx, %edi # &b
xorl %edx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 11
leal 0x5A827999(%rdx,%r11), %edx # e += RCONST + W[n]
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
andl %eax, %edi # &b
xorl %ecx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %edx # e += (((c ^ d) & b) ^ d)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
movaps rconst0x6ED9EBA1(%rip), %xmm6
# PREP %xmm1 %xmm2 %xmm3 %xmm0 -64+16*1(%rsp)
movaps %xmm0, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm1, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm2, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm3, %xmm1 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm1 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm1, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm1, %xmm1 # shift left by 1
psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm1 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm1 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm1, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*1(%rsp)
# 12
leal 0x5A827999(%rcx,%r12), %ecx # e += RCONST + W[n]
movl %eax, %edi # c
xorl %ebx, %edi # ^d
andl %ebp, %edi # &b
xorl %ebx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 13
leal 0x5A827999(%rbx,%r13), %ebx # e += RCONST + W[n]
movl %ebp, %edi # c
xorl %eax, %edi # ^d
andl %edx, %edi # &b
xorl %eax, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 14
leal 0x5A827999(%rax,%r14), %eax # e += RCONST + W[n]
movl %edx, %edi # c
xorl %ebp, %edi # ^d
andl %ecx, %edi # &b
xorl %ebp, %edi # (((c ^ d) & b) ^ d)
addl %edi, %eax # e += (((c ^ d) & b) ^ d)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 15
leal 0x5A827999(%rbp,%r15), %ebp # e += RCONST + W[n]
movl %ecx, %edi # c
xorl %edx, %edi # ^d
andl %ebx, %edi # &b
xorl %edx, %edi # (((c ^ d) & b) ^ d)
addl %edi, %ebp # e += (((c ^ d) & b) ^ d)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# PREP %xmm2 %xmm3 %xmm0 %xmm1 -64+16*2(%rsp)
movaps %xmm1, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm2, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm3, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm0, %xmm2 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm2 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm2, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm2, %xmm2 # shift left by 1
psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm2 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm2 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm2, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*2(%rsp)
# 16
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
andl %eax, %edi # &b
xorl %ecx, %edi # (((c ^ d) & b) ^ d)
addl -64+4*0(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (((c ^ d) & b) ^ d)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 17
movl %eax, %edi # c
xorl %ebx, %edi # ^d
andl %ebp, %edi # &b
xorl %ebx, %edi # (((c ^ d) & b) ^ d)
addl -64+4*1(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (((c ^ d) & b) ^ d)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 18
movl %ebp, %edi # c
xorl %eax, %edi # ^d
andl %edx, %edi # &b
xorl %eax, %edi # (((c ^ d) & b) ^ d)
addl -64+4*2(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (((c ^ d) & b) ^ d)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 19
movl %edx, %edi # c
xorl %ebp, %edi # ^d
andl %ecx, %edi # &b
xorl %ebp, %edi # (((c ^ d) & b) ^ d)
addl -64+4*3(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (((c ^ d) & b) ^ d)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# PREP %xmm3 %xmm0 %xmm1 %xmm2 -64+16*3(%rsp)
movaps %xmm2, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm3, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm0, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm1, %xmm3 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm3 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm3, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm3, %xmm3 # shift left by 1
psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm3 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm3 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm3, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*3(%rsp)
# 20
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*4(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 21
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*5(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 22
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*6(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 23
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*7(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# PREP %xmm0 %xmm1 %xmm2 %xmm3 -64+16*0(%rsp)
movaps %xmm3, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm0, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm1, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm2, %xmm0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm0 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm0, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm0, %xmm0 # shift left by 1
psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm0 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm0 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm0, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*0(%rsp)
# 24
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*8(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 25
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*9(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 26
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*10(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 27
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*11(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# PREP %xmm1 %xmm2 %xmm3 %xmm0 -64+16*1(%rsp)
movaps %xmm0, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm1, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm2, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm3, %xmm1 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm1 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm1, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm1, %xmm1 # shift left by 1
psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm1 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm1 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm1, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*1(%rsp)
# 28
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*12(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 29
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*13(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 30
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*14(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 31
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*15(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
movaps rconst0x8F1BBCDC(%rip), %xmm6
# PREP %xmm2 %xmm3 %xmm0 %xmm1 -64+16*2(%rsp)
movaps %xmm1, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm2, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm3, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm0, %xmm2 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm2 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm2, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm2, %xmm2 # shift left by 1
psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm2 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm2 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm2, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*2(%rsp)
# 32
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*0(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 33
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*1(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 34
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*2(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 35
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*3(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# PREP %xmm3 %xmm0 %xmm1 %xmm2 -64+16*3(%rsp)
movaps %xmm2, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm3, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm0, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm1, %xmm3 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm3 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm3, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm3, %xmm3 # shift left by 1
psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm3 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm3 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm3, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*3(%rsp)
# 36
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*4(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 37
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*5(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 38
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*6(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 39
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*7(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# PREP %xmm0 %xmm1 %xmm2 %xmm3 -64+16*0(%rsp)
movaps %xmm3, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm0, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm1, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm2, %xmm0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm0 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm0, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm0, %xmm0 # shift left by 1
psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm0 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm0 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm0, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*0(%rsp)
# 40
movl %ebx, %edi # di: b
movl %ebx, %esi # si: b
orl %ecx, %edi # di: b | c
andl %ecx, %esi # si: b & c
andl %edx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebp # += ((b | c) & d) | (b & c)
addl -64+4*8(%rsp), %ebp # e += RCONST + W[n & 15]
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 41
movl %eax, %edi # di: b
movl %eax, %esi # si: b
orl %ebx, %edi # di: b | c
andl %ebx, %esi # si: b & c
andl %ecx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %edx # += ((b | c) & d) | (b & c)
addl -64+4*9(%rsp), %edx # e += RCONST + W[n & 15]
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 42
movl %ebp, %edi # di: b
movl %ebp, %esi # si: b
orl %eax, %edi # di: b | c
andl %eax, %esi # si: b & c
andl %ebx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ecx # += ((b | c) & d) | (b & c)
addl -64+4*10(%rsp), %ecx # e += RCONST + W[n & 15]
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 43
movl %edx, %edi # di: b
movl %edx, %esi # si: b
orl %ebp, %edi # di: b | c
andl %ebp, %esi # si: b & c
andl %eax, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebx # += ((b | c) & d) | (b & c)
addl -64+4*11(%rsp), %ebx # e += RCONST + W[n & 15]
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# PREP %xmm1 %xmm2 %xmm3 %xmm0 -64+16*1(%rsp)
movaps %xmm0, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm1, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm2, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm3, %xmm1 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm1 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm1, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm1, %xmm1 # shift left by 1
psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm1 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm1 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm1, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*1(%rsp)
# 44
movl %ecx, %edi # di: b
movl %ecx, %esi # si: b
orl %edx, %edi # di: b | c
andl %edx, %esi # si: b & c
andl %ebp, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %eax # += ((b | c) & d) | (b & c)
addl -64+4*12(%rsp), %eax # e += RCONST + W[n & 15]
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 45
movl %ebx, %edi # di: b
movl %ebx, %esi # si: b
orl %ecx, %edi # di: b | c
andl %ecx, %esi # si: b & c
andl %edx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebp # += ((b | c) & d) | (b & c)
addl -64+4*13(%rsp), %ebp # e += RCONST + W[n & 15]
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 46
movl %eax, %edi # di: b
movl %eax, %esi # si: b
orl %ebx, %edi # di: b | c
andl %ebx, %esi # si: b & c
andl %ecx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %edx # += ((b | c) & d) | (b & c)
addl -64+4*14(%rsp), %edx # e += RCONST + W[n & 15]
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 47
movl %ebp, %edi # di: b
movl %ebp, %esi # si: b
orl %eax, %edi # di: b | c
andl %eax, %esi # si: b & c
andl %ebx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ecx # += ((b | c) & d) | (b & c)
addl -64+4*15(%rsp), %ecx # e += RCONST + W[n & 15]
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# PREP %xmm2 %xmm3 %xmm0 %xmm1 -64+16*2(%rsp)
movaps %xmm1, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm2, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm3, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm0, %xmm2 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm2 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm2, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm2, %xmm2 # shift left by 1
psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm2 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm2 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm2, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*2(%rsp)
# 48
movl %edx, %edi # di: b
movl %edx, %esi # si: b
orl %ebp, %edi # di: b | c
andl %ebp, %esi # si: b & c
andl %eax, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebx # += ((b | c) & d) | (b & c)
addl -64+4*0(%rsp), %ebx # e += RCONST + W[n & 15]
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 49
movl %ecx, %edi # di: b
movl %ecx, %esi # si: b
orl %edx, %edi # di: b | c
andl %edx, %esi # si: b & c
andl %ebp, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %eax # += ((b | c) & d) | (b & c)
addl -64+4*1(%rsp), %eax # e += RCONST + W[n & 15]
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 50
movl %ebx, %edi # di: b
movl %ebx, %esi # si: b
orl %ecx, %edi # di: b | c
andl %ecx, %esi # si: b & c
andl %edx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebp # += ((b | c) & d) | (b & c)
addl -64+4*2(%rsp), %ebp # e += RCONST + W[n & 15]
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 51
movl %eax, %edi # di: b
movl %eax, %esi # si: b
orl %ebx, %edi # di: b | c
andl %ebx, %esi # si: b & c
andl %ecx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %edx # += ((b | c) & d) | (b & c)
addl -64+4*3(%rsp), %edx # e += RCONST + W[n & 15]
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
movaps rconst0xCA62C1D6(%rip), %xmm6
# PREP %xmm3 %xmm0 %xmm1 %xmm2 -64+16*3(%rsp)
movaps %xmm2, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm3, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm0, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm1, %xmm3 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm3 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm3, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm3, %xmm3 # shift left by 1
psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm3 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm3 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm3, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*3(%rsp)
# 52
movl %ebp, %edi # di: b
movl %ebp, %esi # si: b
orl %eax, %edi # di: b | c
andl %eax, %esi # si: b & c
andl %ebx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ecx # += ((b | c) & d) | (b & c)
addl -64+4*4(%rsp), %ecx # e += RCONST + W[n & 15]
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 53
movl %edx, %edi # di: b
movl %edx, %esi # si: b
orl %ebp, %edi # di: b | c
andl %ebp, %esi # si: b & c
andl %eax, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebx # += ((b | c) & d) | (b & c)
addl -64+4*5(%rsp), %ebx # e += RCONST + W[n & 15]
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 54
movl %ecx, %edi # di: b
movl %ecx, %esi # si: b
orl %edx, %edi # di: b | c
andl %edx, %esi # si: b & c
andl %ebp, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %eax # += ((b | c) & d) | (b & c)
addl -64+4*6(%rsp), %eax # e += RCONST + W[n & 15]
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 55
movl %ebx, %edi # di: b
movl %ebx, %esi # si: b
orl %ecx, %edi # di: b | c
andl %ecx, %esi # si: b & c
andl %edx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebp # += ((b | c) & d) | (b & c)
addl -64+4*7(%rsp), %ebp # e += RCONST + W[n & 15]
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# PREP %xmm0 %xmm1 %xmm2 %xmm3 -64+16*0(%rsp)
movaps %xmm3, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm0, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm1, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm2, %xmm0 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm0 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm0, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm0, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm0, %xmm0 # shift left by 1
psubd %xmm4, %xmm0 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm0 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm0 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm0, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*0(%rsp)
# 56
movl %eax, %edi # di: b
movl %eax, %esi # si: b
orl %ebx, %edi # di: b | c
andl %ebx, %esi # si: b & c
andl %ecx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %edx # += ((b | c) & d) | (b & c)
addl -64+4*8(%rsp), %edx # e += RCONST + W[n & 15]
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 57
movl %ebp, %edi # di: b
movl %ebp, %esi # si: b
orl %eax, %edi # di: b | c
andl %eax, %esi # si: b & c
andl %ebx, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ecx # += ((b | c) & d) | (b & c)
addl -64+4*9(%rsp), %ecx # e += RCONST + W[n & 15]
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 58
movl %edx, %edi # di: b
movl %edx, %esi # si: b
orl %ebp, %edi # di: b | c
andl %ebp, %esi # si: b & c
andl %eax, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %ebx # += ((b | c) & d) | (b & c)
addl -64+4*10(%rsp), %ebx # e += RCONST + W[n & 15]
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 59
movl %ecx, %edi # di: b
movl %ecx, %esi # si: b
orl %edx, %edi # di: b | c
andl %edx, %esi # si: b & c
andl %ebp, %edi # di: (b | c) & d
orl %esi, %edi # ((b | c) & d) | (b & c)
addl %edi, %eax # += ((b | c) & d) | (b & c)
addl -64+4*11(%rsp), %eax # e += RCONST + W[n & 15]
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# PREP %xmm1 %xmm2 %xmm3 %xmm0 -64+16*1(%rsp)
movaps %xmm0, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm1, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm2, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm3, %xmm1 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm1 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm1, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm1, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm1, %xmm1 # shift left by 1
psubd %xmm4, %xmm1 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm1 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm1 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm1, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*1(%rsp)
# 60
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*12(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 61
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*13(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 62
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*14(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 63
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*15(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# PREP %xmm2 %xmm3 %xmm0 %xmm1 -64+16*2(%rsp)
movaps %xmm1, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm2, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm3, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm0, %xmm2 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm2 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm2, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm2, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm2, %xmm2 # shift left by 1
psubd %xmm4, %xmm2 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm2 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm2 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm2, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*2(%rsp)
# 64
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*0(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 65
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*1(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 66
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*2(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 67
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*3(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# PREP %xmm3 %xmm0 %xmm1 %xmm2 -64+16*3(%rsp)
movaps %xmm2, %xmm4
psrldq $4, %xmm4 # rshift by 4 bytes: T1 = ([13],[14],[15],0)
pshufd $0x4e, %xmm3, %xmm5 # 01001110=2,3,0,1 shuffle, ([2],[3],x,x)
punpcklqdq %xmm0, %xmm5 # T2 = W4[0..63]:T2[0..63] = ([2],[3],[4],[5])
xorps %xmm1, %xmm3 # ([8],[9],[10],[11]) ^ ([0],[1],[2],[3])
xorps %xmm4, %xmm5 # ([13],[14],[15],0) ^ ([2],[3],[4],[5])
xorps %xmm5, %xmm3 # ^
# W0 = unrotated (W[0]..W[3]), still needs W[3] fixup
movaps %xmm3, %xmm5
xorps %xmm4, %xmm4 # rol(W0,1):
pcmpgtd %xmm3, %xmm4 # ffffffff for elements <0 (ones with msb bit 1)
paddd %xmm3, %xmm3 # shift left by 1
psubd %xmm4, %xmm3 # add 1 to those who had msb bit 1
# W0 = rotated (W[0]..W[3]), still needs W[3] fixup
pslldq $12, %xmm5 # lshift by 12 bytes: T2 = (0,0,0,unrotW[0])
movaps %xmm5, %xmm4
pslld $2, %xmm5
psrld $30, %xmm4
# xorps %xmm4, %xmm5 # rol((0,0,0,unrotW[0]),2)
xorps %xmm4, %xmm3 # same result, but does not depend on/does not modify T2
xorps %xmm5, %xmm3 # W0 = rol(W[0]..W[3],1) ^ (0,0,0,rol(unrotW[0],2))
movaps %xmm3, %xmm5
paddd %xmm6, %xmm5
movups %xmm5, -64+16*3(%rsp)
# 68
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*4(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 69
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*5(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 70
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*6(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 71
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*7(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 72
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*8(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 73
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*9(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 74
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*10(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
# 75
movl %ecx, %edi # c
xorl %edx, %edi # ^d
xorl %ebx, %edi # ^b
addl -64+4*11(%rsp), %ebp # e += RCONST + W[n & 15]
addl %edi, %ebp # e += (c ^ d ^ b)
movl %eax, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebp # e += rotl32(a,5)
rorl $2, %ebx # b = rotl32(b,30)
# 76
movl %ebx, %edi # c
xorl %ecx, %edi # ^d
xorl %eax, %edi # ^b
addl -64+4*12(%rsp), %edx # e += RCONST + W[n & 15]
addl %edi, %edx # e += (c ^ d ^ b)
movl %ebp, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %edx # e += rotl32(a,5)
rorl $2, %eax # b = rotl32(b,30)
# 77
movl %eax, %edi # c
xorl %ebx, %edi # ^d
xorl %ebp, %edi # ^b
addl -64+4*13(%rsp), %ecx # e += RCONST + W[n & 15]
addl %edi, %ecx # e += (c ^ d ^ b)
movl %edx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ecx # e += rotl32(a,5)
rorl $2, %ebp # b = rotl32(b,30)
# 78
movl %ebp, %edi # c
xorl %eax, %edi # ^d
xorl %edx, %edi # ^b
addl -64+4*14(%rsp), %ebx # e += RCONST + W[n & 15]
addl %edi, %ebx # e += (c ^ d ^ b)
movl %ecx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %ebx # e += rotl32(a,5)
rorl $2, %edx # b = rotl32(b,30)
# 79
movl %edx, %edi # c
xorl %ebp, %edi # ^d
xorl %ecx, %edi # ^b
addl -64+4*15(%rsp), %eax # e += RCONST + W[n & 15]
addl %edi, %eax # e += (c ^ d ^ b)
movl %ebx, %esi #
roll $5, %esi # rotl32(a,5)
addl %esi, %eax # e += rotl32(a,5)
rorl $2, %ecx # b = rotl32(b,30)
popq %rdi #
popq %r12 #
addl %eax, 80(%rdi) # ctx->hash[0] += a
popq %r13 #
addl %ebx, 84(%rdi) # ctx->hash[1] += b
popq %r14 #
addl %ecx, 88(%rdi) # ctx->hash[2] += c
popq %r15 #
addl %edx, 92(%rdi) # ctx->hash[3] += d
popq %rbx #
addl %ebp, 96(%rdi) # ctx->hash[4] += e
popq %rbp #
.size sha1_process_block64, .-sha1_process_block64
.section .rodata.cst16.sha1const, "aM", @progbits, 16
.balign 16 # movaps needs its memory operand on a 16-byte boundary
# SHA-1 round constants K (FIPS 180-4), each repeated in all four 32-bit
# lanes so that a single paddd adds K to four W[] words at once.
# The code loads them RIP-relative via movaps NAME(%rip), %xmm6, so every
# vector must carry its own label; names follow the rconst0xVALUE pattern
# used by those loads.
rconst0x5A827999: # K for rounds 0..19
.long 0x5A827999
.long 0x5A827999
.long 0x5A827999
.long 0x5A827999
rconst0x6ED9EBA1: # K for rounds 20..39
.long 0x6ED9EBA1
.long 0x6ED9EBA1
.long 0x6ED9EBA1
.long 0x6ED9EBA1
rconst0x8F1BBCDC: # K for rounds 40..59
.long 0x8F1BBCDC
.long 0x8F1BBCDC
.long 0x8F1BBCDC
.long 0x8F1BBCDC
rconst0xCA62C1D6: # K for rounds 60..79
.long 0xCA62C1D6
.long 0xCA62C1D6
.long 0xCA62C1D6
.long 0xCA62C1D6