tls: fix ROL/ROR x86 optimization
ALWAYS_INLINE:

function                                             old     new   delta
psAesInitKey                                         825     824      -1
ROR                                                    5       -      -5
setup_mix2                                           148     134     -14
psAesDecryptBlock                                   1184    1139     -45
psAesEncryptBlock                                   1193    1102     -91
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-156)           Total: -156 bytes

ALWAYS_INLINE + __builtin_constant_p(shift_cnt):

function                                             old     new   delta
ROR                                                    5       -      -5
psAesInitKey                                         825     818      -7
setup_mix2                                           148     123     -25
psAesDecryptBlock                                   1184    1078    -106
psAesEncryptBlock                                   1193    1017    -176
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-319)           Total: -319 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
432f1ae2ff
commit
f7806f9d8f
@ -7,9 +7,6 @@
|
|||||||
|
|
||||||
/* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
|
/* The part below is a section of matrixssl-3-7-2b-open/crypto/cryptolib.h
|
||||||
* Changes are flagged with //bbox
|
* Changes are flagged with //bbox
|
||||||
* TODO:
|
|
||||||
* Take a look at "roll %%cl" part... rotates by constant use fewer registers,
|
|
||||||
* and on many Intel CPUs rotates by %cl are slower: they take 2 cycles, not 1.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
@ -28,16 +25,28 @@
|
|||||||
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
|
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && \
|
||||||
!defined(INTEL_CC) && !defined(PS_NO_ASM)
|
!defined(INTEL_CC) && !defined(PS_NO_ASM)
|
||||||
|
|
||||||
static inline unsigned ROL(unsigned word, int i)
|
static ALWAYS_INLINE unsigned ROL(unsigned word, int i)
|
||||||
{
|
{
|
||||||
|
if (__builtin_constant_p(i)) //box
|
||||||
|
// Rotates by constant use fewer registers,
|
||||||
|
// and on many Intel CPUs rotates by %cl take 2 cycles, not 1.
|
||||||
|
asm ("roll %2,%0" //box
|
||||||
|
:"=r" (word)
|
||||||
|
:"0" (word),"i" (i));
|
||||||
|
else //box
|
||||||
asm ("roll %%cl,%0"
|
asm ("roll %%cl,%0"
|
||||||
:"=r" (word)
|
:"=r" (word)
|
||||||
:"0" (word),"c" (i));
|
:"0" (word),"c" (i));
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned ROR(unsigned word, int i)
|
static ALWAYS_INLINE unsigned ROR(unsigned word, int i)
|
||||||
{
|
{
|
||||||
|
if (__builtin_constant_p(i)) //box
|
||||||
|
asm ("rorl %2,%0" //box
|
||||||
|
:"=r" (word)
|
||||||
|
:"0" (word),"i" (i));
|
||||||
|
else //box
|
||||||
asm ("rorl %%cl,%0"
|
asm ("rorl %%cl,%0"
|
||||||
:"=r" (word)
|
:"=r" (word)
|
||||||
:"0" (word),"c" (i));
|
:"0" (word),"c" (i));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user