factor: detect squares

If we have a square, the speedup can be extremely large
(in best case example below, it's ~40000 times faster):

$ time ./busybox_old factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real	0m4.246s
$ time ./busybox factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real	0m0.000s

function                                             old     new   delta
isqrt_odd                                              -      57     +57
print_w                                                -      36     +36
factorize                                            218     236     +18
print_h                                                -       7      +7
factorize_numstr                                      65      72      +7
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0)             Total: 125 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2020-12-22 20:08:40 +01:00
parent 96717d9fb4
commit 6452c30036
2 changed files with 62 additions and 3 deletions

View File

@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = {
#undef R #undef R
#define WHEEL_START 5 #define WHEEL_START 5
#define WHEEL_SIZE (5 + 24 * 20) #define WHEEL_SIZE (5 + 24 * 20)
#define wheel_tab ((uint8_t*)&bb_common_bufsiz1) #define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
#define wheel_tab (((uint8_t*)&bb_common_bufsiz1) + 1)
/* /*
* Why, you ask? * Why, you ask?
* plain byte array: * plain byte array:
@ -119,9 +120,39 @@ static void unpack_wheel(void)
} }
} }
/* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
static NOINLINE void print_w(wide_t n)
{
unsigned rep = square_count;
do
printf(" %llu", n);
while (--rep != 0);
}
static NOINLINE void print_h(half_t n)
{
print_w(n);
}
static void factorize(wide_t N);
static half_t isqrt_odd(wide_t N) static half_t isqrt_odd(wide_t N)
{ {
half_t s = isqrt(N); half_t s = isqrt(N);
/* s^2 is <= N, (s+1)^2 > N */
/* If s^2 in fact is EQUAL to N, it's very lucky.
* Examples:
* factor 18446743988964486098 = 2 * 3037000493 * 3037000493
* factor 18446743902517389507 = 3 * 2479700513 * 2479700513
*/
if ((wide_t)s * s == N) {
/* factorize sqrt(N), printing each factor twice */
square_count *= 2;
factorize(s);
/* Let caller know we recursed */
return 0;
}
/* Subtract 1 from even s, odd s won't change: */ /* Subtract 1 from even s, odd s won't change: */
/* (doesnt work for zero, but we know that s != 0 here) */ /* (doesnt work for zero, but we know that s != 0 here) */
s = (s - 1) | 1; s = (s - 1) | 1;
@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N)
* factor 18446744073709551557 (0xffffffffffffffc5). * factor 18446744073709551557 (0xffffffffffffffc5).
*/ */
max_factor = isqrt_odd(N); max_factor = isqrt_odd(N);
if (!max_factor)
return; /* square was detected and recursively factored */
factor = 2; factor = 2;
w = 0; w = 0;
for (;;) { for (;;) {
@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N)
*/ */
while ((N % factor) == 0) { /* not likely */ while ((N % factor) == 0) { /* not likely */
N = N / factor; N = N / factor;
printf(" %"HALF_FMT"u", factor); print_h(factor);
max_factor = isqrt_odd(N); max_factor = isqrt_odd(N);
if (!max_factor)
return; /* square was detected */
} }
if (factor >= max_factor) if (factor >= max_factor)
break; break;
@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N)
} }
end: end:
if (N > 1) if (N > 1)
printf(" %llu", N); print_w(N);
bb_putchar('\n'); bb_putchar('\n');
} }
@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr)
if (errno) if (errno)
bb_show_usage(); bb_show_usage();
printf("%llu:", N); printf("%llu:", N);
square_count = 1;
factorize(N); factorize(N);
} }

View File

@ -45,4 +45,27 @@ testing "factor \$((2*3*5*7*11*13*17*19*23*29*31*37*41*43*47))" \
"614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \ "614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \
"" "" "" ""
# Test that square-detection code is not buggy
testing "factor 2 * 3037000493 * 3037000493" \
"factor 18446743988964486098" \
"18446743988964486098: 2 3037000493 3037000493\n" \
"" ""
testing "factor 3 * 2479700513 * 2479700513" \
"factor 18446743902517389507" \
"18446743902517389507: 3 2479700513 2479700513\n" \
"" ""
# including square-of-square cases:
testing "factor 3 * 37831 * 37831 * 37831 * 37831" \
"factor 6144867742934288163" \
"6144867742934288163: 3 37831 37831 37831 37831\n" \
"" ""
testing "factor 3 * 13^16" \
"factor 1996249827549539523" \
"1996249827549539523: 3 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
"" ""
testing "factor 13^16" \
"factor 665416609183179841" \
"665416609183179841: 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
"" ""
exit $FAILCOUNT exit $FAILCOUNT