top: give inspect search algorithm a significant boost

The Inspect find algorithm has always been challenging
given the possibility that 'rows' might contain binary
data. Be that as it may, two small changes have proven
to dramatically improve the performance of such scans.

The first involves the case where no match was found in
the current 'substring' portion of a row. The pointer
representing that substring was then advanced only by
the length of the search string, rather than by the
length of the better/longer substring itself. Thus,
portions of the substring were always rescanned!

The second performance boost was achieved in this way:
each raw row is now pre-scanned (via memchr) for just
the first character of the search string, to determine
whether a full match is even possible. If it is not,
the repeated unproductive strstr calls on individual
substrings within that row are avoided entirely. In a
nutshell, 1 'if' with its '}' did the trick!

(now that we know a '.' + 2 spaces is squeezed to one)
(everything's perfectly justified, but it's just luck)

Signed-off-by: Jim Warner <james.warner@comcast.net>
This commit is contained in:
Jim Warner 2012-11-28 00:00:00 -06:00 committed by Craig Small
parent 2db582e31e
commit 3e7a70396c

View File

@@ -2471,11 +2471,8 @@ static void insp_do_pipe (char *fmts, int pid) {
* ( preceeding control chars would consume an unknown amount ) */ * ( preceeding control chars would consume an unknown amount ) */
static void insp_find (int ch, int *col, int *row) { static void insp_find (int ch, int *col, int *row) {
#define reDUX (found) ? N_txt(WORD_another_txt) : "" #define reDUX (found) ? N_txt(WORD_another_txt) : ""
#define begFS (int)(fnd - Insp_p[i])
static char str[SCREENMAX]; static char str[SCREENMAX];
static int found; static int found;
char *fnd, *p;
int i, x, ccur = *col;
if ((ch == '&' || ch == 'n') && !str[0]) { if ((ch == '&' || ch == 'n') && !str[0]) {
show_msg(N_txt(FIND_no_next_txt)); show_msg(N_txt(FIND_no_next_txt));
@@ -2486,29 +2483,33 @@ static void insp_find (int ch, int *col, int *row) {
found = 0; found = 0;
} }
if (str[0]) { if (str[0]) {
int i, xx, yy;
INSP_BUSY; INSP_BUSY;
for (i = *row; i < Insp_nl; ) { for (xx = *col, yy = *row; yy < Insp_nl; ) {
fnd = NULL; // because our glob might // let's skip this entire row, if there's no chance of a match
for (x = ccur +1; x < INSP_RLEN(i); x++) { // be raw binary data, we if (memchr(Insp_p[yy], str[0], INSP_RLEN(yy))) {
if (!*(p = Insp_p[i] + x)) // could encounter a '\0' char *p, *fnd = NULL;
continue; // in what we view as the for (i = xx; i < INSP_RLEN(yy); i++) {
if ((fnd = STRSTR(p, str))) // 'row' -- so we'll have if (!*(p = Insp_p[yy] + i)) // skip any empty strings
break; // to search it in chunks continue;
x += strlen(str); // ... if ((fnd = STRSTR(p, str))) // with binary data, each
} // and, account for maybe break; // row may have '\0'. so
if (fnd && fnd < Insp_p[i +1]) { // overrunning that 'row' i += strlen(p); // our scans must be done
found = 1; } // in chunks, and we must
*row = i; if (fnd && fnd < Insp_p[yy + 1]) { // guard against overrun!
*col = begFS; found = 1;
return; if (xx == *col) { ++xx; continue; } // matched where we were!
*row = yy; // ( tried to fool top? )
*col = (int)(fnd - Insp_p[yy]);
return;
}
} }
++i; xx = 0;
ccur = 0; ++yy;
} }
show_msg(fmtmk(N_fmt(FIND_no_find_fmt), reDUX, str)); show_msg(fmtmk(N_fmt(FIND_no_find_fmt), reDUX, str));
} }
#undef reDUX #undef reDUX
#undef begFS
} // end: insp_find } // end: insp_find