Brand-spankin'-new grep that uses libc regex routines instead of the
hand-rolled ones. Sed still needs to be replaced, and then the regexp stuff can
be axed.
This commit is contained in:
Mark Whitley 2000-06-28 22:00:26 +00:00
parent 268b8c4f38
commit d37218941c
2 changed files with 316 additions and 206 deletions

View File

@ -1,10 +1,8 @@
/* vi: set sw=4 ts=4: */
/* /*
* Mini grep implementation for busybox * Mini grep implementation for busybox using libc regex.
*
* *
* Copyright (C) 1999,2000 by Lineo, inc. * Copyright (C) 1999,2000 by Lineo, inc.
* Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -22,149 +20,206 @@
* *
*/ */
/*
18-Dec-1999 Konstantin Boldyshev <konst@voshod.com>
+ -q option (be quiet)
+ exit code depending on grep result (TRUE or FALSE)
(useful for scripts)
*/
#include "internal.h"
#include "regexp.h"
#include <stdio.h> #include <stdio.h>
#include <dirent.h> #include <stdlib.h>
#include <unistd.h> /* for getopt() */
#include <regex.h>
#include <string.h> /* for strerror() */
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include "internal.h"
#include <signal.h>
#include <time.h> extern int optind; /* in unistd.h */
#include <ctype.h> extern int errno; /* for use with strerror() */
#define BB_DECLARE_EXTERN
#define bb_need_too_few_args
#include "messages.c"
static const char grep_usage[] = static const char grep_usage[] =
"grep [OPTIONS]... PATTERN [FILE]...\n" "grep [-ihHnqvs] pattern [files...]\n"
#ifndef BB_FEATURE_TRIVIAL_HELP #ifndef BB_FEATURE_TRIVIAL_HELP
"\nSearch for PATTERN in each FILE or standard input.\n\n" "\nSearch for PATTERN in each FILE or standard input.\n\n"
"OPTIONS:\n" "OPTIONS:\n"
"\t-H\tprefix output lines with filename where match was found\n"
"\t-h\tsuppress the prefixing filename on output\n" "\t-h\tsuppress the prefixing filename on output\n"
"\t-i\tignore case distinctions\n" "\t-i\tignore case distinctions\n"
"\t-n\tprint line number with output lines\n" "\t-n\tprint line number with output lines\n"
"\t-q\tbe quiet. Returns 0 if result was found, 1 otherwise\n" "\t-q\tbe quiet. Returns 0 if result was found, 1 otherwise\n"
"\t-v\tselect non-matching lines\n\n" "\t-v\tselect non-matching lines\n"
#if defined BB_REGEXP "\t-s\tsuppress file open/read error messages\n\n"
"This version of grep matches full regular expressions.\n";
#else
"This version of grep matches strings (not regular expressions).\n"
#endif
#endif #endif
; ;
static int match = FALSE, beQuiet = FALSE; static const int GROWBY = 80; /* how large we will grow strings by */
static void do_grep(FILE * fp, char *needle, char *fileName, int tellName, /* options */
int ignoreCase, int tellLine, int invertSearch) static int ignore_case = 0;
static int print_filename = 0;
static int print_line_num = 0;
static int be_quiet = 0;
static int invert_search = 0;
static int suppress_err_msgs = 0;
/* globals */
static regex_t regex; /* storage space for compiled regular expression */
static int nmatches = 0; /* keeps track of the number of matches */
static char *cur_file = NULL; /* the current file we are reading */
/* This returns a malloc'ed char * which must be stored and free'ed */
/* XXX: This function should probably go in a 'common'/'util'/'misc' file
* somewhere so it can be used by other folks. */
static char *get_line_from_file(FILE *file)
{ {
long line = 0; int ch;
char *haystack; int idx = 0;
int truth = !invertSearch; char *linebuf = NULL;
int linebufsz = 0;
while ((haystack = cstring_lineFromFile(fp))) { while (1) {
line++; ch = fgetc(file);
if (find_match(haystack, needle, ignoreCase) == truth) { if (ch == EOF)
if (tellName == TRUE) break;
printf("%s:", fileName); /* grow the line buffer as necessary */
if (idx > linebufsz-1)
if (tellLine == TRUE) linebuf = realloc(linebuf, linebufsz += GROWBY);
printf("%ld:", line); linebuf[idx++] = (char)ch;
if ((char)ch == '\n')
if (beQuiet == FALSE) break;
fputs(haystack, stdout);
match = TRUE;
}
free(haystack);
} }
if (idx == 0)
return NULL;
linebuf[idx] = 0;
return linebuf;
} }
/*
 * Write one selected line to stdout, preceded by the optional "file:" and
 * "linenum:" prefixes.
 *
 * line    - NUL-terminated text of the line as read from the input (it keeps
 *           its trailing newline when the input had one)
 * linenum - 1-based number of the line within the current file
 */
static void print_matched_line(char *line, int linenum)
{
	/* print_filename is a counter, not a flag: -H and "more than one file"
	 * raise it, -h lowers it. It can therefore be negative, and only a
	 * positive value means the prefix was requested. */
	if (print_filename > 0) {
		/* cur_file is only set while iterating named files; it is still
		 * NULL when we are reading stdin, and NULL must not reach %s */
		printf("%s:", cur_file ? cur_file : "(standard input)");
	}
	if (print_line_num)
		printf("%i:", linenum);
	printf("%s", line);
}
/*
 * Run the compiled pattern in `regex` over every line of `file`.
 *
 * A line is "selected" when regexec() reports a match and -v is off, or when
 * it reports REG_NOMATCH and -v is on. Any other regexec() result (an error)
 * selects nothing. Each selected line bumps `nmatches`, which grep_main uses
 * to pick the exit status, and is printed unless -q was given. With -q the
 * process exits with status 0 as soon as the first line is selected.
 *
 * file - an open stream to read from; it is not closed here
 */
static void grep_file(FILE *file)
{
	char *line = NULL;
	int ret;
	int linenum = 0;

	while ((line = get_line_from_file(file)) != NULL) {
		int selected;

		linenum++;
		ret = regexec(&regex, line, 0, NULL, 0);

		/* -v flips which regexec() outcome counts as a selected line */
		if (invert_search)
			selected = (ret == REG_NOMATCH);
		else
			selected = (ret == 0);

		if (selected) {
			/* quiet mode: one selected line is enough to report success,
			 * for inverted searches as well as normal ones */
			if (be_quiet) {
				free(line);
				regfree(&regex);
				exit(0);
			}
			/* count inverted selections too, so that the exit status
			 * reflects whether any line was selected */
			nmatches++;
			print_matched_line(line, linenum);
		}
		free(line);
	}
}
extern int grep_main(int argc, char **argv) extern int grep_main(int argc, char **argv)
{ {
FILE *fp; int opt;
char *needle; int reflags;
char *fileName; int ret;
int tellName = TRUE;
int ignoreCase = FALSE;
int tellLine = FALSE;
int invertSearch = FALSE;
if (argc < 1) { /* do special-case option parsing */
if (argv[1] && (strcmp(argv[1], "--help") == 0))
usage(grep_usage); usage(grep_usage);
}
argv++;
while (--argc >= 0 && *argv && (**argv == '-')) { /* do normal option parsing */
while (*++(*argv)) { while ((opt = getopt(argc, argv, "iHhnqvs")) > 0) {
switch (**argv) { switch (opt) {
case 'i': case 'i':
ignoreCase = TRUE; ignore_case++;
break;
case 'H':
print_filename++;
break; break;
case 'h': case 'h':
tellName = FALSE; print_filename--;
break; break;
case 'n': case 'n':
tellLine = TRUE; print_line_num++;
break; break;
case 'q': case 'q':
beQuiet = TRUE; be_quiet++;
break; break;
case 'v': case 'v':
invertSearch = TRUE; invert_search++;
break;
case 's':
suppress_err_msgs++;
break; break;
default:
usage(grep_usage);
}
} }
argv++;
} }
if (argc == 0 || *argv == NULL) { /* argv[optind] should be the regex pattern; no pattern, no worky */
fatalError(too_few_args, "grep"); if (argv[optind] == NULL)
usage(grep_usage);
/* compile the regular expression */
reflags = REG_NOSUB; /* we're not going to mess with sub-expressions */
if (ignore_case)
reflags |= REG_ICASE;
if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
int errmsgsz = regerror(ret, &regex, NULL, 0);
char *errmsg = malloc(errmsgsz);
if (errmsg == NULL) {
fprintf(stderr, "grep: memory error\n");
regfree(&regex);
exit(1);
}
regerror(ret, &regex, errmsg, errmsgsz);
fprintf(stderr, "grep: %s\n", errmsg);
free(errmsg);
regfree(&regex);
exit(1);
} }
needle = *argv++; /* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
argc--; * there is more than one file to grep, we will print the filenames */
if ((argc-1) - (optind+1) > 0)
print_filename++;
if (argc == 0) { /* If no files were specified, take input from stdin. Otherwise, we grep
do_grep(stdin, needle, "stdin", FALSE, ignoreCase, tellLine, invertSearch); * through all the files specified. */
if (argv[optind+1] == NULL) {
grep_file(stdin);
} else { } else {
/* Never print the filename for just one file */ int i;
if (argc == 1) FILE *file;
tellName = FALSE; for (i = optind + 1; i < argc; i++) {
while (argc-- > 0) { cur_file = argv[i];
fileName = *argv++; file = fopen(cur_file, "r");
if (file == NULL) {
fp = fopen(fileName, "r"); if (!suppress_err_msgs)
if (fp == NULL) { fprintf(stderr, "grep: %s: %s\n", cur_file, strerror(errno));
perror(fileName); } else {
continue; grep_file(file);
fclose(file);
} }
do_grep(fp, needle, fileName, tellName, ignoreCase, tellLine, invertSearch);
if (ferror(fp))
perror(fileName);
fclose(fp);
} }
} }
return(match);
regfree(&regex);
if (nmatches == 0)
return 1;
return 0;
} }
/* END CODE */

261
grep.c
View File

@ -1,10 +1,8 @@
/* vi: set sw=4 ts=4: */
/* /*
* Mini grep implementation for busybox * Mini grep implementation for busybox using libc regex.
*
* *
* Copyright (C) 1999,2000 by Lineo, inc. * Copyright (C) 1999,2000 by Lineo, inc.
* Written by Erik Andersen <andersen@lineo.com>, <andersee@debian.org> * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -22,149 +20,206 @@
* *
*/ */
/*
18-Dec-1999 Konstantin Boldyshev <konst@voshod.com>
+ -q option (be quiet)
+ exit code depending on grep result (TRUE or FALSE)
(useful for scripts)
*/
#include "internal.h"
#include "regexp.h"
#include <stdio.h> #include <stdio.h>
#include <dirent.h> #include <stdlib.h>
#include <unistd.h> /* for getopt() */
#include <regex.h>
#include <string.h> /* for strerror() */
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include "internal.h"
#include <signal.h>
#include <time.h> extern int optind; /* in unistd.h */
#include <ctype.h> extern int errno; /* for use with strerror() */
#define BB_DECLARE_EXTERN
#define bb_need_too_few_args
#include "messages.c"
static const char grep_usage[] = static const char grep_usage[] =
"grep [OPTIONS]... PATTERN [FILE]...\n" "grep [-ihHnqvs] pattern [files...]\n"
#ifndef BB_FEATURE_TRIVIAL_HELP #ifndef BB_FEATURE_TRIVIAL_HELP
"\nSearch for PATTERN in each FILE or standard input.\n\n" "\nSearch for PATTERN in each FILE or standard input.\n\n"
"OPTIONS:\n" "OPTIONS:\n"
"\t-H\tprefix output lines with filename where match was found\n"
"\t-h\tsuppress the prefixing filename on output\n" "\t-h\tsuppress the prefixing filename on output\n"
"\t-i\tignore case distinctions\n" "\t-i\tignore case distinctions\n"
"\t-n\tprint line number with output lines\n" "\t-n\tprint line number with output lines\n"
"\t-q\tbe quiet. Returns 0 if result was found, 1 otherwise\n" "\t-q\tbe quiet. Returns 0 if result was found, 1 otherwise\n"
"\t-v\tselect non-matching lines\n\n" "\t-v\tselect non-matching lines\n"
#if defined BB_REGEXP "\t-s\tsuppress file open/read error messages\n\n"
"This version of grep matches full regular expressions.\n";
#else
"This version of grep matches strings (not regular expressions).\n"
#endif
#endif #endif
; ;
static int match = FALSE, beQuiet = FALSE; static const int GROWBY = 80; /* how large we will grow strings by */
static void do_grep(FILE * fp, char *needle, char *fileName, int tellName, /* options */
int ignoreCase, int tellLine, int invertSearch) static int ignore_case = 0;
static int print_filename = 0;
static int print_line_num = 0;
static int be_quiet = 0;
static int invert_search = 0;
static int suppress_err_msgs = 0;
/* globals */
static regex_t regex; /* storage space for compiled regular expression */
static int nmatches = 0; /* keeps track of the number of matches */
static char *cur_file = NULL; /* the current file we are reading */
/* This returns a malloc'ed char * which must be stored and free'ed */
/* XXX: This function should probably go in a 'common'/'util'/'misc' file
* somewhere so it can be used by other folks. */
static char *get_line_from_file(FILE *file)
{ {
long line = 0; int ch;
char *haystack; int idx = 0;
int truth = !invertSearch; char *linebuf = NULL;
int linebufsz = 0;
while ((haystack = cstring_lineFromFile(fp))) { while (1) {
line++; ch = fgetc(file);
if (find_match(haystack, needle, ignoreCase) == truth) { if (ch == EOF)
if (tellName == TRUE) break;
printf("%s:", fileName); /* grow the line buffer as necessary */
if (idx > linebufsz-1)
if (tellLine == TRUE) linebuf = realloc(linebuf, linebufsz += GROWBY);
printf("%ld:", line); linebuf[idx++] = (char)ch;
if ((char)ch == '\n')
if (beQuiet == FALSE) break;
fputs(haystack, stdout);
match = TRUE;
}
free(haystack);
} }
if (idx == 0)
return NULL;
linebuf[idx] = 0;
return linebuf;
} }
/*
 * Write one selected line to stdout, preceded by the optional "file:" and
 * "linenum:" prefixes.
 *
 * line    - NUL-terminated text of the line as read from the input (it keeps
 *           its trailing newline when the input had one)
 * linenum - 1-based number of the line within the current file
 */
static void print_matched_line(char *line, int linenum)
{
	/* print_filename is a counter, not a flag: -H and "more than one file"
	 * raise it, -h lowers it. It can therefore be negative, and only a
	 * positive value means the prefix was requested. */
	if (print_filename > 0) {
		/* cur_file is only set while iterating named files; it is still
		 * NULL when we are reading stdin, and NULL must not reach %s */
		printf("%s:", cur_file ? cur_file : "(standard input)");
	}
	if (print_line_num)
		printf("%i:", linenum);
	printf("%s", line);
}
/*
 * Run the compiled pattern in `regex` over every line of `file`.
 *
 * A line is "selected" when regexec() reports a match and -v is off, or when
 * it reports REG_NOMATCH and -v is on. Any other regexec() result (an error)
 * selects nothing. Each selected line bumps `nmatches`, which grep_main uses
 * to pick the exit status, and is printed unless -q was given. With -q the
 * process exits with status 0 as soon as the first line is selected.
 *
 * file - an open stream to read from; it is not closed here
 */
static void grep_file(FILE *file)
{
	char *line = NULL;
	int ret;
	int linenum = 0;

	while ((line = get_line_from_file(file)) != NULL) {
		int selected;

		linenum++;
		ret = regexec(&regex, line, 0, NULL, 0);

		/* -v flips which regexec() outcome counts as a selected line */
		if (invert_search)
			selected = (ret == REG_NOMATCH);
		else
			selected = (ret == 0);

		if (selected) {
			/* quiet mode: one selected line is enough to report success,
			 * for inverted searches as well as normal ones */
			if (be_quiet) {
				free(line);
				regfree(&regex);
				exit(0);
			}
			/* count inverted selections too, so that the exit status
			 * reflects whether any line was selected */
			nmatches++;
			print_matched_line(line, linenum);
		}
		free(line);
	}
}
extern int grep_main(int argc, char **argv) extern int grep_main(int argc, char **argv)
{ {
FILE *fp; int opt;
char *needle; int reflags;
char *fileName; int ret;
int tellName = TRUE;
int ignoreCase = FALSE;
int tellLine = FALSE;
int invertSearch = FALSE;
if (argc < 1) { /* do special-case option parsing */
if (argv[1] && (strcmp(argv[1], "--help") == 0))
usage(grep_usage); usage(grep_usage);
}
argv++;
while (--argc >= 0 && *argv && (**argv == '-')) { /* do normal option parsing */
while (*++(*argv)) { while ((opt = getopt(argc, argv, "iHhnqvs")) > 0) {
switch (**argv) { switch (opt) {
case 'i': case 'i':
ignoreCase = TRUE; ignore_case++;
break;
case 'H':
print_filename++;
break; break;
case 'h': case 'h':
tellName = FALSE; print_filename--;
break; break;
case 'n': case 'n':
tellLine = TRUE; print_line_num++;
break; break;
case 'q': case 'q':
beQuiet = TRUE; be_quiet++;
break; break;
case 'v': case 'v':
invertSearch = TRUE; invert_search++;
break;
case 's':
suppress_err_msgs++;
break; break;
default:
usage(grep_usage);
}
} }
argv++;
} }
if (argc == 0 || *argv == NULL) { /* argv[optind] should be the regex pattern; no pattern, no worky */
fatalError(too_few_args, "grep"); if (argv[optind] == NULL)
usage(grep_usage);
/* compile the regular expression */
reflags = REG_NOSUB; /* we're not going to mess with sub-expressions */
if (ignore_case)
reflags |= REG_ICASE;
if ((ret = regcomp(&regex, argv[optind], reflags)) != 0) {
int errmsgsz = regerror(ret, &regex, NULL, 0);
char *errmsg = malloc(errmsgsz);
if (errmsg == NULL) {
fprintf(stderr, "grep: memory error\n");
regfree(&regex);
exit(1);
}
regerror(ret, &regex, errmsg, errmsgsz);
fprintf(stderr, "grep: %s\n", errmsg);
free(errmsg);
regfree(&regex);
exit(1);
} }
needle = *argv++; /* argv[(optind+1)..(argc-1)] should be names of file to grep through. If
argc--; * there is more than one file to grep, we will print the filenames */
if ((argc-1) - (optind+1) > 0)
print_filename++;
if (argc == 0) { /* If no files were specified, take input from stdin. Otherwise, we grep
do_grep(stdin, needle, "stdin", FALSE, ignoreCase, tellLine, invertSearch); * through all the files specified. */
if (argv[optind+1] == NULL) {
grep_file(stdin);
} else { } else {
/* Never print the filename for just one file */ int i;
if (argc == 1) FILE *file;
tellName = FALSE; for (i = optind + 1; i < argc; i++) {
while (argc-- > 0) { cur_file = argv[i];
fileName = *argv++; file = fopen(cur_file, "r");
if (file == NULL) {
fp = fopen(fileName, "r"); if (!suppress_err_msgs)
if (fp == NULL) { fprintf(stderr, "grep: %s: %s\n", cur_file, strerror(errno));
perror(fileName); } else {
continue; grep_file(file);
fclose(file);
} }
do_grep(fp, needle, fileName, tellName, ignoreCase, tellLine, invertSearch);
if (ferror(fp))
perror(fileName);
fclose(fp);
} }
} }
return(match);
regfree(&regex);
if (nmatches == 0)
return 1;
return 0;
} }
/* END CODE */