grep: option to use GNU regex matching instead of POSIX one.
This fixes problems with NULs in files being scanned, but costs +800 bytes. The same can be done to sed (TODO).
This commit is contained in:
parent
fb5902ca5c
commit
3fd15e197e
@ -21,6 +21,15 @@ config DESKTOP
|
|||||||
Select this only if you plan to use busybox on full-blown
|
Select this only if you plan to use busybox on full-blown
|
||||||
desktop machine with common Linux distro, not on an embedded box.
|
desktop machine with common Linux distro, not on an embedded box.
|
||||||
|
|
||||||
|
config EXTRA_COMPAT
|
||||||
|
bool "Provide compatible behavior for rare corner cases (bigger code)"
|
||||||
|
default n
|
||||||
|
help
|
||||||
|
This option makes grep, sed etc handle rare corner cases
|
||||||
|
(embedded NUL bytes and such). This makes code bigger and uses
|
||||||
|
some GNU extensions in libc. You probably only need this option
|
||||||
|
if you plan to run busybox on desktop.
|
||||||
|
|
||||||
config FEATURE_ASSUME_UNICODE
|
config FEATURE_ASSUME_UNICODE
|
||||||
bool "Assume that 1:1 char/glyph correspondence is not true"
|
bool "Assume that 1:1 char/glyph correspondence is not true"
|
||||||
default n
|
default n
|
||||||
|
129
findutils/grep.c
129
findutils/grep.c
@ -96,6 +96,7 @@ struct globals {
|
|||||||
int lines_before;
|
int lines_before;
|
||||||
int lines_after;
|
int lines_after;
|
||||||
char **before_buf;
|
char **before_buf;
|
||||||
|
USE_EXTRA_COMPAT(size_t *before_buf_size;)
|
||||||
int last_line_printed;
|
int last_line_printed;
|
||||||
#endif
|
#endif
|
||||||
/* globals used internally */
|
/* globals used internally */
|
||||||
@ -117,6 +118,7 @@ struct globals {
|
|||||||
#define lines_before (G.lines_before )
|
#define lines_before (G.lines_before )
|
||||||
#define lines_after (G.lines_after )
|
#define lines_after (G.lines_after )
|
||||||
#define before_buf (G.before_buf )
|
#define before_buf (G.before_buf )
|
||||||
|
#define before_buf_size (G.before_buf_size )
|
||||||
#define last_line_printed (G.last_line_printed )
|
#define last_line_printed (G.last_line_printed )
|
||||||
#define pattern_head (G.pattern_head )
|
#define pattern_head (G.pattern_head )
|
||||||
#define cur_file (G.cur_file )
|
#define cur_file (G.cur_file )
|
||||||
@ -124,14 +126,24 @@ struct globals {
|
|||||||
|
|
||||||
typedef struct grep_list_data_t {
|
typedef struct grep_list_data_t {
|
||||||
char *pattern;
|
char *pattern;
|
||||||
regex_t preg;
|
/* for GNU regex, matched_range must be persistent across grep_file() calls */
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
|
regex_t compiled_regex;
|
||||||
|
regmatch_t matched_range;
|
||||||
|
#else
|
||||||
|
struct re_pattern_buffer compiled_regex;
|
||||||
|
struct re_registers matched_range;
|
||||||
|
#endif
|
||||||
#define ALLOCATED 1
|
#define ALLOCATED 1
|
||||||
#define COMPILED 2
|
#define COMPILED 2
|
||||||
int flg_mem_alocated_compiled;
|
int flg_mem_alocated_compiled;
|
||||||
} grep_list_data_t;
|
} grep_list_data_t;
|
||||||
|
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
static void print_line(const char *line, int linenum, char decoration)
|
#define print_line(line, line_len, linenum, decoration) \
|
||||||
|
print_line(line, linenum, decoration)
|
||||||
|
#endif
|
||||||
|
static void print_line(const char *line, size_t line_len, int linenum, char decoration)
|
||||||
{
|
{
|
||||||
#if ENABLE_FEATURE_GREP_CONTEXT
|
#if ENABLE_FEATURE_GREP_CONTEXT
|
||||||
/* Happens when we go to next file, immediately hit match
|
/* Happens when we go to next file, immediately hit match
|
||||||
@ -139,8 +151,9 @@ static void print_line(const char *line, int linenum, char decoration)
|
|||||||
if (linenum < 1)
|
if (linenum < 1)
|
||||||
return;
|
return;
|
||||||
/* possibly print the little '--' separator */
|
/* possibly print the little '--' separator */
|
||||||
if ((lines_before || lines_after) && did_print_line &&
|
if ((lines_before || lines_after) && did_print_line
|
||||||
last_line_printed != linenum - 1) {
|
&& last_line_printed != linenum - 1
|
||||||
|
) {
|
||||||
puts("--");
|
puts("--");
|
||||||
}
|
}
|
||||||
/* guard against printing "--" before first line of first file */
|
/* guard against printing "--" before first line of first file */
|
||||||
@ -152,17 +165,50 @@ static void print_line(const char *line, int linenum, char decoration)
|
|||||||
if (PRINT_LINE_NUM)
|
if (PRINT_LINE_NUM)
|
||||||
printf("%i%c", linenum, decoration);
|
printf("%i%c", linenum, decoration);
|
||||||
/* Emulate weird GNU grep behavior with -ov */
|
/* Emulate weird GNU grep behavior with -ov */
|
||||||
if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o))
|
if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) {
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
puts(line);
|
puts(line);
|
||||||
|
#else
|
||||||
|
fwrite(line, 1, line_len, stdout);
|
||||||
|
putchar('\n');
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if ENABLE_EXTRA_COMPAT
|
||||||
|
/* Unlike getline, this one removes trailing '\n' */
|
||||||
|
static ssize_t FAST_FUNC bb_getline(char **line_ptr, size_t *line_alloc_len, FILE *file)
|
||||||
|
{
|
||||||
|
ssize_t res_sz;
|
||||||
|
char *line;
|
||||||
|
|
||||||
|
res_sz = getline(line_ptr, line_alloc_len, file);
|
||||||
|
line = *line_ptr;
|
||||||
|
|
||||||
|
if (res_sz > 0) {
|
||||||
|
if (line[res_sz - 1] == '\n')
|
||||||
|
line[--res_sz] = '\0';
|
||||||
|
} else {
|
||||||
|
free(line); /* uclibc allocates a buffer even on EOF. WTF? */
|
||||||
|
}
|
||||||
|
return res_sz;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int grep_file(FILE *file)
|
static int grep_file(FILE *file)
|
||||||
{
|
{
|
||||||
char *line;
|
|
||||||
smalluint found;
|
smalluint found;
|
||||||
int linenum = 0;
|
int linenum = 0;
|
||||||
int nmatches = 0;
|
int nmatches = 0;
|
||||||
regmatch_t regmatch;
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
|
char *line;
|
||||||
|
#else
|
||||||
|
char *line = NULL;
|
||||||
|
ssize_t line_len;
|
||||||
|
size_t line_alloc_len;
|
||||||
|
#define rm_so start[0]
|
||||||
|
#define rm_eo end[0]
|
||||||
|
#endif
|
||||||
#if ENABLE_FEATURE_GREP_CONTEXT
|
#if ENABLE_FEATURE_GREP_CONTEXT
|
||||||
int print_n_lines_after = 0;
|
int print_n_lines_after = 0;
|
||||||
int curpos = 0; /* track where we are in the circular 'before' buffer */
|
int curpos = 0; /* track where we are in the circular 'before' buffer */
|
||||||
@ -171,7 +217,13 @@ static int grep_file(FILE *file)
|
|||||||
enum { print_n_lines_after = 0 };
|
enum { print_n_lines_after = 0 };
|
||||||
#endif /* ENABLE_FEATURE_GREP_CONTEXT */
|
#endif /* ENABLE_FEATURE_GREP_CONTEXT */
|
||||||
|
|
||||||
while ((line = xmalloc_fgetline(file)) != NULL) {
|
while (
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
|
(line = xmalloc_fgetline(file)) != NULL
|
||||||
|
#else
|
||||||
|
(line_len = bb_getline(&line, &line_alloc_len, file)) >= 0
|
||||||
|
#endif
|
||||||
|
) {
|
||||||
llist_t *pattern_ptr = pattern_head;
|
llist_t *pattern_ptr = pattern_head;
|
||||||
grep_list_data_t *gl = gl; /* for gcc */
|
grep_list_data_t *gl = gl; /* for gcc */
|
||||||
|
|
||||||
@ -184,19 +236,35 @@ static int grep_file(FILE *file)
|
|||||||
} else {
|
} else {
|
||||||
if (!(gl->flg_mem_alocated_compiled & COMPILED)) {
|
if (!(gl->flg_mem_alocated_compiled & COMPILED)) {
|
||||||
gl->flg_mem_alocated_compiled |= COMPILED;
|
gl->flg_mem_alocated_compiled |= COMPILED;
|
||||||
xregcomp(&(gl->preg), gl->pattern, reflags);
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
|
xregcomp(&gl->compiled_regex, gl->pattern, reflags);
|
||||||
|
#else
|
||||||
|
memset(&gl->compiled_regex, 0, sizeof(gl->compiled_regex));
|
||||||
|
if (re_compile_pattern(gl->pattern, strlen(gl->pattern), &gl->compiled_regex))
|
||||||
|
bb_error_msg_and_die("bad regex '%s'", gl->pattern);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
regmatch.rm_so = 0;
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
regmatch.rm_eo = 0;
|
gl->matched_range.rm_so = 0;
|
||||||
if (regexec(&(gl->preg), line, 1, ®match, 0) == 0) {
|
gl->matched_range.rm_eo = 0;
|
||||||
|
#endif
|
||||||
|
if (
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
|
regexec(&gl->compiled_regex, line, 1, &gl->matched_range, 0) == 0
|
||||||
|
#else
|
||||||
|
re_search(&gl->compiled_regex, line, line_len,
|
||||||
|
/*start:*/ 0, /*range:*/ line_len,
|
||||||
|
&gl->matched_range) >= 0
|
||||||
|
#endif
|
||||||
|
) {
|
||||||
if (!(option_mask32 & OPT_w))
|
if (!(option_mask32 & OPT_w))
|
||||||
found = 1;
|
found = 1;
|
||||||
else {
|
else {
|
||||||
char c = ' ';
|
char c = ' ';
|
||||||
if (regmatch.rm_so)
|
if (gl->matched_range.rm_so)
|
||||||
c = line[regmatch.rm_so - 1];
|
c = line[gl->matched_range.rm_so - 1];
|
||||||
if (!isalnum(c) && c != '_') {
|
if (!isalnum(c) && c != '_') {
|
||||||
c = line[regmatch.rm_eo];
|
c = line[gl->matched_range.rm_eo];
|
||||||
if (!c || (!isalnum(c) && c != '_'))
|
if (!c || (!isalnum(c) && c != '_'))
|
||||||
found = 1;
|
found = 1;
|
||||||
}
|
}
|
||||||
@ -261,7 +329,7 @@ static int grep_file(FILE *file)
|
|||||||
|
|
||||||
/* now print each line in the buffer, clearing them as we go */
|
/* now print each line in the buffer, clearing them as we go */
|
||||||
while (before_buf[idx] != NULL) {
|
while (before_buf[idx] != NULL) {
|
||||||
print_line(before_buf[idx], first_buf_entry_line_num, '-');
|
print_line(before_buf[idx], before_buf_size[idx], first_buf_entry_line_num, '-');
|
||||||
free(before_buf[idx]);
|
free(before_buf[idx]);
|
||||||
before_buf[idx] = NULL;
|
before_buf[idx] = NULL;
|
||||||
idx = (idx + 1) % lines_before;
|
idx = (idx + 1) % lines_before;
|
||||||
@ -277,13 +345,15 @@ static int grep_file(FILE *file)
|
|||||||
/* -Fo just prints the pattern
|
/* -Fo just prints the pattern
|
||||||
* (unless -v: -Fov doesnt print anything at all) */
|
* (unless -v: -Fov doesnt print anything at all) */
|
||||||
if (found)
|
if (found)
|
||||||
print_line(gl->pattern, linenum, ':');
|
print_line(gl->pattern, strlen(gl->pattern), linenum, ':');
|
||||||
} else {
|
} else {
|
||||||
line[regmatch.rm_eo] = '\0';
|
line[gl->matched_range.rm_eo] = '\0';
|
||||||
print_line(line + regmatch.rm_so, linenum, ':');
|
print_line(line + gl->matched_range.rm_so,
|
||||||
|
gl->matched_range.rm_eo - gl->matched_range.rm_so,
|
||||||
|
linenum, ':');
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
print_line(line, linenum, ':');
|
print_line(line, line_len, linenum, ':');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -291,12 +361,13 @@ static int grep_file(FILE *file)
|
|||||||
else { /* no match */
|
else { /* no match */
|
||||||
/* if we need to print some context lines after the last match, do so */
|
/* if we need to print some context lines after the last match, do so */
|
||||||
if (print_n_lines_after) {
|
if (print_n_lines_after) {
|
||||||
print_line(line, linenum, '-');
|
print_line(line, strlen(line), linenum, '-');
|
||||||
print_n_lines_after--;
|
print_n_lines_after--;
|
||||||
} else if (lines_before) {
|
} else if (lines_before) {
|
||||||
/* Add the line to the circular 'before' buffer */
|
/* Add the line to the circular 'before' buffer */
|
||||||
free(before_buf[curpos]);
|
free(before_buf[curpos]);
|
||||||
before_buf[curpos] = line;
|
before_buf[curpos] = line;
|
||||||
|
USE_EXTRA_COMPAT(before_buf_size[curpos] = line_len;)
|
||||||
curpos = (curpos + 1) % lines_before;
|
curpos = (curpos + 1) % lines_before;
|
||||||
/* avoid free(line) - we took the line */
|
/* avoid free(line) - we took the line */
|
||||||
line = NULL;
|
line = NULL;
|
||||||
@ -304,13 +375,14 @@ static int grep_file(FILE *file)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif /* ENABLE_FEATURE_GREP_CONTEXT */
|
#endif /* ENABLE_FEATURE_GREP_CONTEXT */
|
||||||
|
#if !ENABLE_EXTRA_COMPAT
|
||||||
free(line);
|
free(line);
|
||||||
|
#endif
|
||||||
/* Did we print all context after last requested match? */
|
/* Did we print all context after last requested match? */
|
||||||
if ((option_mask32 & OPT_m)
|
if ((option_mask32 & OPT_m)
|
||||||
&& !print_n_lines_after && nmatches == max_matches)
|
&& !print_n_lines_after && nmatches == max_matches)
|
||||||
break;
|
break;
|
||||||
}
|
} /* while (read line) */
|
||||||
|
|
||||||
/* special-case file post-processing for options where we don't print line
|
/* special-case file post-processing for options where we don't print line
|
||||||
* matches, just filenames and possibly match counts */
|
* matches, just filenames and possibly match counts */
|
||||||
@ -428,15 +500,16 @@ int grep_main(int argc, char **argv)
|
|||||||
lines_after = Copt;
|
lines_after = Copt;
|
||||||
if (!(option_mask32 & OPT_B)) /* not overridden */
|
if (!(option_mask32 & OPT_B)) /* not overridden */
|
||||||
lines_before = Copt;
|
lines_before = Copt;
|
||||||
//option_mask32 |= OPT_A|OPT_B; /* for parser */
|
|
||||||
}
|
}
|
||||||
/* sanity checks */
|
/* sanity checks */
|
||||||
if (option_mask32 & (OPT_c|OPT_q|OPT_l|OPT_L)) {
|
if (option_mask32 & (OPT_c|OPT_q|OPT_l|OPT_L)) {
|
||||||
option_mask32 &= ~OPT_n;
|
option_mask32 &= ~OPT_n;
|
||||||
lines_before = 0;
|
lines_before = 0;
|
||||||
lines_after = 0;
|
lines_after = 0;
|
||||||
} else if (lines_before > 0)
|
} else if (lines_before > 0) {
|
||||||
before_buf = xzalloc(lines_before * sizeof(char *));
|
before_buf = xzalloc(lines_before * sizeof(before_buf[0]));
|
||||||
|
USE_EXTRA_COMPAT(before_buf_size = xzalloc(lines_before * sizeof(before_buf_size[0]));)
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
/* with auto sanity checks */
|
/* with auto sanity checks */
|
||||||
/* -H unsets -h; -c,-q or -l unset -n; -e,-f are lists; -m N */
|
/* -H unsets -h; -c,-q or -l unset -n; -e,-f are lists; -m N */
|
||||||
@ -537,7 +610,7 @@ int grep_main(int argc, char **argv)
|
|||||||
if (gl->flg_mem_alocated_compiled & ALLOCATED)
|
if (gl->flg_mem_alocated_compiled & ALLOCATED)
|
||||||
free(gl->pattern);
|
free(gl->pattern);
|
||||||
if (gl->flg_mem_alocated_compiled & COMPILED)
|
if (gl->flg_mem_alocated_compiled & COMPILED)
|
||||||
regfree(&(gl->preg));
|
regfree(&gl->compiled_regex);
|
||||||
free(gl);
|
free(gl);
|
||||||
free(pattern_head_ptr);
|
free(pattern_head_ptr);
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,10 @@
|
|||||||
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
|
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* for getline() [GNUism] */
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
#define _GNU_SOURCE 1
|
||||||
|
#endif
|
||||||
#include "libbb.h"
|
#include "libbb.h"
|
||||||
|
|
||||||
/* This function reads an entire line from a text file, up to a newline
|
/* This function reads an entire line from a text file, up to a newline
|
||||||
@ -55,7 +59,6 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
|
|||||||
|
|
||||||
return bb_get_chunk_from_file(file, &i);
|
return bb_get_chunk_from_file(file, &i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get line. Remove trailing \n */
|
/* Get line. Remove trailing \n */
|
||||||
char* FAST_FUNC xmalloc_fgetline(FILE *file)
|
char* FAST_FUNC xmalloc_fgetline(FILE *file)
|
||||||
{
|
{
|
||||||
@ -68,6 +71,44 @@ char* FAST_FUNC xmalloc_fgetline(FILE *file)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
|
||||||
|
/* GNUism getline() should be faster (not tested) than a loop with fgetc */
|
||||||
|
|
||||||
|
/* Get line, including trailing \n if any */
|
||||||
|
char* FAST_FUNC xmalloc_fgets(FILE *file)
|
||||||
|
{
|
||||||
|
char *res_buf = NULL;
|
||||||
|
size_t res_sz;
|
||||||
|
|
||||||
|
if (getline(&res_buf, &res_sz, file) == -1) {
|
||||||
|
free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
|
||||||
|
res_buf = NULL;
|
||||||
|
}
|
||||||
|
//TODO: trimming to res_sz?
|
||||||
|
return res_buf;
|
||||||
|
}
|
||||||
|
/* Get line. Remove trailing \n */
|
||||||
|
char* FAST_FUNC xmalloc_fgetline(FILE *file)
|
||||||
|
{
|
||||||
|
char *res_buf = NULL;
|
||||||
|
size_t res_sz;
|
||||||
|
|
||||||
|
res_sz = getline(&res_buf, &res_sz, file);
|
||||||
|
|
||||||
|
if ((ssize_t)res_sz != -1) {
|
||||||
|
if (res_buf[res_sz - 1] == '\n')
|
||||||
|
res_buf[--res_sz] = '\0';
|
||||||
|
//TODO: trimming to res_sz?
|
||||||
|
} else {
|
||||||
|
free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */
|
||||||
|
res_buf = NULL;
|
||||||
|
}
|
||||||
|
return res_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
/* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07.
|
/* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07.
|
||||||
*
|
*
|
||||||
|
@ -27,6 +27,6 @@ void FAST_FUNC xregcomp(regex_t *preg, const char *regex, int cflags)
|
|||||||
{
|
{
|
||||||
char *errmsg = regcomp_or_errmsg(preg, regex, cflags);
|
char *errmsg = regcomp_or_errmsg(preg, regex, cflags);
|
||||||
if (errmsg) {
|
if (errmsg) {
|
||||||
bb_error_msg_and_die("xregcomp: %s", errmsg);
|
bb_error_msg_and_die("bad regex '%s': %s", regex, errmsg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -62,12 +62,8 @@ testing "grep -s nofile - (stdin and nonexisting file, match)" \
|
|||||||
"grep -s domatch nonexistent - ; echo \$?" \
|
"grep -s domatch nonexistent - ; echo \$?" \
|
||||||
"(standard input):domatch\n2\n" "" "nomatch\ndomatch\nend\n"
|
"(standard input):domatch\n2\n" "" "nomatch\ndomatch\nend\n"
|
||||||
|
|
||||||
# This doesn't match GNU behaviour (Binary file input matches)
|
testing "grep handles NUL in files" "grep -a foo input" "\0foo\n" "\0foo\n\n" ""
|
||||||
# acts like GNU grep -a
|
testing "grep handles NUL on stdin" "grep -a foo" "\0foo\n" "" "\0foo\n\n"
|
||||||
testing "grep handles binary files" "grep foo input" "foo\n" "\0foo\n\n" ""
|
|
||||||
# This doesn't match GNU behaviour (Binary file (standard input) matches)
|
|
||||||
# acts like GNU grep -a
|
|
||||||
testing "grep handles binary stdin" "grep foo" "foo\n" "" "\0foo\n\n"
|
|
||||||
|
|
||||||
testing "grep matches NUL" "grep . input > /dev/null 2>&1 ; echo \$?" \
|
testing "grep matches NUL" "grep . input > /dev/null 2>&1 ; echo \$?" \
|
||||||
"0\n" "\0\n" ""
|
"0\n" "\0\n" ""
|
||||||
|
Loading…
Reference in New Issue
Block a user