sed: improve handling of NULs

This commit is contained in:
Denis Vlasenko 2006-12-02 17:58:10 +00:00
parent becd8c538c
commit 8b22b07bc5
2 changed files with 68 additions and 50 deletions

View File

@ -80,8 +80,8 @@ typedef struct sed_cmd_s {
/* Bitfields (gcc won't group them if we don't) */ /* Bitfields (gcc won't group them if we don't) */
unsigned int invert:1; /* the '!' after the address */ unsigned int invert:1; /* the '!' after the address */
unsigned int in_match:1; /* Next line also included in match? */ unsigned int in_match:1; /* Next line also included in match? */
unsigned int no_newline:1; /* Last line written by (sw) had no '\n' */
unsigned int sub_p:1; /* (s) print option */ unsigned int sub_p:1; /* (s) print option */
int last_char; /* Last line written by (sw) had no '\n' */
/* GENERAL FIELDS */ /* GENERAL FIELDS */
char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */
@ -712,56 +712,73 @@ static void add_input_file(FILE *file)
/* Get next line of input from bbg.input_file_list, flushing append buffer and /* Get next line of input from bbg.input_file_list, flushing append buffer and
* noting if we ran out of files without a newline on the last line we read. * noting if we ran out of files without a newline on the last line we read.
*/ */
static char *get_next_line(int *no_newline) static char *get_next_line(int *last_char)
{ {
char *temp = NULL; char *temp = NULL;
int len; int len;
/* will be returned if last line in the file
* doesn't end with either '\n' or '\0' */
*last_char = 0x100;
flush_append(); flush_append();
while (bbg.current_input_file < bbg.input_file_count) { while (bbg.current_input_file < bbg.input_file_count) {
temp = bb_get_chunk_from_file(bbg.input_file_list[bbg.current_input_file],&len); temp = bb_get_chunk_from_file(
bbg.input_file_list[bbg.current_input_file], &len);
if (temp) { if (temp) {
*no_newline = !(len && temp[len-1] == '\n'); /* len > 0 here, it's ok to do temp[len-1] */
if (!*no_newline) temp[len-1] = 0; char c = temp[len-1];
if (c == '\n' || c == '\0') {
temp[len-1] = '\0';
*last_char = (unsigned char)c;
}
break; break;
// Close this file and advance to next one }
} else /* Close this file and advance to next one */
fclose(bbg.input_file_list[bbg.current_input_file++]); fclose(bbg.input_file_list[bbg.current_input_file++]);
} }
return temp; return temp;
} }
/* Output line of text. missing_newline means the last line output did not /* Output line of text. */
end with a newline. no_newline means this line does not end with a /* Note:
newline. */ * echo -n thingy >z1
* echo -n again >z2
* >znull
* sed "s/i/z/" z1 z2 znull | hexdump -vC output:
* gnu sed 4.1.5:
* 00000000 74 68 7a 6e 67 79 0a 61 67 61 7a 6e |thzngy.agazn|
* bbox:
* 00000000 74 68 7a 6e 67 79 61 67 61 7a 6e |thzngyagazn|
* I am not sure that bbox is wrong here...
*/
static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline) static int puts_maybe_newline(char *s, FILE *file, int prev_last_char, int last_char)
{ {
if (missing_newline) fputc('\n',file); fputs(s, file);
fputs(s,file); if (last_char < 0x100) fputc(last_char, file);
if (!no_newline) fputc('\n',file);
if (ferror(file)) { if (ferror(file)) {
xfunc_error_retval = 4; /* It's what gnu sed exits with... */ xfunc_error_retval = 4; /* It's what gnu sed exits with... */
bb_error_msg_and_die(bb_msg_write_error); bb_error_msg_and_die(bb_msg_write_error);
} }
return no_newline; return last_char;
} }
#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,bbg.nonstdout,missing_newline,n) #define sed_puts(s, n) \
(prev_last_char = puts_maybe_newline(s, bbg.nonstdout, prev_last_char, n))
/* Process all the lines in all the files */ /* Process all the lines in all the files */
static void process_files(void) static void process_files(void)
{ {
char *pattern_space, *next_line; char *pattern_space, *next_line;
int linenum = 0, missing_newline = 0; int linenum = 0, prev_last_char = 0;
int no_newline,next_no_newline = 0; int last_char, next_last_char = 0;
/* Prime the pump */ /* Prime the pump */
next_line = get_next_line(&next_no_newline); next_line = get_next_line(&next_last_char);
/* go through every line in each file */ /* go through every line in each file */
for (;;) { for (;;) {
@ -771,11 +788,11 @@ static void process_files(void)
/* Advance to next line. Stop if out of lines. */ /* Advance to next line. Stop if out of lines. */
pattern_space = next_line; pattern_space = next_line;
if (!pattern_space) break; if (!pattern_space) break;
no_newline = next_no_newline; last_char = next_last_char;
/* Read one line in advance so we can act on the last line, /* Read one line in advance so we can act on the last line,
* the '$' address */ * the '$' address */
next_line = get_next_line(&next_no_newline); next_line = get_next_line(&next_last_char);
linenum++; linenum++;
restart: restart:
/* for every line, go through all the commands */ /* for every line, go through all the commands */
@ -858,7 +875,7 @@ restart:
/* Write the current pattern space to output */ /* Write the current pattern space to output */
case 'p': case 'p':
sed_puts(pattern_space,no_newline); sed_puts(pattern_space, last_char);
break; break;
/* Delete up through first newline */ /* Delete up through first newline */
case 'D': case 'D':
@ -878,25 +895,24 @@ restart:
/* Substitute with regex */ /* Substitute with regex */
case 's': case 's':
if (do_subst_command(sed_cmd, &pattern_space)) { if (!do_subst_command(sed_cmd, &pattern_space))
substituted |= 1; break;
substituted |= 1;
/* handle p option */ /* handle p option */
if (sed_cmd->sub_p) if (sed_cmd->sub_p)
sed_puts(pattern_space,no_newline); sed_puts(pattern_space, last_char);
/* handle w option */ /* handle w option */
if (sed_cmd->file) if (sed_cmd->file)
sed_cmd->no_newline = puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline); sed_cmd->last_char = puts_maybe_newline(
pattern_space, sed_cmd->file,
} sed_cmd->last_char, last_char);
break; break;
/* Append line to linked list to be printed later */ /* Append line to linked list to be printed later */
case 'a': case 'a':
{
append(sed_cmd->string); append(sed_cmd->string);
break; break;
}
/* Insert text before this line */ /* Insert text before this line */
case 'i': case 'i':
@ -930,18 +946,20 @@ restart:
/* Write pattern space to file. */ /* Write pattern space to file. */
case 'w': case 'w':
sed_cmd->no_newline = puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline); sed_cmd->last_char = puts_maybe_newline(
pattern_space,sed_cmd->file,
sed_cmd->last_char, last_char);
break; break;
/* Read next line from input */ /* Read next line from input */
case 'n': case 'n':
if (!bbg.be_quiet) if (!bbg.be_quiet)
sed_puts(pattern_space,no_newline); sed_puts(pattern_space, last_char);
if (next_line) { if (next_line) {
free(pattern_space); free(pattern_space);
pattern_space = next_line; pattern_space = next_line;
no_newline = next_no_newline; last_char = next_last_char;
next_line = get_next_line(&next_no_newline); next_line = get_next_line(&next_last_char);
linenum++; linenum++;
break; break;
} }
@ -970,8 +988,8 @@ restart:
pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
pattern_space[len] = '\n'; pattern_space[len] = '\n';
strcpy(pattern_space + len+1, next_line); strcpy(pattern_space + len+1, next_line);
no_newline = next_no_newline; last_char = next_last_char;
next_line = get_next_line(&next_no_newline); next_line = get_next_line(&next_last_char);
linenum++; linenum++;
} }
break; break;
@ -1029,7 +1047,7 @@ restart:
strcat(pattern_space, "\n"); strcat(pattern_space, "\n");
if (bbg.hold_space) if (bbg.hold_space)
strcat(pattern_space, bbg.hold_space); strcat(pattern_space, bbg.hold_space);
no_newline = 0; last_char = 0x100;
break; break;
} }
@ -1061,7 +1079,7 @@ restart:
{ {
char *tmp = pattern_space; char *tmp = pattern_space;
pattern_space = bbg.hold_space ? : xzalloc(1); pattern_space = bbg.hold_space ? : xzalloc(1);
no_newline = 0; last_char = 0x100;
bbg.hold_space = tmp; bbg.hold_space = tmp;
break; break;
} }
@ -1075,7 +1093,7 @@ restart:
discard_commands: discard_commands:
/* we will print the line unless we were told to be quiet ('-n') /* we will print the line unless we were told to be quiet ('-n')
or if the line was suppressed (ala 'd'elete) */ or if the line was suppressed (ala 'd'elete) */
if (!bbg.be_quiet) sed_puts(pattern_space,no_newline); if (!bbg.be_quiet) sed_puts(pattern_space, last_char);
/* Delete and such jump here. */ /* Delete and such jump here. */
discard_line: discard_line:

View File

@ -11,8 +11,8 @@
#include "libbb.h" #include "libbb.h"
/* This function reads an entire line from a text file, /* This function reads an entire line from a text file, up to a newline
* up to a newline or NUL byte. It returns a malloc'ed char * which must be * or NUL byte, inclusive. It returns a malloc'ed char * which must be
* stored and free'ed by the caller. If end is null '\n' isn't considered * stored and free'ed by the caller. If end is null '\n' isn't considered
* end of line. If end isn't null, length of the chunk read is stored in it. */ * end of line. If end isn't null, length of the chunk read is stored in it. */
@ -25,7 +25,7 @@ char *bb_get_chunk_from_file(FILE * file, int *end)
while ((ch = getc(file)) != EOF) { while ((ch = getc(file)) != EOF) {
/* grow the line buffer as necessary */ /* grow the line buffer as necessary */
if (idx > linebufsz - 2) { if (idx >= linebufsz) {
linebuf = xrealloc(linebuf, linebufsz += 80); linebuf = xrealloc(linebuf, linebufsz += 80);
} }
linebuf[idx++] = (char) ch; linebuf[idx++] = (char) ch;
@ -35,14 +35,14 @@ char *bb_get_chunk_from_file(FILE * file, int *end)
if (end) if (end)
*end = idx; *end = idx;
if (linebuf) { if (linebuf) {
// huh, is fgets discards prior data on error like this? // huh, does fgets discard prior data on error like this?
// I don't think so.... // I don't think so....
//if (ferror(file)) { //if (ferror(file)) {
// free(linebuf); // free(linebuf);
// return NULL; // return NULL;
//} //}
linebuf = xrealloc(linebuf, idx+1); linebuf = xrealloc(linebuf, idx+1);
linebuf[idx] = 0; linebuf[idx] = '\0';
} }
return linebuf; return linebuf;
} }