2003-10-01 08:36:16 +05:30
|
|
|
/* vi: set sw=4 ts=4: */
|
1999-10-28 21:36:25 +05:30
|
|
|
/*
|
2000-07-11 04:25:51 +05:30
|
|
|
* sed.c - very minimalist version of sed
|
1999-10-28 21:36:25 +05:30
|
|
|
*
|
2001-10-24 10:30:29 +05:30
|
|
|
* Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
|
|
|
|
* Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
|
2002-01-04 02:42:34 +05:30
|
|
|
* Copyright (C) 2002 Matt Kraai
|
2004-04-25 10:41:19 +05:30
|
|
|
* Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>
|
2004-04-01 14:53:30 +05:30
|
|
|
* Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
|
1999-12-30 03:49:46 +05:30
|
|
|
*
|
2006-02-24 08:00:39 +05:30
|
|
|
* MAINTAINER: Rob Landley <rob@landley.net>
|
2006-03-02 01:44:16 +05:30
|
|
|
*
|
2005-11-20 13:14:35 +05:30
|
|
|
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
|
1999-10-28 21:36:25 +05:30
|
|
|
*/
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Code overview.
|
|
|
|
|
|
|
|
Files are laid out to avoid unnecessary function declarations. So for
|
|
|
|
example, every function add_cmd calls occurs before add_cmd in this file.
|
|
|
|
|
|
|
|
add_cmd() is called on each line of sed command text (from a file or from
|
|
|
|
the command line). It calls get_address() and parse_cmd_args(). The
|
|
|
|
resulting sed_cmd_t structures are appended to a linked list
|
|
|
|
(sed_cmd_head/sed_cmd_tail).
|
|
|
|
|
2004-11-25 12:51:47 +05:30
|
|
|
add_input_file() adds a FILE * to the list of input files. We need to
|
2005-11-20 13:14:35 +05:30
|
|
|
know all input sources ahead of time to find the last line for the $ match.
|
2004-11-25 12:51:47 +05:30
|
|
|
|
|
|
|
process_files() does actual sedding, reading data lines from each input FILE *
|
2003-10-01 08:36:16 +05:30
|
|
|
(which could be stdin) and applying the sed command list (sed_cmd_head) to
|
|
|
|
each of the resulting lines.
|
|
|
|
|
|
|
|
sed_main() is where external code calls into this, with a command line.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
/*
|
|
|
|
Supported features and commands in this version of sed:
|
|
|
|
|
|
|
|
- comments ('#')
|
2000-07-14 05:30:15 +05:30
|
|
|
- address matching: num|/matchstr/[,num|/matchstr/|$]command
|
|
|
|
- commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
|
|
|
|
- edit commands: (a)ppend, (i)nsert, (c)hange
|
2001-05-12 03:57:13 +05:30
|
|
|
- file commands: (r)ead, (w)rite
|
2005-10-30 15:38:13 +05:30
|
|
|
- backreferences in substitution expressions (\0, \1, \2...\9)
|
2003-03-09 20:42:24 +05:30
|
|
|
- grouped commands: {cmd1;cmd2}
|
2003-04-09 07:13:54 +05:30
|
|
|
- transliteration (y/source-chars/dest-chars/)
|
|
|
|
- pattern space hold space storing / swapping (g, h, x)
|
2005-05-18 12:04:37 +05:30
|
|
|
- labels / branching (: label, b, t, T)
|
2003-03-09 20:42:24 +05:30
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
(Note: Specifying an address (range) to match is *optional*; commands
|
|
|
|
default to the whole pattern space if no specific address match was
|
|
|
|
requested.)
|
|
|
|
|
2003-09-16 10:55:43 +05:30
|
|
|
Todo:
|
|
|
|
- Create a wrapper around regex to make libc's regex conform with sed
|
2003-04-09 07:13:54 +05:30
|
|
|
|
2003-03-09 08:09:29 +05:30
|
|
|
Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
|
2000-07-11 04:25:51 +05:30
|
|
|
*/
|
|
|
|
|
1999-10-28 21:36:25 +05:30
|
|
|
#include <stdio.h>
|
2003-04-08 17:26:11 +05:30
|
|
|
#include <unistd.h> /* for getopt() */
|
|
|
|
#include <string.h> /* for strdup() */
|
1999-10-28 21:36:25 +05:30
|
|
|
#include <errno.h>
|
2003-04-08 17:26:11 +05:30
|
|
|
#include <ctype.h> /* for isspace() */
|
2001-01-27 13:54:39 +05:30
|
|
|
#include <stdlib.h>
|
2000-09-26 03:15:58 +05:30
|
|
|
#include "busybox.h"
|
2005-09-14 22:29:11 +05:30
|
|
|
#include "xregex.h"
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-03-09 15:53:57 +05:30
|
|
|
/* One parsed sed command: its address(es), its arguments and its list link. */
typedef struct sed_cmd_s {
	/* Members are ordered by alignment requirement to keep the struct small. */

	/* Address storage */
	regex_t *beg_match;	/* start address regex:  sed -e '/match/cmd' */
	regex_t *end_match;	/* end address regex:    sed -e '/match/,/end_match/cmd' */
	regex_t *sub_match;	/* pattern of (s):       's/sub_match/string/' */
	int beg_line;		/* 'sed 1p';   0 == apply command to all lines */
	int end_line;		/* 'sed 1,3p'; 0 == one line only, -1 == last line ($) */

	FILE *file;		/* Stream the (sw) commands write to, NULL for none */
	char *string;		/* Data string for (saicytb) commands */

	unsigned short which_match;	/* (s) Which match to replace (0 for all) */

	/* Bitfields, declared together so the compiler can pack them */
	unsigned int invert:1;		/* a '!' followed the address */
	unsigned int in_match:1;	/* Next line also included in match? */
	unsigned int no_newline:1;	/* Last line written by (sw) had no '\n' */
	unsigned int sub_p:1;		/* (s) 'p' flag: print after substituting */

	/* General fields */
	char cmd;			/* The command char: abcdDgGhHilnNpPqrstwxy:={} */
	struct sed_cmd_s *next;		/* Next command (linked list, NULL terminated) */
} sed_cmd_t;
|
2000-07-11 04:25:51 +05:30
|
|
|
|
|
|
|
/* ---- file-scope state ---- */

/* Options */
static int be_quiet;	/* incremented by a "#n" script comment (same as -n) */
static int in_place;	/* NOTE(review): presumably the -i flag; set outside this chunk */
static int regex_type;	/* base cflags OR-ed into every regex compilation */
static FILE *nonstdout;	/* NOTE(review): assigned outside this chunk - confirm its role */
static char *outname;	/* temp file removed by cleanup_outname(), NULL if none */
static char *hold_space;	/* hold space buffer, freed at cleanup */

/* List of input files */
static int input_file_count;
static int current_input_file;
static FILE **input_file_list;

/* Shared error text for malformed s/// and y/// expressions */
static const char bad_format_in_subst[] =
	"bad format in substitution expression";
/* Characters that separate commands / terminate a label */
static const char *const semicolon_whitespace = "; \n\r\t\v";

/* Match offsets of the most recent regexec(): whole match plus \1..\9 */
static regmatch_t regmatch[10];
/* Regex reused when an (s) command is given an empty pattern */
static regex_t *previous_regex_ptr;
|
2003-04-09 07:13:54 +05:30
|
|
|
|
2003-03-28 09:23:31 +05:30
|
|
|
/* linked list of sed commands */
|
|
|
|
static sed_cmd_t sed_cmd_head;
|
|
|
|
static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
|
1999-10-28 21:36:25 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Linked list of append lines */
|
Doug Swarin pointed out a security bug in the -i option of sed.
While the permissions on the temp file are correct to prevent it from being
maliciously mangled by passing strangers, (created with 600, opened O_EXCL,
etc), the permissions on the _directory_ might not be, and we re-open the
file to convert the filehandle to a FILE * (and automatically get an error
message and exit if the directory's read-only or out of space or some such).
This opens a potential race condition if somebody's using dnotify on the
directory, deletes/renames the tempfile, and drops a symlink or something
there. Somebody running sed -i as root in a world writeable directory could
do damage.
I dug up notes on an earlier discussion where we looked at the security
implications of this (unfortunately on the #uclibc channel rather than email;
I don't have a transcript, just notes-to-self) which pointed out that if the
permissions on the directory allow other people's files to be deleted/renamed
then the original file is vulnerable to sabotage anyway. However, there are
two cases that discussion apparently didn't take into account:
1) Using another user's permissions to damage files in other directories you
can't access (standard symlink attack).
2) Reading data another user couldn't otherwise access by having the new file
belong to that other user.
This patch uses fdopen to convert the filehandle into a FILE *, rather than
reopening the file.
2005-05-18 11:26:16 +05:30
|
|
|
/* Node of the list of pending append lines; owns its string. */
struct append_list {
	char *string;			/* heap-allocated text of this entry */
	struct append_list *next;	/* following entry, NULL at the end */
};

static struct append_list *append_head = NULL;
static struct append_list *append_tail = NULL;
|
2003-04-09 07:13:54 +05:30
|
|
|
|
2006-03-02 15:36:22 +05:30
|
|
|
void sed_free_and_close_stuff(void);
#if ENABLE_FEATURE_CLEAN_UP
/*
 * Release everything this file allocated: pending append lines, the whole
 * command list (closing any output file a command holds and freeing its
 * compiled regexes), the hold space, and every input file not yet consumed.
 */
void sed_free_and_close_stuff(void)
{
	sed_cmd_t *cmd;
	sed_cmd_t *following;

	/* Pending append lines */
	while (append_head) {
		struct append_list *doomed = append_head;

		append_tail = doomed->next;
		append_head = append_tail;
		free(doomed->string);
		free(doomed);
	}

	/* Command list; sed_cmd_head itself is static, only its successors are freed */
	for (cmd = sed_cmd_head.next; cmd; cmd = following) {
		following = cmd->next;

		if (cmd->file)
			bb_xprint_and_close_file(cmd->file);

		if (cmd->beg_match) {
			regfree(cmd->beg_match);
			free(cmd->beg_match);
		}
		if (cmd->end_match) {
			regfree(cmd->end_match);
			free(cmd->end_match);
		}
		if (cmd->sub_match) {
			regfree(cmd->sub_match);
			free(cmd->sub_match);
		}
		free(cmd->string);
		free(cmd);
	}

	/* free(NULL) is a no-op, so no guard is needed */
	free(hold_space);

	/* Input files that were never reached */
	for (; current_input_file < input_file_count; current_input_file++)
		fclose(input_file_list[current_input_file]);
}
#endif
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
/* If something bad happens during -i operation, delete temp file */
|
|
|
|
|
|
|
|
static void cleanup_outname(void)
|
|
|
|
{
|
|
|
|
if(outname) unlink(outname);
|
|
|
|
}
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/*
 * Copy the first len characters of string into dest, processing backslash
 * escapes on the way, and NUL-terminate the result.
 *
 *   to == 0:  every backslash is dropped and the character after it is
 *             copied literally.
 *   to != 0:  the two-character sequence "\<from>" becomes the single
 *             character <to>; any other "\<x>" pair is copied unchanged
 *             (both characters).
 *
 * The output is never longer than the input, so dest may be the same buffer
 * as string; otherwise dest needs room for len+1 bytes.
 *
 * A backslash that is the last character inside len has nothing to escape:
 * it is dropped when to == 0 and copied as-is otherwise.  Characters at or
 * beyond string[len] are never read or copied, so at most len+1 bytes are
 * ever written to dest.
 */
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	int i = 0;

	while (i < len) {
		if (string[i] == '\\') {
			if (i + 1 >= len) {
				/* Trailing backslash: there is no partner inside len. */
				if (to)
					*(dest++) = '\\';
				break;
			}
			if (!to || string[i + 1] == from) {
				*(dest++) = to ? to : string[i + 1];
				i += 2;
				continue;
			}
			/* Some other escape: keep the backslash here, and the
			 * escaped character through the common copy below. */
			*(dest++) = string[i++];
		}
		*(dest++) = string[i++];
	}
	*dest = 0;
}
|
|
|
|
|
2005-11-20 13:14:35 +05:30
|
|
|
/*
 * Return a newly allocated copy of the first len characters of string in
 * which each two-character "\n" sequence has been turned into a real
 * newline.  The caller owns (and must free) the result.
 */
static char *copy_parsing_escapes(const char *string, int len)
{
	char *copy;

	copy = xmalloc(len + 1);
	parse_escapes(copy, string, len, 'n', '\n');

	return copy;
}
|
|
|
|
|
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
/*
 * index_of_next_unescaped_regexp_delim - scan str from its start and return
 * the index of the first regular expression delimiter (typically a forward
 * slash, '/') that is neither preceded by a backslash ('\') nor located
 * inside a [...] bracket expression.
 *
 * Returns -1 when the end of the string is reached without finding one.
 */
static int index_of_next_unescaped_regexp_delim(const char delimiter,
	const char *str)
{
	int open_bracket = -1;		/* index of the '[' we are inside of, -1 if none */
	int after_backslash = 0;	/* previous character was an unescaped '\' */
	int pos;

	for (pos = 0; str[pos] != '\0'; pos++) {
		const char c = str[pos];

		if (open_bracket != -1) {
			/* Inside [...] nothing but the closing ']' is special, and a
			 * ']' placed first in the set ("[]" or "[^]") is a member of
			 * the set rather than its end. */
			if (c == ']') {
				int first_in_set = (open_bracket == pos - 1);
				int first_after_caret = (open_bracket == pos - 2
						&& str[pos - 1] == '^');

				if (!first_in_set && !first_after_caret)
					open_bracket = -1;
			}
			continue;
		}

		if (after_backslash) {
			/* This character is escaped, whatever it is. */
			after_backslash = 0;
			continue;
		}

		if (c == '\\') {
			after_backslash = 1;
			continue;
		}

		if (c == '[') {
			open_bracket = pos;
			continue;
		}

		if (c == delimiter)
			return pos;
	}

	/* ran off the end of the string without seeing a usable delimiter */
	return -1;
}
|
|
|
|
|
2003-09-14 09:36:12 +05:30
|
|
|
/*
|
|
|
|
* Returns the index of the third delimiter
|
|
|
|
*/
|
2003-03-30 13:32:18 +05:30
|
|
|
static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
|
|
|
|
{
|
|
|
|
const char *cmdstr_ptr = cmdstr;
|
|
|
|
char delimiter;
|
|
|
|
int idx = 0;
|
|
|
|
|
2003-09-14 09:36:12 +05:30
|
|
|
/* verify that the 's' or 'y' is followed by something. That something
|
2003-03-30 13:32:18 +05:30
|
|
|
* (typically a 'slash') is now our regexp delimiter... */
|
2003-10-01 08:36:16 +05:30
|
|
|
if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst);
|
|
|
|
delimiter = *(cmdstr_ptr++);
|
2003-03-30 13:32:18 +05:30
|
|
|
|
|
|
|
/* save the match string */
|
|
|
|
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
|
|
|
|
if (idx == -1) {
|
2003-03-30 15:08:40 +05:30
|
|
|
bb_error_msg_and_die(bad_format_in_subst);
|
2003-03-30 13:32:18 +05:30
|
|
|
}
|
2005-11-20 13:14:35 +05:30
|
|
|
*match = copy_parsing_escapes(cmdstr_ptr, idx);
|
2003-03-30 13:32:18 +05:30
|
|
|
|
|
|
|
/* save the replacement string */
|
|
|
|
cmdstr_ptr += idx + 1;
|
|
|
|
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
|
|
|
|
if (idx == -1) {
|
2003-03-30 15:08:40 +05:30
|
|
|
bb_error_msg_and_die(bad_format_in_subst);
|
2003-03-30 13:32:18 +05:30
|
|
|
}
|
2005-11-20 13:14:35 +05:30
|
|
|
*replace = copy_parsing_escapes(cmdstr_ptr, idx);
|
2003-03-30 13:32:18 +05:30
|
|
|
|
2003-04-08 17:26:11 +05:30
|
|
|
return ((cmdstr_ptr - cmdstr) + idx);
|
2003-03-30 13:32:18 +05:30
|
|
|
}
|
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
/*
|
|
|
|
* returns the index in the string just past where the address ends.
|
|
|
|
*/
|
2003-04-08 17:26:11 +05:30
|
|
|
static int get_address(char *my_str, int *linenum, regex_t ** regex)
|
1999-12-30 03:49:46 +05:30
|
|
|
{
|
2003-09-15 14:52:04 +05:30
|
|
|
char *pos = my_str;
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2003-09-14 09:36:12 +05:30
|
|
|
if (isdigit(*my_str)) {
|
|
|
|
*linenum = strtol(my_str, &pos, 10);
|
2003-03-09 14:14:49 +05:30
|
|
|
/* endstr shouldnt ever equal NULL */
|
2003-09-14 09:36:12 +05:30
|
|
|
} else if (*my_str == '$') {
|
2001-06-12 05:20:06 +05:30
|
|
|
*linenum = -1;
|
2003-09-14 09:36:12 +05:30
|
|
|
pos++;
|
|
|
|
} else if (*my_str == '/' || *my_str == '\\') {
|
2003-10-01 08:36:16 +05:30
|
|
|
int next;
|
2003-03-30 13:32:18 +05:30
|
|
|
char delimiter;
|
2003-10-01 08:36:16 +05:30
|
|
|
char *temp;
|
2003-03-09 14:14:49 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
if (*my_str == '\\') delimiter = *(++pos);
|
|
|
|
else delimiter = '/';
|
2003-09-14 09:36:12 +05:30
|
|
|
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
|
2003-10-01 08:36:16 +05:30
|
|
|
if (next == -1)
|
2003-03-19 14:43:01 +05:30
|
|
|
bb_error_msg_and_die("unterminated match expression");
|
2004-03-15 13:59:22 +05:30
|
|
|
|
2005-11-20 13:14:35 +05:30
|
|
|
temp=copy_parsing_escapes(pos,next);
|
2003-04-08 17:26:11 +05:30
|
|
|
*regex = (regex_t *) xmalloc(sizeof(regex_t));
|
2004-05-26 15:33:33 +05:30
|
|
|
xregcomp(*regex, temp, regex_type|REG_NEWLINE);
|
2003-10-01 08:36:16 +05:30
|
|
|
free(temp);
|
|
|
|
/* Move position to next character after last delimiter */
|
|
|
|
pos+=(next+1);
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
2003-09-14 09:36:12 +05:30
|
|
|
return pos - my_str;
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
1999-12-30 03:49:46 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Grab a filename. Whitespace at start is skipped, then goes to EOL. */
|
|
|
|
static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval)
|
|
|
|
{
|
|
|
|
int start = 0, idx, hack=0;
|
|
|
|
|
|
|
|
/* Skip whitespace, then grab filename to end of line */
|
|
|
|
while (isspace(filecmdstr[start])) start++;
|
|
|
|
idx=start;
|
|
|
|
while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++;
|
|
|
|
/* If lines glued together, put backslash back. */
|
|
|
|
if(filecmdstr[idx]=='\n') hack=1;
|
|
|
|
if(idx==start) bb_error_msg_and_die("Empty filename");
|
|
|
|
*retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1);
|
|
|
|
if(hack) *(idx+*retval)='\\';
|
|
|
|
|
|
|
|
return idx;
|
|
|
|
}
|
|
|
|
|
2003-10-09 13:48:36 +05:30
|
|
|
/*
 * Parse the argument of an (s) command.  substr points at the delimiter
 * that follows the 's'.
 *
 * Fills in sed_cmd->string (replacement text), sed_cmd->sub_match (compiled
 * pattern; left NULL when the pattern is empty), sed_cmd->which_match,
 * sed_cmd->sub_p and, for the 'w' flag, sed_cmd->file.
 *
 * Returns the index, relative to substr, of the character that ended flag
 * parsing: the terminating NUL, a ';' or a '}'.
 */
static int parse_subst_cmd(sed_cmd_t * const sed_cmd, char *substr)
{
	int cflags = regex_type;
	char *match;
	int idx = 0;

	/*
	 * A substitution command should look something like this:
	 *    s/match/replace/ #gIpw
	 *    ||     |        |||
	 *    mandatory       optional
	 */
	idx = parse_regex_delim(substr, &match, &sed_cmd->string);

	/* process the flags */

	sed_cmd->which_match = 1;
	while (substr[++idx]) {
		/* Parse match number */
		if (isdigit((unsigned char)substr[idx])) {
			if (match[0] != '^') {
				/* Match 0 treated as all, multiple matches we take the last one. */
				char *pos = substr + idx;

				sed_cmd->which_match = (unsigned short)strtol(substr + idx, &pos, 10);
				/* Leave idx on the last digit: the ++idx of the loop
				 * then lands on the first character after the number
				 * instead of skipping it (or stepping past the NUL). */
				idx = pos - substr - 1;
			}
			continue;
		}
		/* Skip spaces */
		if (isspace((unsigned char)substr[idx])) continue;

		switch (substr[idx]) {
			/* Replace all occurrences */
			case 'g':
				if (match[0] != '^') sed_cmd->which_match = 0;
				break;
			/* Print pattern space */
			case 'p':
				sed_cmd->sub_p = 1;
				break;
			/* Write to file */
			case 'w':
			{
				char *temp;

				/* The filename starts after the 'w' itself.  The value
				 * returned is relative to substr+idx+1, so after the
				 * addition idx sits on the last character of the name
				 * and the loop's ++idx reaches the newline/NUL. */
				idx += parse_file_cmd(sed_cmd, substr + idx + 1, &temp);
				/* sed_cmd->file is the stream (s) and (w) write to;
				 * open it the same way the standalone (w) command does. */
				sed_cmd->file = bb_xfopen(temp, "w");
				free(temp);
				break;
			}
			/* Ignore case (GNU extension) */
			case 'I':
				cflags |= REG_ICASE;
				break;
			/* Comment */
			case '#':
				while (substr[++idx]);
				/* Fall through */
			/* End of command */
			case ';':
			case '}':
				goto out;
			default:
				bb_error_msg_and_die("bad option in substitution expression");
		}
	}
out:
	/* compile the match string into a regex */
	if (*match != '\0') {
		/* If match is empty, we use last regex used at runtime */
		sed_cmd->sub_match = xmalloc(sizeof(regex_t));
		xregcomp(sed_cmd->sub_match, match, cflags);
	}
	free(match);

	return idx;
}
|
|
|
|
|
2003-03-18 06:49:23 +05:30
|
|
|
/*
|
|
|
|
* Process the commands arguments
|
|
|
|
*/
|
2003-10-01 08:36:16 +05:30
|
|
|
static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
|
2000-07-11 04:25:51 +05:30
|
|
|
{
|
2003-03-09 20:42:24 +05:30
|
|
|
/* handle (s)ubstitution command */
|
2003-10-01 08:36:16 +05:30
|
|
|
if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
|
2003-03-09 20:42:24 +05:30
|
|
|
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
|
|
|
|
else if (strchr("aic", sed_cmd->cmd)) {
|
|
|
|
if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
|
2003-04-08 17:26:11 +05:30
|
|
|
bb_error_msg_and_die
|
|
|
|
("only a beginning address can be specified for edit commands");
|
2004-04-01 14:53:30 +05:30
|
|
|
for(;;) {
|
|
|
|
if(*cmdstr=='\n' || *cmdstr=='\\') {
|
|
|
|
cmdstr++;
|
|
|
|
break;
|
|
|
|
} else if(isspace(*cmdstr)) cmdstr++;
|
|
|
|
else break;
|
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
sed_cmd->string = bb_xstrdup(cmdstr);
|
2003-10-01 12:15:11 +05:30
|
|
|
parse_escapes(sed_cmd->string,sed_cmd->string,strlen(cmdstr),0,0);
|
2003-10-01 08:36:16 +05:30
|
|
|
cmdstr += strlen(cmdstr);
|
2003-03-09 20:42:24 +05:30
|
|
|
/* handle file cmds: (r)ead */
|
2003-10-01 08:36:16 +05:30
|
|
|
} else if(strchr("rw", sed_cmd->cmd)) {
|
2003-03-09 20:42:24 +05:30
|
|
|
if (sed_cmd->end_line || sed_cmd->end_match)
|
2003-03-19 14:43:01 +05:30
|
|
|
bb_error_msg_and_die("Command only uses one address");
|
2003-10-01 08:36:16 +05:30
|
|
|
cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);
|
|
|
|
if(sed_cmd->cmd=='w')
|
|
|
|
sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
|
2003-03-28 10:13:39 +05:30
|
|
|
/* handle branch commands */
|
2005-05-18 12:04:37 +05:30
|
|
|
} else if (strchr(":btT", sed_cmd->cmd)) {
|
2003-03-28 10:13:39 +05:30
|
|
|
int length;
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
while(isspace(*cmdstr)) cmdstr++;
|
2003-09-14 11:31:14 +05:30
|
|
|
length = strcspn(cmdstr, semicolon_whitespace);
|
|
|
|
if (length) {
|
2006-03-10 03:51:20 +05:30
|
|
|
sed_cmd->string = bb_xstrndup(cmdstr, length);
|
2003-09-14 11:31:14 +05:30
|
|
|
cmdstr += length;
|
|
|
|
}
|
2003-03-28 10:13:39 +05:30
|
|
|
}
|
2003-03-30 13:32:18 +05:30
|
|
|
/* translation command */
|
|
|
|
else if (sed_cmd->cmd == 'y') {
|
2003-10-01 08:36:16 +05:30
|
|
|
char *match, *replace;
|
|
|
|
int i=cmdstr[0];
|
|
|
|
|
|
|
|
cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1;
|
|
|
|
/* \n already parsed, but \delimiter needs unescaping. */
|
|
|
|
parse_escapes(match,match,strlen(match),i,i);
|
|
|
|
parse_escapes(replace,replace,strlen(replace),i,i);
|
|
|
|
|
|
|
|
sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2);
|
|
|
|
for (i = 0; match[i] && replace[i]; i++) {
|
|
|
|
sed_cmd->string[i * 2] = match[i];
|
|
|
|
sed_cmd->string[(i * 2) + 1] = replace[i];
|
|
|
|
}
|
|
|
|
free(match);
|
|
|
|
free(replace);
|
2003-03-30 13:32:18 +05:30
|
|
|
}
|
2003-03-18 06:49:23 +05:30
|
|
|
/* if it wasnt a single-letter command that takes no arguments
|
|
|
|
* then it must be an invalid command.
|
|
|
|
*/
|
2003-10-01 08:36:16 +05:30
|
|
|
else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
|
2003-03-19 14:43:01 +05:30
|
|
|
bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
|
2003-03-09 20:42:24 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
/* give back whatever's left over */
|
2003-04-08 17:26:11 +05:30
|
|
|
return (cmdstr);
|
2003-03-09 20:42:24 +05:30
|
|
|
}
|
|
|
|
|
2003-09-14 09:36:12 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Parse address+command sets, skipping comment lines. */
|
2003-03-09 20:42:24 +05:30
|
|
|
|
2005-04-17 01:09:00 +05:30
|
|
|
static void add_cmd(char *cmdstr)
|
2003-10-01 08:36:16 +05:30
|
|
|
{
|
|
|
|
static char *add_cmd_line=NULL;
|
|
|
|
sed_cmd_t *sed_cmd;
|
2003-10-09 12:52:59 +05:30
|
|
|
int temp;
|
2003-03-09 20:42:24 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Append this line to any unfinished line from last time. */
|
|
|
|
if(add_cmd_line) {
|
|
|
|
int lastlen=strlen(add_cmd_line);
|
2003-10-09 13:48:36 +05:30
|
|
|
char *tmp=xmalloc(lastlen+strlen(cmdstr)+2);
|
2003-10-01 08:36:16 +05:30
|
|
|
|
2003-10-09 13:48:36 +05:30
|
|
|
memcpy(tmp,add_cmd_line,lastlen);
|
|
|
|
tmp[lastlen]='\n';
|
|
|
|
strcpy(tmp+lastlen+1,cmdstr);
|
2003-10-01 08:36:16 +05:30
|
|
|
free(add_cmd_line);
|
2003-10-09 13:48:36 +05:30
|
|
|
cmdstr=add_cmd_line=tmp;
|
2003-10-01 08:36:16 +05:30
|
|
|
} else add_cmd_line=NULL;
|
|
|
|
|
|
|
|
/* If this line ends with backslash, request next line. */
|
2003-10-09 12:52:59 +05:30
|
|
|
temp=strlen(cmdstr);
|
2003-10-01 08:36:16 +05:30
|
|
|
if(temp && cmdstr[temp-1]=='\\') {
|
|
|
|
if(!add_cmd_line) add_cmd_line=strdup(cmdstr);
|
|
|
|
add_cmd_line[temp-1]=0;
|
|
|
|
return;
|
2003-03-09 20:42:24 +05:30
|
|
|
}
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Loop parsing all commands in this line. */
|
|
|
|
while(*cmdstr) {
|
|
|
|
/* Skip leading whitespace and semicolons */
|
|
|
|
cmdstr += strspn(cmdstr, semicolon_whitespace);
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* If no more commands, exit. */
|
|
|
|
if(!*cmdstr) break;
|
2003-09-14 09:36:12 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* if this is a comment, jump past it and keep going */
|
|
|
|
if (*cmdstr == '#') {
|
|
|
|
/* "#n" is the same as using -n on the command line */
|
|
|
|
if (cmdstr[1] == 'n') be_quiet++;
|
|
|
|
if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break;
|
|
|
|
continue;
|
|
|
|
}
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* parse the command
|
|
|
|
* format is: [addr][,addr][!]cmd
|
|
|
|
* |----||-----||-|
|
|
|
|
* part1 part2 part3
|
|
|
|
*/
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* first part (if present) is an address: either a '$', a number or a /regex/ */
|
|
|
|
cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
|
2001-12-21 21:34:12 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* second part (if present) will begin with a comma */
|
|
|
|
if (*cmdstr == ',') {
|
|
|
|
int idx;
|
2002-06-12 05:13:27 +05:30
|
|
|
|
2003-03-09 16:36:38 +05:30
|
|
|
cmdstr++;
|
2003-10-01 08:36:16 +05:30
|
|
|
idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
|
|
|
|
if (!idx) bb_error_msg_and_die("get_address: no address found in string\n");
|
|
|
|
cmdstr += idx;
|
2003-03-09 20:42:24 +05:30
|
|
|
}
|
2003-03-09 16:36:38 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* skip whitespace before the command */
|
|
|
|
while (isspace(*cmdstr)) cmdstr++;
|
2003-03-09 20:42:24 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Check for inversion flag */
|
|
|
|
if (*cmdstr == '!') {
|
|
|
|
sed_cmd->invert = 1;
|
|
|
|
cmdstr++;
|
2003-04-08 12:12:45 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* skip whitespace before the command */
|
|
|
|
while (isspace(*cmdstr)) cmdstr++;
|
|
|
|
}
|
2003-04-08 12:12:45 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* last part (mandatory) will be a command */
|
|
|
|
if (!*cmdstr) bb_error_msg_and_die("missing command");
|
|
|
|
sed_cmd->cmd = *(cmdstr++);
|
|
|
|
cmdstr = parse_cmd_args(sed_cmd, cmdstr);
|
1999-12-30 03:49:46 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Add the command to the command array */
|
|
|
|
sed_cmd_tail->next = sed_cmd;
|
|
|
|
sed_cmd_tail = sed_cmd_tail->next;
|
2003-03-30 09:11:53 +05:30
|
|
|
}
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* If we glued multiple lines together, free the memory. */
|
2006-03-01 22:09:45 +05:30
|
|
|
free(add_cmd_line);
|
|
|
|
add_cmd_line=NULL;
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
1999-12-30 03:49:46 +05:30
|
|
|
|
2004-11-25 12:51:47 +05:30
|
|
|
/*
 * Growable character buffer that output text is appended to one character
 * at a time; memory is reallocated as necessary, PIPE_GROW bytes per step.
 */
static struct pipeline {
	char *buf;	/* storage holding the string built so far */
	int idx;	/* number of bytes of buf in use */
	int len;	/* number of bytes of buf allocated */
} pipeline;

/* Allocation increment of pipeline.buf, in bytes */
#define PIPE_GROW 64
|
2001-11-10 16:19:42 +05:30
|
|
|
|
2005-04-17 01:09:00 +05:30
|
|
|
static void pipe_putc(char c)
|
2001-11-10 16:19:42 +05:30
|
|
|
{
|
2003-10-01 08:36:16 +05:30
|
|
|
if(pipeline.idx==pipeline.len) {
|
|
|
|
pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW);
|
|
|
|
pipeline.len+=PIPE_GROW;
|
2001-11-10 16:19:42 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
pipeline.buf[pipeline.idx++] = (c);
|
2001-11-10 16:19:42 +05:30
|
|
|
}
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
static void do_subst_w_backrefs(const char *line, const char *replace)
|
2000-07-18 01:36:42 +05:30
|
|
|
{
|
2003-10-01 08:36:16 +05:30
|
|
|
int i,j;
|
2000-07-18 01:36:42 +05:30
|
|
|
|
|
|
|
/* go through the replacement string */
|
|
|
|
for (i = 0; replace[i]; i++) {
|
|
|
|
/* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
|
2005-10-30 15:38:13 +05:30
|
|
|
if (replace[i] == '\\' && replace[i+1]>='0' && replace[i+1]<='9') {
|
2003-10-01 08:36:16 +05:30
|
|
|
int backref=replace[++i]-'0';
|
|
|
|
|
2000-07-18 01:36:42 +05:30
|
|
|
/* print out the text held in regmatch[backref] */
|
2003-10-01 08:36:16 +05:30
|
|
|
if(regmatch[backref].rm_so != -1)
|
|
|
|
for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
|
|
|
|
pipe_putc(line[j]);
|
2000-07-18 01:36:42 +05:30
|
|
|
}
|
|
|
|
|
2000-07-26 02:18:44 +05:30
|
|
|
/* if we find a backslash escaped character, print the character */
|
2003-10-01 08:36:16 +05:30
|
|
|
else if (replace[i] == '\\') pipe_putc(replace[++i]);
|
2003-04-11 22:40:23 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* if we find an unescaped '&' print out the whole matched text. */
|
|
|
|
else if (replace[i] == '&')
|
2000-07-18 01:36:42 +05:30
|
|
|
for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
|
2003-10-01 08:36:16 +05:30
|
|
|
pipe_putc(line[j]);
|
|
|
|
/* Otherwise just output the character. */
|
|
|
|
else pipe_putc(replace[i]);
|
2000-07-18 01:36:42 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-04-08 17:26:11 +05:30
|
|
|
/* Perform the 's' command described by sed_cmd on *line.
 *
 * Uses the command's own regex, or the most recently used regex when the
 * command's regex is empty (dies if there is none).  If the regex does not
 * match at all, *line is left untouched and 0 is returned.  Otherwise the
 * result is assembled in the global pipeline buffer, the old *line is freed
 * and *line is pointed at the new buffer.
 *
 * which_match selects a single occurrence to replace; a which_match of 0
 * replaces every occurrence.
 *
 * Returns the number of replacements actually made.
 */
static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
{
	char *oldline = *line;
	int altered = 0;
	int match_count = 0;
	regex_t *current_regex;

	/* Handle empty regex. */
	if (sed_cmd->sub_match == NULL) {
		current_regex = previous_regex_ptr;
		if (!current_regex)
			bb_error_msg_and_die("No previous regexp.");
	} else previous_regex_ptr = current_regex = sed_cmd->sub_match;

	/* Find the first match */
	if (REG_NOMATCH == regexec(current_regex, oldline, 10, regmatch, 0))
		return 0;

	/* Initialize temporary output buffer. */
	pipeline.buf = xmalloc(PIPE_GROW);
	pipeline.len = PIPE_GROW;
	pipeline.idx = 0;

	/* Now loop through, substituting for matches */
	do {
		int i;

		/* Work around bug in glibc regexec, demonstrated by:
		   echo " a.b" | busybox sed 's [^ .]* x g'
		   The match_count check is so not to break
		   echo "hi" | busybox sed 's/^/!/g' */
		if (!regmatch[0].rm_so && !regmatch[0].rm_eo && match_count) {
			pipe_putc(*(oldline++));
			continue;
		}

		match_count++;

		/* If we aren't interested in this match, output old line to
		   end of match and continue */
		if (sed_cmd->which_match && sed_cmd->which_match != match_count) {
			/* Consume the text as it is copied: if oldline stayed put,
			   the next regexec() would find this same match again and
			   the skipped text would be emitted a second time. */
			for (i = 0; i < regmatch[0].rm_eo; i++)
				pipe_putc(*(oldline++));
			continue;
		}

		/* print everything before the match */
		for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]);

		/* then print the substitution string */
		do_subst_w_backrefs(oldline, sed_cmd->string);

		/* advance past the match */
		oldline += regmatch[0].rm_eo;
		/* flag that something has changed */
		altered++;

		/* if we're not doing this globally, get out now */
		if (sed_cmd->which_match) break;
	} while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH));

	/* Copy rest of string into output pipeline */

	while (*oldline) pipe_putc(*(oldline++));
	pipe_putc(0);

	/* Hand the newly built string to the caller in place of the old one. */
	free(*line);
	*line = pipeline.buf;
	return altered;
}
|
2000-03-07 00:50:35 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Set command pointer to point to this label. (Does not handle null label.) */
|
2003-03-28 10:13:39 +05:30
|
|
|
static sed_cmd_t *branch_to(const char *label)
|
|
|
|
{
|
|
|
|
sed_cmd_t *sed_cmd;
|
2003-04-11 22:40:23 +05:30
|
|
|
|
2003-04-09 07:13:54 +05:30
|
|
|
for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
|
2003-10-01 08:36:16 +05:30
|
|
|
if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) {
|
2003-09-14 11:31:14 +05:30
|
|
|
return (sed_cmd);
|
2003-03-28 10:13:39 +05:30
|
|
|
}
|
|
|
|
}
|
2003-09-14 11:31:14 +05:30
|
|
|
bb_error_msg_and_die("Can't find label for jump to `%s'", label);
|
2003-03-28 10:13:39 +05:30
|
|
|
}
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Append copy of string to append buffer */
|
|
|
|
static void append(char *s)
|
2000-07-11 04:25:51 +05:30
|
|
|
{
|
2003-10-01 08:36:16 +05:30
|
|
|
struct append_list *temp=calloc(1,sizeof(struct append_list));
|
|
|
|
|
|
|
|
if(append_head)
|
|
|
|
append_tail=(append_tail->next=temp);
|
|
|
|
else append_head=append_tail=temp;
|
|
|
|
temp->string=strdup(s);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void flush_append(void)
|
|
|
|
{
|
|
|
|
/* Output appended lines. */
|
|
|
|
while(append_head) {
|
2004-02-18 15:24:15 +05:30
|
|
|
fprintf(nonstdout,"%s\n",append_head->string);
|
2003-10-01 08:36:16 +05:30
|
|
|
append_tail=append_head->next;
|
|
|
|
free(append_head->string);
|
|
|
|
free(append_head);
|
|
|
|
append_head=append_tail;
|
|
|
|
}
|
|
|
|
append_head=append_tail=NULL;
|
|
|
|
}
|
|
|
|
|
2005-04-17 01:09:00 +05:30
|
|
|
static void add_input_file(FILE *file)
|
2004-11-25 12:51:47 +05:30
|
|
|
{
|
|
|
|
input_file_list=xrealloc(input_file_list,(input_file_count+1)*sizeof(FILE *));
|
|
|
|
input_file_list[input_file_count++]=file;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get next line of input from input_file_list, flushing append buffer and
|
|
|
|
* noting if we ran out of files without a newline on the last line we read.
|
2003-10-01 08:36:16 +05:30
|
|
|
*/
|
2004-11-25 12:51:47 +05:30
|
|
|
static char *get_next_line(int *no_newline)
|
2003-10-01 08:36:16 +05:30
|
|
|
{
|
2004-11-25 12:51:47 +05:30
|
|
|
char *temp=NULL;
|
2003-10-01 08:36:16 +05:30
|
|
|
int len;
|
|
|
|
|
|
|
|
flush_append();
|
2006-02-24 08:00:39 +05:30
|
|
|
while (current_input_file<input_file_count) {
|
|
|
|
temp = bb_get_chunk_from_file(input_file_list[current_input_file],&len);
|
|
|
|
if (temp) {
|
|
|
|
*no_newline = !(len && temp[len-1]=='\n');
|
|
|
|
if (!*no_newline) temp[len-1] = 0;
|
2004-11-25 12:51:47 +05:30
|
|
|
break;
|
2006-02-24 08:00:39 +05:30
|
|
|
// Close this file and advance to next one
|
2004-11-25 12:51:47 +05:30
|
|
|
} else fclose(input_file_list[current_input_file++]);
|
2003-03-08 10:51:02 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
return temp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Write string s to file.  A nonzero missing_newline means the previous
   output to this file lacked its final newline, so one is emitted first.
   A nonzero no_newline means s itself must not be followed by a newline.
   Exits with status 4 if the stream reports a write error; otherwise
   returns no_newline so the caller can pass it back in as the next
   missing_newline. */

static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline)
{
	if (missing_newline)
		fputc('\n', file);
	fputs(s, file);
	if (no_newline == 0)
		fputc('\n', file);

	if (!ferror(file))
		return no_newline;

	fprintf(stderr, "Write failed.\n");
	exit(4);	/* It's what gnu sed exits with... */
}
|
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
/* Write s to nonstdout via puts_maybe_newline() and store its return value
   in missing_newline.  Expands to an assignment, so a variable named
   missing_newline must be in scope wherever this macro is used. */
#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,nonstdout,missing_newline,n)
|
2003-10-01 08:36:16 +05:30
|
|
|
|
2006-02-24 08:00:39 +05:30
|
|
|
/* Process all the lines in all the files */
|
|
|
|
|
2004-11-25 12:51:47 +05:30
|
|
|
static void process_files(void)
|
2003-10-01 08:36:16 +05:30
|
|
|
{
|
2004-10-30 12:24:19 +05:30
|
|
|
char *pattern_space, *next_line;
|
2004-11-25 12:51:47 +05:30
|
|
|
int linenum = 0, missing_newline=0;
|
2003-10-01 08:36:16 +05:30
|
|
|
int no_newline,next_no_newline=0;
|
|
|
|
|
2006-02-24 08:00:39 +05:30
|
|
|
/* Prime the pump */
|
2004-11-25 12:51:47 +05:30
|
|
|
next_line = get_next_line(&next_no_newline);
|
2003-10-01 08:36:16 +05:30
|
|
|
|
2004-11-25 12:51:47 +05:30
|
|
|
/* go through every line in each file */
|
2003-10-01 08:36:16 +05:30
|
|
|
for(;;) {
|
2003-03-28 09:23:31 +05:30
|
|
|
sed_cmd_t *sed_cmd;
|
2003-10-01 08:36:16 +05:30
|
|
|
int substituted=0;
|
|
|
|
|
|
|
|
/* Advance to next line. Stop if out of lines. */
|
|
|
|
if(!(pattern_space=next_line)) break;
|
|
|
|
no_newline=next_no_newline;
|
2003-03-08 10:51:02 +05:30
|
|
|
|
2006-02-24 08:00:39 +05:30
|
|
|
/* Read one line in advance so we can act on the last line,
|
|
|
|
* the '$' address */
|
2004-11-25 12:51:47 +05:30
|
|
|
next_line = get_next_line(&next_no_newline);
|
2000-07-11 04:25:51 +05:30
|
|
|
linenum++;
|
2003-10-01 08:36:16 +05:30
|
|
|
restart:
|
2000-07-11 04:25:51 +05:30
|
|
|
/* for every line, go through all the commands */
|
2003-04-11 22:40:23 +05:30
|
|
|
for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
|
2003-12-23 14:23:51 +05:30
|
|
|
int old_matched, matched;
|
|
|
|
|
|
|
|
old_matched = sed_cmd->in_match;
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
/* Determine if this command matches this line: */
|
|
|
|
|
|
|
|
/* Are we continuing a previous multi-line match? */
|
|
|
|
|
|
|
|
sed_cmd->in_match = sed_cmd->in_match
|
|
|
|
|
|
|
|
/* Or is no range necessary? */
|
|
|
|
|| (!sed_cmd->beg_line && !sed_cmd->end_line
|
|
|
|
&& !sed_cmd->beg_match && !sed_cmd->end_match)
|
|
|
|
|
|
|
|
/* Or did we match the start of a numerical range? */
|
|
|
|
|| (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))
|
|
|
|
|
|
|
|
/* Or does this line match our begin address regex? */
|
|
|
|
|| (sed_cmd->beg_match &&
|
|
|
|
!regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))
|
|
|
|
|
|
|
|
/* Or did we match last line of input? */
|
|
|
|
|| (sed_cmd->beg_line == -1 && next_line == NULL);
|
|
|
|
|
|
|
|
/* Snapshot the value */
|
|
|
|
|
|
|
|
matched = sed_cmd->in_match;
|
|
|
|
|
|
|
|
/* Is this line the end of the current match? */
|
|
|
|
|
|
|
|
if(matched) {
|
|
|
|
sed_cmd->in_match = !(
|
|
|
|
/* has the ending line come, or is this a single address command? */
|
|
|
|
(sed_cmd->end_line ?
|
|
|
|
sed_cmd->end_line==-1 ?
|
|
|
|
!next_line
|
|
|
|
: sed_cmd->end_line<=linenum
|
|
|
|
: !sed_cmd->end_match)
|
|
|
|
/* or does this line matches our last address regex */
|
2003-12-23 14:23:51 +05:30
|
|
|
|| (sed_cmd->end_match && old_matched && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0))
|
2003-04-08 17:26:11 +05:30
|
|
|
);
|
2003-10-01 08:36:16 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
/* Skip blocks of commands we didn't match. */
|
2003-04-12 21:40:42 +05:30
|
|
|
if (sed_cmd->cmd == '{') {
|
2003-10-01 08:36:16 +05:30
|
|
|
if(sed_cmd->invert ? matched : !matched)
|
|
|
|
while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next;
|
|
|
|
if(!sed_cmd) bb_error_msg_and_die("Unterminated {");
|
|
|
|
continue;
|
2003-04-12 21:40:42 +05:30
|
|
|
}
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Okay, so did this line match? */
|
|
|
|
if (sed_cmd->invert ? !matched : matched) {
|
|
|
|
/* Update last used regex in case a blank substitute BRE is found */
|
2003-04-08 12:12:45 +05:30
|
|
|
if (sed_cmd->beg_match) {
|
|
|
|
previous_regex_ptr = sed_cmd->beg_match;
|
|
|
|
}
|
2001-06-12 05:20:06 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* actual sedding */
|
2001-11-10 16:19:42 +05:30
|
|
|
switch (sed_cmd->cmd) {
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
/* Print line number */
|
|
|
|
case '=':
|
2004-02-18 15:24:15 +05:30
|
|
|
fprintf(nonstdout,"%d\n", linenum);
|
2003-09-15 11:58:45 +05:30
|
|
|
break;
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
/* Write the current pattern space up to the first newline */
|
|
|
|
case 'P':
|
2003-04-08 17:26:11 +05:30
|
|
|
{
|
2003-10-01 08:36:16 +05:30
|
|
|
char *tmp = strchr(pattern_space, '\n');
|
2003-03-30 09:11:53 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
if (tmp) {
|
|
|
|
*tmp = '\0';
|
|
|
|
sed_puts(pattern_space,1);
|
|
|
|
*tmp = '\n';
|
|
|
|
break;
|
2003-03-30 09:11:53 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Fall Through */
|
2003-04-08 17:26:11 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
/* Write the current pattern space to output */
|
|
|
|
case 'p':
|
|
|
|
sed_puts(pattern_space,no_newline);
|
|
|
|
break;
|
|
|
|
/* Delete up through first newline */
|
|
|
|
case 'D':
|
|
|
|
{
|
|
|
|
char *tmp = strchr(pattern_space,'\n');
|
|
|
|
|
|
|
|
if(tmp) {
|
|
|
|
tmp=bb_xstrdup(tmp+1);
|
|
|
|
free(pattern_space);
|
|
|
|
pattern_space=tmp;
|
|
|
|
goto restart;
|
|
|
|
}
|
2003-04-08 17:26:11 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
/* discard this line. */
|
|
|
|
case 'd':
|
|
|
|
goto discard_line;
|
|
|
|
|
|
|
|
/* Substitute with regex */
|
|
|
|
case 's':
|
|
|
|
if(do_subst_command(sed_cmd, &pattern_space)) {
|
|
|
|
substituted|=1;
|
|
|
|
|
|
|
|
/* handle p option */
|
|
|
|
if(sed_cmd->sub_p)
|
|
|
|
sed_puts(pattern_space,no_newline);
|
|
|
|
/* handle w option */
|
|
|
|
if(sed_cmd->file)
|
|
|
|
sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline);
|
|
|
|
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Append line to linked list to be printed later */
|
|
|
|
case 'a':
|
|
|
|
{
|
|
|
|
append(sed_cmd->string);
|
|
|
|
break;
|
2003-04-08 17:26:11 +05:30
|
|
|
}
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Insert text before this line */
|
|
|
|
case 'i':
|
|
|
|
sed_puts(sed_cmd->string,1);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Cut and paste text (replace) */
|
|
|
|
case 'c':
|
|
|
|
/* Only triggers on last line of a matching range. */
|
2004-10-30 12:24:19 +05:30
|
|
|
if (!sed_cmd->in_match) sed_puts(sed_cmd->string,0);
|
2003-10-01 08:36:16 +05:30
|
|
|
goto discard_line;
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Read file, append contents to output */
|
|
|
|
case 'r':
|
|
|
|
{
|
2006-02-24 08:00:39 +05:30
|
|
|
FILE *rfile;
|
2003-04-11 22:40:23 +05:30
|
|
|
|
2006-02-24 08:00:39 +05:30
|
|
|
rfile = fopen(sed_cmd->string, "r");
|
|
|
|
if (rfile) {
|
2003-10-01 08:36:16 +05:30
|
|
|
char *line;
|
2003-04-11 22:40:23 +05:30
|
|
|
|
2006-02-24 08:00:39 +05:30
|
|
|
while ((line = bb_get_chomped_line_from_file(rfile))
|
2003-10-01 08:36:16 +05:30
|
|
|
!= NULL)
|
|
|
|
append(line);
|
2006-02-24 08:00:39 +05:30
|
|
|
bb_xprint_and_close_file(rfile);
|
2003-04-09 21:22:32 +05:30
|
|
|
}
|
|
|
|
|
2003-09-16 10:55:43 +05:30
|
|
|
break;
|
2003-04-08 17:26:11 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
|
|
|
|
/* Write pattern space to file. */
|
|
|
|
case 'w':
|
|
|
|
sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Read next line from input */
|
|
|
|
case 'n':
|
|
|
|
if (!be_quiet)
|
|
|
|
sed_puts(pattern_space,no_newline);
|
|
|
|
if (next_line) {
|
|
|
|
free(pattern_space);
|
|
|
|
pattern_space = next_line;
|
|
|
|
no_newline=next_no_newline;
|
2004-11-25 12:51:47 +05:30
|
|
|
next_line = get_next_line(&next_no_newline);
|
2003-10-01 08:36:16 +05:30
|
|
|
linenum++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* fall through */
|
|
|
|
|
|
|
|
/* Quit. End of script, end of input. */
|
|
|
|
case 'q':
|
|
|
|
/* Exit the outer while loop */
|
|
|
|
free(next_line);
|
|
|
|
next_line = NULL;
|
|
|
|
goto discard_commands;
|
|
|
|
|
|
|
|
/* Append the next line to the current line */
|
|
|
|
case 'N':
|
2003-09-14 11:31:14 +05:30
|
|
|
{
|
2003-10-01 08:36:16 +05:30
|
|
|
/* If no next line, jump to end of script and exit. */
|
|
|
|
if (next_line == NULL) {
|
|
|
|
/* Jump to end of script and exit */
|
|
|
|
free(next_line);
|
|
|
|
next_line = NULL;
|
|
|
|
goto discard_line;
|
|
|
|
/* append next_line, read new next_line. */
|
2003-09-14 11:31:14 +05:30
|
|
|
} else {
|
2003-10-01 08:36:16 +05:30
|
|
|
int len=strlen(pattern_space);
|
|
|
|
|
|
|
|
pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
|
|
|
|
pattern_space[len]='\n';
|
|
|
|
strcpy(pattern_space+len+1, next_line);
|
|
|
|
no_newline=next_no_newline;
|
2004-11-25 12:51:47 +05:30
|
|
|
next_line = get_next_line(&next_no_newline);
|
2003-10-01 08:36:16 +05:30
|
|
|
linenum++;
|
2003-09-14 11:31:14 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
break;
|
2003-04-08 17:26:11 +05:30
|
|
|
}
|
|
|
|
|
2005-05-18 12:04:37 +05:30
|
|
|
/* Test/branch if substitution occurred */
|
2003-10-01 08:36:16 +05:30
|
|
|
case 't':
|
2005-05-18 12:04:37 +05:30
|
|
|
if(!substituted) break;
|
2003-10-01 08:36:16 +05:30
|
|
|
substituted=0;
|
2005-05-18 12:04:37 +05:30
|
|
|
/* Fall through */
|
|
|
|
/* Test/branch if substitution didn't occur */
|
|
|
|
case 'T':
|
|
|
|
if (substituted) break;
|
|
|
|
/* Fall through */
|
2003-10-01 08:36:16 +05:30
|
|
|
/* Branch to label */
|
|
|
|
case 'b':
|
|
|
|
if (!sed_cmd->string) goto discard_commands;
|
|
|
|
else sed_cmd = branch_to(sed_cmd->string);
|
|
|
|
break;
|
|
|
|
/* Transliterate characters */
|
|
|
|
case 'y':
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; pattern_space[i]; i++) {
|
|
|
|
int j;
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
for (j = 0; sed_cmd->string[j]; j += 2) {
|
|
|
|
if (pattern_space[i] == sed_cmd->string[j]) {
|
|
|
|
pattern_space[i] = sed_cmd->string[j + 1];
|
2005-09-02 05:40:06 +05:30
|
|
|
break;
|
2003-10-01 08:36:16 +05:30
|
|
|
}
|
2003-03-30 13:32:18 +05:30
|
|
|
}
|
|
|
|
}
|
2003-09-14 21:58:08 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
break;
|
2003-09-14 21:58:08 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
case 'g': /* Replace pattern space with hold space */
|
|
|
|
free(pattern_space);
|
2004-10-30 12:24:19 +05:30
|
|
|
pattern_space = strdup(hold_space ? hold_space : "");
|
2003-10-01 08:36:16 +05:30
|
|
|
break;
|
|
|
|
case 'G': /* Append newline and hold space to pattern space */
|
|
|
|
{
|
|
|
|
int pattern_space_size = 2;
|
|
|
|
int hold_space_size = 0;
|
|
|
|
|
|
|
|
if (pattern_space)
|
|
|
|
pattern_space_size += strlen(pattern_space);
|
|
|
|
if (hold_space) hold_space_size = strlen(hold_space);
|
|
|
|
pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size);
|
|
|
|
if (pattern_space_size == 2) pattern_space[0]=0;
|
2003-09-15 17:37:48 +05:30
|
|
|
strcat(pattern_space, "\n");
|
2003-10-01 08:36:16 +05:30
|
|
|
if (hold_space) strcat(pattern_space, hold_space);
|
|
|
|
no_newline=0;
|
|
|
|
|
|
|
|
break;
|
2003-09-14 21:58:08 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
case 'h': /* Replace hold space with pattern space */
|
|
|
|
free(hold_space);
|
|
|
|
hold_space = strdup(pattern_space);
|
|
|
|
break;
|
|
|
|
case 'H': /* Append newline and pattern space to hold space */
|
|
|
|
{
|
|
|
|
int hold_space_size = 2;
|
|
|
|
int pattern_space_size = 0;
|
2003-09-14 20:54:18 +05:30
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
if (hold_space) hold_space_size += strlen(hold_space);
|
|
|
|
if (pattern_space)
|
|
|
|
pattern_space_size = strlen(pattern_space);
|
|
|
|
hold_space = xrealloc(hold_space,
|
|
|
|
hold_space_size + pattern_space_size);
|
|
|
|
|
|
|
|
if (hold_space_size == 2) hold_space[0]=0;
|
2003-09-14 20:54:18 +05:30
|
|
|
strcat(hold_space, "\n");
|
2003-10-01 08:36:16 +05:30
|
|
|
if (pattern_space) strcat(hold_space, pattern_space);
|
|
|
|
|
|
|
|
break;
|
2003-09-14 20:54:18 +05:30
|
|
|
}
|
2003-10-01 08:36:16 +05:30
|
|
|
case 'x': /* Exchange hold and pattern space */
|
|
|
|
{
|
|
|
|
char *tmp = pattern_space;
|
|
|
|
pattern_space = hold_space;
|
|
|
|
no_newline=0;
|
|
|
|
hold_space = tmp;
|
|
|
|
break;
|
2003-09-14 06:55:31 +05:30
|
|
|
}
|
2001-06-12 05:20:06 +05:30
|
|
|
}
|
2002-06-12 05:13:27 +05:30
|
|
|
}
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
|
|
|
|
2003-10-01 08:36:16 +05:30
|
|
|
/*
|
|
|
|
* exit point from sedding...
|
|
|
|
*/
|
|
|
|
discard_commands:
|
|
|
|
/* we will print the line unless we were told to be quiet ('-n')
|
|
|
|
or if the line was suppressed (ala 'd'elete) */
|
|
|
|
if (!be_quiet) sed_puts(pattern_space,no_newline);
|
|
|
|
|
|
|
|
/* Delete and such jump here. */
|
|
|
|
discard_line:
|
|
|
|
flush_append();
|
2003-04-07 17:54:44 +05:30
|
|
|
free(pattern_space);
|
2003-10-01 08:36:16 +05:30
|
|
|
}
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
|
|
|
|
2003-10-04 10:57:56 +05:30
|
|
|
/* A single command-line argument may contain embedded newlines; each
   newline-separated piece is handed to add_cmd() as its own command line.
   Works on a private copy, so cmdstr itself is not modified. */

static void add_cmd_block(char *cmdstr)
{
	char *copy = bb_xstrdup(cmdstr);
	char *cursor = copy;

	for (;;) {
		int len = strcspn(cursor, "\n");
		int last = (cursor[len] == 0);

		/* Terminate this piece where its newline was. */
		if (!last)
			cursor[len] = 0;
		add_cmd(cursor);
		if (last)
			break;
		cursor += len + 1;
	}
	free(copy);
}
|
|
|
|
|
2006-03-07 02:17:33 +05:30
|
|
|
int sed_main(int argc, char **argv)
|
2000-07-11 04:25:51 +05:30
|
|
|
{
|
2004-10-30 12:24:19 +05:30
|
|
|
int status = EXIT_SUCCESS, opt, getpat = 1;
|
2000-07-11 04:25:51 +05:30
|
|
|
|
2000-07-12 03:08:47 +05:30
|
|
|
/* destroy command strings on exit */
|
2006-03-02 15:36:22 +05:30
|
|
|
if (ENABLE_FEATURE_CLEAN_UP && atexit(sed_free_and_close_stuff) == -1)
|
2003-03-19 14:43:01 +05:30
|
|
|
bb_perror_msg_and_die("atexit");
|
2000-07-12 03:08:47 +05:30
|
|
|
|
2005-11-20 13:14:35 +05:30
|
|
|
/* Lie to autoconf when it starts asking stupid questions. */
|
2004-02-04 16:27:46 +05:30
|
|
|
if(argc==2 && !strcmp(argv[1],"--version")) {
|
|
|
|
printf("This is not GNU sed version 4.0\n");
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
/* do normal option parsing */
|
2004-05-26 15:33:33 +05:30
|
|
|
while ((opt = getopt(argc, argv, "irne:f:")) > 0) {
|
2000-07-11 04:25:51 +05:30
|
|
|
switch (opt) {
|
2004-02-18 15:24:15 +05:30
|
|
|
case 'i':
|
|
|
|
in_place++;
|
|
|
|
atexit(cleanup_outname);
|
|
|
|
break;
|
2004-05-26 15:33:33 +05:30
|
|
|
case 'r':
|
|
|
|
regex_type|=REG_EXTENDED;
|
|
|
|
break;
|
2003-04-08 17:26:11 +05:30
|
|
|
case 'n':
|
|
|
|
be_quiet++;
|
|
|
|
break;
|
2003-10-01 08:36:16 +05:30
|
|
|
case 'e':
|
2003-10-04 10:57:56 +05:30
|
|
|
add_cmd_block(optarg);
|
So I'm building a linux from scratch system, using a working script to do this
that the _only_ change to is that gnu sed has been replaced with busybox sed.
And ncurses' install phase hangs. I trace it down, and it's trying to run
gawk. (Insert obligatory doubletake, but this is FSF code we're talking
about, so...)
It turns out gawk shells out to sed, ala "sed -f /tmp/blah file.h". The
/tmp/blah file is basically empty (it contains one character, a newline). So
basically, gawk is using sed as "cat". With gnu sed, it works like cat,
anyway.
With busybox sed, it tests if its command list is empty after parsing the
command line, and if the list is empty it takes the first file argument as a
sed command string, and if that leaves the file list empty it tries to read
the data to operate on from stdin. (Hence the hang, since nothing's coming
in on stdin...)
It _should_ be testing whether there were any instances of -f or -e, not
whether it actually got any commands. Using sed as cat may be kind of
stupid, but it's valid and gawk relies on this behavior.
Here's a patch to fix it, turning a couple of ints into chars in hopes of
saving a bit of the space this adds. Comments?
Rob
2004-04-21 06:26:22 +05:30
|
|
|
getpat=0;
|
2003-04-08 17:26:11 +05:30
|
|
|
break;
|
|
|
|
case 'f':
|
2003-10-01 08:36:16 +05:30
|
|
|
{
|
|
|
|
FILE *cmdfile;
|
|
|
|
char *line;
|
|
|
|
|
|
|
|
cmdfile = bb_xfopen(optarg, "r");
|
|
|
|
|
2005-11-20 13:14:35 +05:30
|
|
|
while ((line = bb_get_chomped_line_from_file(cmdfile)) != NULL) {
|
2003-10-01 08:36:16 +05:30
|
|
|
add_cmd(line);
|
So I'm building a linux from scratch system, using a working script to do this
that the _only_ change to is that gnu sed has been replaced with busybox sed.
And ncurses' install phase hangs. I trace it down, and it's trying to run
gawk. (Insert obligatory doubletake, but this is FSF code we're talking
about, so...)
It turns out gawk shells out to sed, ala "sed -f /tmp/blah file.h". The
/tmp/blah file is basically empty (it contains one character, a newline). So
basically, gawk is using sed as "cat". With gnu sed, it works like cat,
anyway.
With busybox sed, it tests if its command list is empty after parsing the
command line, and if the list is empty it takes the first file argument as a
sed command string, and if that leaves the file list empty it tries to read
the data to operate on from stdin. (Hence the hang, since nothing's coming
in on stdin...)
It _should_ be testing whether there were any instances of -f or -e, not
whether it actually got any commands. Using sed as cat may be kind of
stupid, but it's valid and gawk relies on this behavior.
Here's a patch to fix it, turning a couple of ints into chars in hopes of
saving a bit of the space this adds. Comments?
Rob
2004-04-21 06:26:22 +05:30
|
|
|
getpat=0;
|
2003-10-01 08:36:16 +05:30
|
|
|
free(line);
|
|
|
|
}
|
|
|
|
bb_xprint_and_close_file(cmdfile);
|
|
|
|
|
2003-04-08 17:26:11 +05:30
|
|
|
break;
|
2003-10-01 08:36:16 +05:30
|
|
|
}
|
2003-04-08 17:26:11 +05:30
|
|
|
default:
|
|
|
|
bb_show_usage();
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-11-25 12:51:47 +05:30
|
|
|
/* if we didn't get a pattern from -e or -f, use argv[optind] */
|
So I'm building a linux from scratch system, using a working script to do this
that the _only_ change to is that gnu sed has been replaced with busybox sed.
And ncurses' install phase hangs. I trace it down, and it's trying to run
gawk. (Insert obligatory doubletake, but this is FSF code we're talking
about, so...)
It turns out gawk shells out to sed, ala "sed -f /tmp/blah file.h". The
/tmp/blah file is basically empty (it contains one character, a newline). So
basically, gawk is using sed as "cat". With gnu sed, it works like cat,
anyway.
With busybox sed, it tests if its command list is empty after parsing the
command line, and if the list is empty it takes the first file argument as a
sed command string, and if that leaves the file list empty it tries to read
the data to operate on from stdin. (Hence the hang, since nothing's coming
in on stdin...)
It _should_ be testing whether there were any instances of -f or -e, not
whether it actually got any commands. Using sed as cat may be kind of
stupid, but it's valid and gawk relies on this behavior.
Here's a patch to fix it, turning a couple of ints into chars in hopes of
saving a bit of the space this adds. Comments?
Rob
2004-04-21 06:26:22 +05:30
|
|
|
if(getpat) {
|
2000-07-11 04:25:51 +05:30
|
|
|
if (argv[optind] == NULL)
|
2003-03-19 14:43:01 +05:30
|
|
|
bb_show_usage();
|
2003-09-14 09:36:12 +05:30
|
|
|
else
|
2003-10-04 10:57:56 +05:30
|
|
|
add_cmd_block(argv[optind++]);
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|
2003-10-04 10:57:56 +05:30
|
|
|
/* Flush any unfinished commands. */
|
|
|
|
add_cmd("");
|
1999-12-30 03:49:46 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
/* By default, we write to stdout */
|
|
|
|
nonstdout=stdout;
|
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
/* argv[(optind)..(argc-1)] should be names of file to process. If no
|
|
|
|
* files were specified or '-' was specified, take input from stdin.
|
|
|
|
* Otherwise, we process all the files specified. */
|
2003-09-14 09:36:12 +05:30
|
|
|
if (argv[optind] == NULL) {
|
Doug Swarin pointed out a security bug in the -i option of sed.
While the permissions on the temp file are correct to prevent it from being
maliciously mangled by passing strangers, (created with 600, opened O_EXCL,
etc), the permissions on the _directory_ might not be, and we re-open the
file to convert the filehandle to a FILE * (and automatically get an error
message and exit if the directory's read-only or out of space or some such).
This opens a potential race condition if somebody's using dnotify on the
directory, deletes/renames the tempfile, and drops a symlink or something
there. Somebody running sed -i as root in a world writeable directory could
do damage.
I dug up notes on an earlier discussion where we looked at the security
implications of this (unfortunately on the #uclibc channel rather than email;
I don't have a transcript, just notes-to-self) which pointed out that if the
permissions on the directory allow other people's files to be deleted/renamed
then the original file is vulnerable to sabotage anyway. However, there are
two cases that discussion apparently didn't take into account:
1) Using another user's permissions to damage files in other directories you
can't access (standard symlink attack).
2) Reading data another user couldn't otherwise access by having the new file
belong to that other user.
This patch uses fdopen to convert the filehandle into a FILE *, rather than
reopening the file.
2005-05-18 11:26:16 +05:30
|
|
|
if(in_place) bb_error_msg_and_die("Filename required for -i");
|
2004-11-25 12:51:47 +05:30
|
|
|
add_input_file(stdin);
|
|
|
|
process_files();
|
2003-04-08 17:26:11 +05:30
|
|
|
} else {
|
2000-07-11 04:25:51 +05:30
|
|
|
int i;
|
|
|
|
FILE *file;
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2000-07-11 04:25:51 +05:30
|
|
|
for (i = optind; i < argc; i++) {
|
2004-02-18 15:24:15 +05:30
|
|
|
if(!strcmp(argv[i], "-") && !in_place) {
|
2004-11-25 12:51:47 +05:30
|
|
|
add_input_file(stdin);
|
|
|
|
process_files();
|
2003-09-14 09:36:12 +05:30
|
|
|
} else {
|
|
|
|
file = bb_wfopen(argv[i], "r");
|
|
|
|
if (file) {
|
2004-02-18 15:24:15 +05:30
|
|
|
if(in_place) {
|
|
|
|
struct stat statbuf;
|
Doug Swarin pointed out a security bug in the -i option of sed.
While the permissions on the temp file are correct to prevent it from being
maliciously mangled by passing strangers, (created with 600, opened O_EXCL,
etc), the permissions on the _directory_ might not be, and we re-open the
file to convert the filehandle to a FILE * (and automatically get an error
message and exit if the directory's read-only or out of space or some such).
This opens a potential race condition if somebody's using dnotify on the
directory, deletes/renames the tempfile, and drops a symlink or something
there. Somebody running sed -i as root in a world writeable directory could
do damage.
I dug up notes on an earlier discussion where we looked at the security
implications of this (unfortunately on the #uclibc channel rather than email;
I don't have a transcript, just notes-to-self) which pointed out that if the
permissions on the directory allow other people's files to be deleted/renamed
then the original file is vulnerable to sabotage anyway. However, there are
two cases that discussion apparently didn't take into account:
1) Using another user's permissions to damage files in other directories you
can't access (standard symlink attack).
2) Reading data another user couldn't otherwise access by having the new file
belong to that other user.
This patch uses fdopen to convert the filehandle into a FILE *, rather than
reopening the file.
2005-05-18 11:26:16 +05:30
|
|
|
int nonstdoutfd;
|
2006-01-25 05:38:53 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
outname=bb_xstrndup(argv[i],strlen(argv[i])+6);
|
|
|
|
strcat(outname,"XXXXXX");
|
Doug Swarin pointed out a security bug in the -i option of sed.
While the permissions on the temp file are correct to prevent it from being
maliciously mangled by passing strangers (created with mode 600, opened O_EXCL,
etc.), the permissions on the _directory_ might not be, and we re-open the
file to convert the filehandle to a FILE * (and automatically get an error
message and exit if the directory's read-only or out of space or some such).
This opens a potential race condition if somebody's using dnotify on the
directory, deletes/renames the tempfile, and drops a symlink or something
there. Somebody running sed -i as root in a world-writable directory could
do damage.
I dug up notes on an earlier discussion where we looked at the security
implications of this (unfortunately on the #uclibc channel rather than email;
I don't have a transcript, just notes-to-self) which pointed out that if the
permissions on the directory allow other people's files to be deleted/renamed
then the original file is vulnerable to sabotage anyway. However, there are
two cases that discussion apparently didn't take into account:
1) Using another user's permissions to damage files in other directories you
can't access (standard symlink attack).
2) Reading data another user couldn't otherwise access by having the new file
belong to that other user.
This patch uses fdopen to convert the filehandle into a FILE *, rather than
reopening the file.
2005-05-18 11:26:16 +05:30
|
|
|
if(-1==(nonstdoutfd=mkstemp(outname)))
|
|
|
|
bb_error_msg_and_die("no temp file");
|
|
|
|
nonstdout=fdopen(nonstdoutfd,"w");
|
2006-03-02 01:44:16 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
/* Set permissions of output file */
|
2006-03-02 01:44:16 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
fstat(fileno(file),&statbuf);
|
Doug Swarin pointed out a security bug in the -i option of sed.
While the permissions on the temp file are correct to prevent it from being
maliciously mangled by passing strangers (created with mode 600, opened O_EXCL,
etc.), the permissions on the _directory_ might not be, and we re-open the
file to convert the filehandle to a FILE * (and automatically get an error
message and exit if the directory's read-only or out of space or some such).
This opens a potential race condition if somebody's using dnotify on the
directory, deletes/renames the tempfile, and drops a symlink or something
there. Somebody running sed -i as root in a world-writable directory could
do damage.
I dug up notes on an earlier discussion where we looked at the security
implications of this (unfortunately on the #uclibc channel rather than email;
I don't have a transcript, just notes-to-self) which pointed out that if the
permissions on the directory allow other people's files to be deleted/renamed
then the original file is vulnerable to sabotage anyway. However, there are
two cases that discussion apparently didn't take into account:
1) Using another user's permissions to damage files in other directories you
can't access (standard symlink attack).
2) Reading data another user couldn't otherwise access by having the new file
belong to that other user.
This patch uses fdopen to convert the filehandle into a FILE *, rather than
reopening the file.
2005-05-18 11:26:16 +05:30
|
|
|
fchmod(nonstdoutfd,statbuf.st_mode);
|
2004-11-25 12:51:47 +05:30
|
|
|
add_input_file(file);
|
|
|
|
process_files();
|
2004-02-18 15:24:15 +05:30
|
|
|
fclose(nonstdout);
|
2006-03-02 01:44:16 +05:30
|
|
|
|
2004-02-18 15:24:15 +05:30
|
|
|
nonstdout=stdout;
|
|
|
|
unlink(argv[i]);
|
|
|
|
rename(outname,argv[i]);
|
|
|
|
free(outname);
|
|
|
|
outname=0;
|
2004-11-25 12:51:47 +05:30
|
|
|
} else add_input_file(file);
|
2003-09-14 09:36:12 +05:30
|
|
|
} else {
|
|
|
|
status = EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
2000-02-09 01:28:47 +05:30
|
|
|
}
|
2004-11-25 12:51:47 +05:30
|
|
|
if(input_file_count>current_input_file) process_files();
|
1999-11-09 07:17:36 +05:30
|
|
|
}
|
2003-04-08 17:26:11 +05:30
|
|
|
|
2001-11-12 22:14:55 +05:30
|
|
|
return status;
|
2000-07-11 04:25:51 +05:30
|
|
|
}
|