sed: code shrink

text    data     bss     dec     hex filename
 876354     493    7584  884431   d7ecf busybox_old
 876323     493    7584  884400   d7eb0 busybox_unstripped

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2011-05-03 00:51:43 +02:00
parent 91b77c3953
commit 52d8370836

View File

@ -14,49 +14,47 @@
*/ */
/* Code overview. /* Code overview.
*
* Files are laid out to avoid unnecessary function declarations. So for
* example, every function add_cmd calls occurs before add_cmd in this file.
*
* add_cmd() is called on each line of sed command text (from a file or from
* the command line). It calls get_address() and parse_cmd_args(). The
* resulting sed_cmd_t structures are appended to a linked list
* (G.sed_cmd_head/G.sed_cmd_tail).
*
* add_input_file() adds a FILE* to the list of input files. We need to
* know all input sources ahead of time to find the last line for the $ match.
*
* process_files() does actual sedding, reading data lines from each input FILE *
* (which could be stdin) and applying the sed command list (sed_cmd_head) to
* each of the resulting lines.
*
* sed_main() is where external code calls into this, with a command line.
*/
Files are laid out to avoid unnecessary function declarations. So for /* Supported features and commands in this version of sed:
example, every function add_cmd calls occurs before add_cmd in this file. *
* - comments ('#')
add_cmd() is called on each line of sed command text (from a file or from * - address matching: num|/matchstr/[,num|/matchstr/|$]command
the command line). It calls get_address() and parse_cmd_args(). The * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
resulting sed_cmd_t structures are appended to a linked list * - edit commands: (a)ppend, (i)nsert, (c)hange
(G.sed_cmd_head/G.sed_cmd_tail). * - file commands: (r)ead
* - backreferences in substitution expressions (\0, \1, \2...\9)
add_input_file() adds a FILE* to the list of input files. We need to * - grouped commands: {cmd1;cmd2}
know all input sources ahead of time to find the last line for the $ match. * - transliteration (y/source-chars/dest-chars/)
* - pattern space hold space storing / swapping (g, h, x)
process_files() does actual sedding, reading data lines from each input FILE * * - labels / branching (: label, b, t, T)
(which could be stdin) and applying the sed command list (sed_cmd_head) to *
each of the resulting lines. * (Note: Specifying an address (range) to match is *optional*; commands
* default to the whole pattern space if no specific address match was
sed_main() is where external code calls into this, with a command line. * requested.)
*/ *
* Todo:
* - Create a wrapper around regex to make libc's regex conform with sed
/* *
Supported features and commands in this version of sed: * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
*/
- comments ('#')
- address matching: num|/matchstr/[,num|/matchstr/|$]command
- commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
- edit commands: (a)ppend, (i)nsert, (c)hange
- file commands: (r)ead
- backreferences in substitution expressions (\0, \1, \2...\9)
- grouped commands: {cmd1;cmd2}
- transliteration (y/source-chars/dest-chars/)
- pattern space hold space storing / swapping (g, h, x)
- labels / branching (: label, b, t, T)
(Note: Specifying an address (range) to match is *optional*; commands
default to the whole pattern space if no specific address match was
requested.)
Todo:
- Create a wrapper around regex to make libc's regex conform with sed
Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
*/
//usage:#define sed_trivial_usage //usage:#define sed_trivial_usage
//usage: "[-efinr] SED_CMD [FILE]..." //usage: "[-efinr] SED_CMD [FILE]..."
@ -244,11 +242,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
delimiter = -delimiter; delimiter = -delimiter;
} }
for (; (ch = str[idx]); idx++) { for (; (ch = str[idx]) != '\0'; idx++) {
if (bracket >= 0) { if (bracket >= 0) {
if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 if (ch == ']'
&& str[idx - 1] == '^'))) && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
) {
bracket = -1; bracket = -1;
}
} else if (escaped) } else if (escaped)
escaped = 0; escaped = 0;
else if (ch == '\\') else if (ch == '\\')
@ -434,11 +434,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
*/ */
static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
{ {
static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
enum {
IDX_s = 0,
IDX_a,
IDX_i,
IDX_c,
IDX_r,
IDX_w,
IDX_colon,
IDX_b,
IDX_t,
IDX_T,
IDX_y,
IDX_d,
IDX_D,
IDX_g,
IDX_G,
IDX_h,
IDX_H,
IDX_l,
IDX_n,
IDX_N,
IDX_p,
IDX_P,
IDX_q,
IDX_x,
IDX_equal,
IDX_lbrace,
IDX_rbrace,
IDX_nul
};
struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
/* handle (s)ubstitution command */ /* handle (s)ubstitution command */
if (sed_cmd->cmd == 's') if (idx == IDX_s) {
cmdstr += parse_subst_cmd(sed_cmd, cmdstr); cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
}
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
else if (strchr("aic", sed_cmd->cmd)) { else if (idx <= IDX_c) { /* a,i,c */
if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
bb_error_msg_and_die("only a beginning address can be specified for edit commands"); bb_error_msg_and_die("only a beginning address can be specified for edit commands");
for (;;) { for (;;) {
@ -454,8 +490,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
/* "\anychar" -> "anychar" */ /* "\anychar" -> "anychar" */
parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
cmdstr += strlen(cmdstr); cmdstr += strlen(cmdstr);
}
/* handle file cmds: (r)ead, (w)rite */ /* handle file cmds: (r)ead, (w)rite */
} else if (strchr("rw", sed_cmd->cmd)) { else if (idx <= IDX_w) { /* r,w */
if (sed_cmd->end_line || sed_cmd->end_match) if (sed_cmd->end_line || sed_cmd->end_match)
bb_error_msg_and_die("command only uses one address"); bb_error_msg_and_die("command only uses one address");
cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
@ -463,8 +500,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
sed_cmd->sw_last_char = '\n'; sed_cmd->sw_last_char = '\n';
} }
}
/* handle branch commands */ /* handle branch commands */
} else if (strchr(":btT", sed_cmd->cmd)) { else if (idx <= IDX_T) { /* :,b,t,T */
int length; int length;
cmdstr = skip_whitespace(cmdstr); cmdstr = skip_whitespace(cmdstr);
@ -475,7 +513,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
} }
} }
/* translation command */ /* translation command */
else if (sed_cmd->cmd == 'y') { else if (idx == IDX_y) {
char *match, *replace; char *match, *replace;
int i = cmdstr[0]; int i = cmdstr[0];
@ -495,7 +533,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
/* if it wasn't a single-letter command that takes no arguments /* if it wasn't a single-letter command that takes no arguments
* then it must be an invalid command. * then it must be an invalid command.
*/ */
else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
} }
@ -966,9 +1004,9 @@ static void process_files(void)
} }
sed_cmd->in_match = !( sed_cmd->in_match = !(
/* has the ending line come, or is this a single address command? */ /* has the ending line come, or is this a single address command? */
(sed_cmd->end_line ? (sed_cmd->end_line
sed_cmd->end_line == -1 ? ? sed_cmd->end_line == -1
!next_line ? !next_line
: (sed_cmd->end_line <= linenum) : (sed_cmd->end_line <= linenum)
: !sed_cmd->end_match : !sed_cmd->end_match
) )