sed: code shrink
   text    data     bss     dec     hex filename
 876354     493    7584  884431   d7ecf busybox_old
 876323     493    7584  884400   d7eb0 busybox_unstripped

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
91b77c3953
commit
52d8370836
142
editors/sed.c
142
editors/sed.c
@@ -14,48 +14,46 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* Code overview.
|
/* Code overview.
|
||||||
|
*
|
||||||
Files are laid out to avoid unnecessary function declarations. So for
|
* Files are laid out to avoid unnecessary function declarations. So for
|
||||||
example, every function add_cmd calls occurs before add_cmd in this file.
|
* example, every function add_cmd calls occurs before add_cmd in this file.
|
||||||
|
*
|
||||||
add_cmd() is called on each line of sed command text (from a file or from
|
* add_cmd() is called on each line of sed command text (from a file or from
|
||||||
the command line). It calls get_address() and parse_cmd_args(). The
|
* the command line). It calls get_address() and parse_cmd_args(). The
|
||||||
resulting sed_cmd_t structures are appended to a linked list
|
* resulting sed_cmd_t structures are appended to a linked list
|
||||||
(G.sed_cmd_head/G.sed_cmd_tail).
|
* (G.sed_cmd_head/G.sed_cmd_tail).
|
||||||
|
*
|
||||||
add_input_file() adds a FILE* to the list of input files. We need to
|
* add_input_file() adds a FILE* to the list of input files. We need to
|
||||||
know all input sources ahead of time to find the last line for the $ match.
|
* know all input sources ahead of time to find the last line for the $ match.
|
||||||
|
*
|
||||||
process_files() does actual sedding, reading data lines from each input FILE *
|
* process_files() does actual sedding, reading data lines from each input FILE *
|
||||||
(which could be stdin) and applying the sed command list (sed_cmd_head) to
|
* (which could be stdin) and applying the sed command list (sed_cmd_head) to
|
||||||
each of the resulting lines.
|
* each of the resulting lines.
|
||||||
|
*
|
||||||
sed_main() is where external code calls into this, with a command line.
|
* sed_main() is where external code calls into this, with a command line.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Supported features and commands in this version of sed:
|
||||||
/*
|
*
|
||||||
Supported features and commands in this version of sed:
|
* - comments ('#')
|
||||||
|
* - address matching: num|/matchstr/[,num|/matchstr/|$]command
|
||||||
- comments ('#')
|
* - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
|
||||||
- address matching: num|/matchstr/[,num|/matchstr/|$]command
|
* - edit commands: (a)ppend, (i)nsert, (c)hange
|
||||||
- commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
|
* - file commands: (r)ead
|
||||||
- edit commands: (a)ppend, (i)nsert, (c)hange
|
* - backreferences in substitution expressions (\0, \1, \2...\9)
|
||||||
- file commands: (r)ead
|
* - grouped commands: {cmd1;cmd2}
|
||||||
- backreferences in substitution expressions (\0, \1, \2...\9)
|
* - transliteration (y/source-chars/dest-chars/)
|
||||||
- grouped commands: {cmd1;cmd2}
|
* - pattern space hold space storing / swapping (g, h, x)
|
||||||
- transliteration (y/source-chars/dest-chars/)
|
* - labels / branching (: label, b, t, T)
|
||||||
- pattern space hold space storing / swapping (g, h, x)
|
*
|
||||||
- labels / branching (: label, b, t, T)
|
* (Note: Specifying an address (range) to match is *optional*; commands
|
||||||
|
* default to the whole pattern space if no specific address match was
|
||||||
(Note: Specifying an address (range) to match is *optional*; commands
|
* requested.)
|
||||||
default to the whole pattern space if no specific address match was
|
*
|
||||||
requested.)
|
* Todo:
|
||||||
|
* - Create a wrapper around regex to make libc's regex conform with sed
|
||||||
Todo:
|
*
|
||||||
- Create a wrapper around regex to make libc's regex conform with sed
|
* Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
|
||||||
|
|
||||||
Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//usage:#define sed_trivial_usage
|
//usage:#define sed_trivial_usage
|
||||||
@@ -244,11 +242,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str)
|
|||||||
delimiter = -delimiter;
|
delimiter = -delimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; (ch = str[idx]); idx++) {
|
for (; (ch = str[idx]) != '\0'; idx++) {
|
||||||
if (bracket >= 0) {
|
if (bracket >= 0) {
|
||||||
if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2
|
if (ch == ']'
|
||||||
&& str[idx - 1] == '^')))
|
&& !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^'))
|
||||||
|
) {
|
||||||
bracket = -1;
|
bracket = -1;
|
||||||
|
}
|
||||||
} else if (escaped)
|
} else if (escaped)
|
||||||
escaped = 0;
|
escaped = 0;
|
||||||
else if (ch == '\\')
|
else if (ch == '\\')
|
||||||
@@ -434,11 +434,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr)
|
|||||||
*/
|
*/
|
||||||
static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
||||||
{
|
{
|
||||||
|
static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}";
|
||||||
|
enum {
|
||||||
|
IDX_s = 0,
|
||||||
|
IDX_a,
|
||||||
|
IDX_i,
|
||||||
|
IDX_c,
|
||||||
|
IDX_r,
|
||||||
|
IDX_w,
|
||||||
|
IDX_colon,
|
||||||
|
IDX_b,
|
||||||
|
IDX_t,
|
||||||
|
IDX_T,
|
||||||
|
IDX_y,
|
||||||
|
IDX_d,
|
||||||
|
IDX_D,
|
||||||
|
IDX_g,
|
||||||
|
IDX_G,
|
||||||
|
IDX_h,
|
||||||
|
IDX_H,
|
||||||
|
IDX_l,
|
||||||
|
IDX_n,
|
||||||
|
IDX_N,
|
||||||
|
IDX_p,
|
||||||
|
IDX_P,
|
||||||
|
IDX_q,
|
||||||
|
IDX_x,
|
||||||
|
IDX_equal,
|
||||||
|
IDX_lbrace,
|
||||||
|
IDX_rbrace,
|
||||||
|
IDX_nul
|
||||||
|
};
|
||||||
|
struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 1 : -1]; };
|
||||||
|
|
||||||
|
unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters;
|
||||||
|
|
||||||
/* handle (s)ubstitution command */
|
/* handle (s)ubstitution command */
|
||||||
if (sed_cmd->cmd == 's')
|
if (idx == IDX_s) {
|
||||||
cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
|
cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
|
||||||
|
}
|
||||||
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
|
/* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
|
||||||
else if (strchr("aic", sed_cmd->cmd)) {
|
else if (idx <= IDX_c) { /* a,i,c */
|
||||||
if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
|
if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
|
||||||
bb_error_msg_and_die("only a beginning address can be specified for edit commands");
|
bb_error_msg_and_die("only a beginning address can be specified for edit commands");
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@@ -454,8 +490,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
/* "\anychar" -> "anychar" */
|
/* "\anychar" -> "anychar" */
|
||||||
parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
|
parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0');
|
||||||
cmdstr += strlen(cmdstr);
|
cmdstr += strlen(cmdstr);
|
||||||
|
}
|
||||||
/* handle file cmds: (r)ead */
|
/* handle file cmds: (r)ead */
|
||||||
} else if (strchr("rw", sed_cmd->cmd)) {
|
else if (idx <= IDX_w) { /* r,w */
|
||||||
if (sed_cmd->end_line || sed_cmd->end_match)
|
if (sed_cmd->end_line || sed_cmd->end_match)
|
||||||
bb_error_msg_and_die("command only uses one address");
|
bb_error_msg_and_die("command only uses one address");
|
||||||
cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
|
cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string);
|
||||||
@@ -463,8 +500,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
|
sed_cmd->sw_file = xfopen_for_write(sed_cmd->string);
|
||||||
sed_cmd->sw_last_char = '\n';
|
sed_cmd->sw_last_char = '\n';
|
||||||
}
|
}
|
||||||
|
}
|
||||||
/* handle branch commands */
|
/* handle branch commands */
|
||||||
} else if (strchr(":btT", sed_cmd->cmd)) {
|
else if (idx <= IDX_T) { /* :,b,t,T */
|
||||||
int length;
|
int length;
|
||||||
|
|
||||||
cmdstr = skip_whitespace(cmdstr);
|
cmdstr = skip_whitespace(cmdstr);
|
||||||
@@ -475,7 +513,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* translation command */
|
/* translation command */
|
||||||
else if (sed_cmd->cmd == 'y') {
|
else if (idx == IDX_y) {
|
||||||
char *match, *replace;
|
char *match, *replace;
|
||||||
int i = cmdstr[0];
|
int i = cmdstr[0];
|
||||||
|
|
||||||
@@ -495,7 +533,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
|
|||||||
/* if it wasn't a single-letter command that takes no arguments
|
/* if it wasn't a single-letter command that takes no arguments
|
||||||
* then it must be an invalid command.
|
* then it must be an invalid command.
|
||||||
*/
|
*/
|
||||||
else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
|
else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */
|
||||||
bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
|
bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -966,9 +1004,9 @@ static void process_files(void)
|
|||||||
}
|
}
|
||||||
sed_cmd->in_match = !(
|
sed_cmd->in_match = !(
|
||||||
/* has the ending line come, or is this a single address command? */
|
/* has the ending line come, or is this a single address command? */
|
||||||
(sed_cmd->end_line ?
|
(sed_cmd->end_line
|
||||||
sed_cmd->end_line == -1 ?
|
? sed_cmd->end_line == -1
|
||||||
!next_line
|
? !next_line
|
||||||
: (sed_cmd->end_line <= linenum)
|
: (sed_cmd->end_line <= linenum)
|
||||||
: !sed_cmd->end_match
|
: !sed_cmd->end_match
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user