wget: fix redirection from HTTP to FTP server

while at it, sanitize redirection in general; add printout
of every redirection hop; make sure we won't print any non-ASCII
garbage from remote server in error messages.

function                                             old     new   delta
sanitize_string                                        -      14     +14
parse_url                                            294     301      +7
gethdr                                               190     197      +7
wget_main                                           2326    2331      +5
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 3/0 up/down: 33/0)               Total: 33 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2009-06-28 03:33:57 +02:00
parent ab0c8d7b35
commit f1fab09242

View File

@ -6,7 +6,6 @@
* *
* Licensed under GPLv2, see file LICENSE in this tarball for details. * Licensed under GPLv2, see file LICENSE in this tarball for details.
*/ */
#include "libbb.h" #include "libbb.h"
struct host_info { struct host_info {
@ -239,6 +238,15 @@ static char *base64enc_512(char buf[512], const char *str)
} }
#endif #endif
/*
 * Truncate s in place at the first byte whose value is below ' ' (0x20),
 * i.e. at the first control character such as CR, LF, TAB or ESC, or at
 * the existing terminating NUL if there is none.
 *
 * The scan goes through an unsigned char pointer, so bytes >= 0x80 compare
 * greater than ' ' and are kept rather than treated as negative values.
 *
 * Returns s itself, so the call can be used directly as a printf argument.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cut = (unsigned char *) s;

	/* Advance to the first byte that must not be printed (NUL included) */
	for (;;) {
		if (*cut < ' ')
			break;
		cut++;
	}
	/* Chop the string there; a no-op if we stopped on the terminator */
	*cut = '\0';
	return s;
}
static FILE *open_socket(len_and_sockaddr *lsa) static FILE *open_socket(len_and_sockaddr *lsa)
{ {
FILE *fp; FILE *fp;
@ -294,7 +302,7 @@ static void parse_url(char *src_url, struct host_info *h)
h->host = url + 6; h->host = url + 6;
h->is_ftp = 1; h->is_ftp = 1;
} else } else
bb_error_msg_and_die("not an http or ftp url: %s", url); bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
// FYI: // FYI:
// "Real" wget 'http://busybox.net?var=a/b' sends this request: // "Real" wget 'http://busybox.net?var=a/b' sends this request:
@ -360,7 +368,7 @@ static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
/* verify we are at the end of the header name */ /* verify we are at the end of the header name */
if (*s != ':') if (*s != ':')
bb_error_msg_and_die("bad header line: %s", buf); bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
/* locate the start of the header value */ /* locate the start of the header value */
*s++ = '\0'; *s++ = '\0';
@ -433,7 +441,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
sfp = open_socket(lsa); sfp = open_socket(lsa);
if (ftpcmd(NULL, NULL, sfp, buf) != 220) if (ftpcmd(NULL, NULL, sfp, buf) != 220)
bb_error_msg_and_die("%s", buf+4); bb_error_msg_and_die("%s", sanitize_string(buf+4));
/* /*
* Splitting username:password pair, * Splitting username:password pair,
@ -450,7 +458,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
break; break;
/* fall through (failed login) */ /* fall through (failed login) */
default: default:
bb_error_msg_and_die("ftp login: %s", buf+4); bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
} }
ftpcmd("TYPE I", NULL, sfp, buf); ftpcmd("TYPE I", NULL, sfp, buf);
@ -471,7 +479,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
*/ */
if (ftpcmd("PASV", NULL, sfp, buf) != 227) { if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
pasv_error: pasv_error:
bb_error_msg_and_die("bad response to %s: %s", "PASV", buf); bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
} }
// Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
// Server's IP is N1.N2.N3.N4 (we ignore it) // Server's IP is N1.N2.N3.N4 (we ignore it)
@ -496,7 +504,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
} }
if (ftpcmd("RETR ", target->path, sfp, buf) > 150) if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
bb_error_msg_and_die("bad response to %s: %s", "RETR", buf); bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
return sfp; return sfp;
} }
@ -574,6 +582,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
struct host_info server, target; struct host_info server, target;
len_and_sockaddr *lsa; len_and_sockaddr *lsa;
unsigned opt; unsigned opt;
int redir_limit;
char *proxy = NULL; char *proxy = NULL;
char *dir_prefix = NULL; char *dir_prefix = NULL;
#if ENABLE_FEATURE_WGET_LONG_OPTIONS #if ENABLE_FEATURE_WGET_LONG_OPTIONS
@ -696,104 +705,91 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
* We are not sure it exists on remote side */ * We are not sure it exists on remote side */
} }
/* We want to do exactly _one_ DNS lookup, since some redir_limit = 5;
* sites (i.e. ftp.us.debian.org) use round-robin DNS resolve_lsa:
* and we want to connect to only one IP... */
lsa = xhost2sockaddr(server.host, server.port); lsa = xhost2sockaddr(server.host, server.port);
if (!(opt & WGET_OPT_QUIET)) { if (!(opt & WGET_OPT_QUIET)) {
fprintf(stderr, "Connecting to %s (%s)\n", server.host, char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
xmalloc_sockaddr2dotted(&lsa->u.sa)); fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
/* We leak result of xmalloc_sockaddr2dotted */ free(s);
} }
establish_session:
/* G.got_clen = 0; - already is */
sfp = NULL;
if (use_proxy || !target.is_ftp) { if (use_proxy || !target.is_ftp) {
/* /*
* HTTP session * HTTP session
*/ */
char *str;
int status; int status;
int try = 5;
do { /* Open socket to http server */
char *str; sfp = open_socket(lsa);
G.got_clen = 0; /* Send HTTP request */
G.chunked = 0; if (use_proxy) {
fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
target.is_ftp ? "f" : "ht", target.host,
target.path);
} else {
if (opt & WGET_OPT_POST_DATA)
fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
else
fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
}
if (!--try) fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
bb_error_msg_and_die("too many redirections"); target.host, user_agent);
/* Open socket to http server */
if (sfp) fclose(sfp);
sfp = open_socket(lsa);
/* Send HTTP request */
if (use_proxy) {
fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
target.is_ftp ? "f" : "ht", target.host,
target.path);
} else {
if (opt & WGET_OPT_POST_DATA)
fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
else
fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
}
fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
target.host, user_agent);
#if ENABLE_FEATURE_WGET_AUTHENTICATION #if ENABLE_FEATURE_WGET_AUTHENTICATION
if (target.user) { if (target.user) {
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
base64enc_512(buf, target.user)); base64enc_512(buf, target.user));
} }
if (use_proxy && server.user) { if (use_proxy && server.user) {
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
base64enc_512(buf, server.user)); base64enc_512(buf, server.user));
} }
#endif #endif
if (beg_range) if (beg_range)
fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range); fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
#if ENABLE_FEATURE_WGET_LONG_OPTIONS #if ENABLE_FEATURE_WGET_LONG_OPTIONS
if (extra_headers) if (extra_headers)
fputs(extra_headers, sfp); fputs(extra_headers, sfp);
if (opt & WGET_OPT_POST_DATA) { if (opt & WGET_OPT_POST_DATA) {
char *estr = URL_escape(post_data); char *estr = URL_escape(post_data);
fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
(int) strlen(estr), estr); (int) strlen(estr), estr);
/*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
/*fprintf(sfp, "%s\r\n", estr);*/ /*fprintf(sfp, "%s\r\n", estr);*/
free(estr); free(estr);
} else } else
#endif #endif
{ /* If "Connection:" is needed, document why */ { /* If "Connection:" is needed, document why */
fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
} }
/* /*
* Retrieve HTTP response line and check for "200" status code. * Retrieve HTTP response line and check for "200" status code.
*/ */
read_response: read_response:
if (fgets(buf, sizeof(buf), sfp) == NULL) if (fgets(buf, sizeof(buf), sfp) == NULL)
bb_error_msg_and_die("no response from server"); bb_error_msg_and_die("no response from server");
str = buf; str = buf;
str = skip_non_whitespace(str); str = skip_non_whitespace(str);
str = skip_whitespace(str); str = skip_whitespace(str);
// FIXME: no error check // FIXME: no error check
// xatou wouldn't work: "200 OK" // xatou wouldn't work: "200 OK"
status = atoi(str); status = atoi(str);
switch (status) { switch (status) {
case 0: case 0:
case 100: case 100:
while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
/* eat all remaining headers */; /* eat all remaining headers */;
goto read_response; goto read_response;
case 200: case 200:
/* /*
Response 204 doesn't say "null file", it says "metadata Response 204 doesn't say "null file", it says "metadata
has changed but data didn't": has changed but data didn't":
@ -818,60 +814,66 @@ is always terminated by the first empty line after the header fields."
However, in real world it was observed that some web servers However, in real world it was observed that some web servers
(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
*/ */
case 204: case 204:
break;
case 300: /* redirection */
case 301:
case 302:
case 303:
break;
case 206:
if (beg_range)
break; break;
case 300: /* redirection */ /* fall through */
case 301: default:
case 302: bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
case 303: }
break;
case 206:
if (beg_range)
break;
/* fall through */
default:
/* Show first line only and kill any ESC tricks */
buf[strcspn(buf, "\n\r\x1b")] = '\0';
bb_error_msg_and_die("server returned error: %s", buf);
}
/* /*
* Retrieve HTTP headers. * Retrieve HTTP headers.
*/ */
while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
/* gethdr converted "FOO:" string to lowercase */ /* gethdr converted "FOO:" string to lowercase */
smalluint key = index_in_strings(keywords, buf) + 1; smalluint key = index_in_strings(keywords, buf) + 1;
if (key == KEY_content_length) { if (key == KEY_content_length) {
content_len = BB_STRTOOFF(str, NULL, 10); content_len = BB_STRTOOFF(str, NULL, 10);
if (errno || content_len < 0) { if (errno || content_len < 0) {
bb_error_msg_and_die("content-length %s is garbage", str); bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
}
G.got_clen = 1;
continue;
} }
if (key == KEY_transfer_encoding) { G.got_clen = 1;
if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked) continue;
bb_error_msg_and_die("transfer encoding '%s' is not supported", str); }
G.chunked = G.got_clen = 1; if (key == KEY_transfer_encoding) {
} if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
if (key == KEY_location) { bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
if (str[0] == '/') G.chunked = G.got_clen = 1;
/* free(target.allocated); */ }
target.path = /* target.allocated = */ xstrdup(str+1); if (key == KEY_location && status >= 300) {
else { if (--redir_limit == 0)
parse_url(str, &target); bb_error_msg_and_die("too many redirections");
if (use_proxy == 0) { fclose(sfp);
server.host = target.host; G.got_clen = 0;
server.port = target.port; G.chunked = 0;
} if (str[0] == '/')
/* free(target.allocated); */
target.path = /* target.allocated = */ xstrdup(str+1);
/* lsa stays the same: it's on the same server */
else {
parse_url(str, &target);
if (!use_proxy) {
server.host = target.host;
server.port = target.port;
free(lsa); free(lsa);
lsa = xhost2sockaddr(server.host, server.port); goto resolve_lsa;
break; } /* else: lsa stays the same: we use proxy */
}
} }
goto establish_session;
} }
} while (status >= 300); }
// if (status >= 300)
// bb_error_msg_and_die("bad redirection (no Location: header from server)");
/* For HTTP, data is pumped over the same connection */
dfp = sfp; dfp = sfp;
} else { } else {
@ -897,10 +899,11 @@ However, in real world it was observed that some web servers
retrieve_file_data(dfp, output_fd); retrieve_file_data(dfp, output_fd);
if ((use_proxy == 0) && target.is_ftp) { if (dfp != sfp) {
/* It's ftp. Close it properly */
fclose(dfp); fclose(dfp);
if (ftpcmd(NULL, NULL, sfp, buf) != 226) if (ftpcmd(NULL, NULL, sfp, buf) != 226)
bb_error_msg_and_die("ftp error: %s", buf+4); bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
ftpcmd("QUIT", NULL, sfp, buf); ftpcmd("QUIT", NULL, sfp, buf);
} }