wget: fix redirection from HTTP to FTP server
while at it, sanitize redirection in general; add printout of every redirection hop; make sure we won't print any non-ASCII garbage from remote server in error messages. function old new delta sanitize_string - 14 +14 parse_url 294 301 +7 gethdr 190 197 +7 wget_main 2326 2331 +5 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 3/0 up/down: 33/0) Total: 33 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
ab0c8d7b35
commit
f1fab09242
@ -6,7 +6,6 @@
|
|||||||
*
|
*
|
||||||
* Licensed under GPLv2, see file LICENSE in this tarball for details.
|
* Licensed under GPLv2, see file LICENSE in this tarball for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "libbb.h"
|
#include "libbb.h"
|
||||||
|
|
||||||
struct host_info {
|
struct host_info {
|
||||||
@ -239,6 +238,15 @@ static char *base64enc_512(char buf[512], const char *str)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * Truncate s in place at the first byte that is not printable ASCII
 * (i.e. outside 0x20..0x7e) and return s.
 *
 * Callers pass text received from the remote server through this before
 * printing it in an error message. Cutting at the first control byte
 * drops the trailing CR/LF and blocks ESC-based terminal tricks.
 * DEL (0x7f) and bytes >= 0x80 are rejected as well: the latter include
 * the 8-bit C1 controls (e.g. CSI, 0x9b), which some terminals interpret
 * just like ESC sequences.
 *
 * s must be a writable NUL-terminated string; it is modified in place.
 */
static char* sanitize_string(char *s)
{
	/* unsigned char: bytes >= 0x80 must not compare as negative */
	unsigned char *p = (void *) s;

	while (*p >= ' ' && *p < 0x7f)
		p++;
	*p = '\0';
	return s;
}
|
||||||
|
|
||||||
static FILE *open_socket(len_and_sockaddr *lsa)
|
static FILE *open_socket(len_and_sockaddr *lsa)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
@ -294,7 +302,7 @@ static void parse_url(char *src_url, struct host_info *h)
|
|||||||
h->host = url + 6;
|
h->host = url + 6;
|
||||||
h->is_ftp = 1;
|
h->is_ftp = 1;
|
||||||
} else
|
} else
|
||||||
bb_error_msg_and_die("not an http or ftp url: %s", url);
|
bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
|
||||||
|
|
||||||
// FYI:
|
// FYI:
|
||||||
// "Real" wget 'http://busybox.net?var=a/b' sends this request:
|
// "Real" wget 'http://busybox.net?var=a/b' sends this request:
|
||||||
@ -360,7 +368,7 @@ static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
|
|||||||
|
|
||||||
/* verify we are at the end of the header name */
|
/* verify we are at the end of the header name */
|
||||||
if (*s != ':')
|
if (*s != ':')
|
||||||
bb_error_msg_and_die("bad header line: %s", buf);
|
bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));
|
||||||
|
|
||||||
/* locate the start of the header value */
|
/* locate the start of the header value */
|
||||||
*s++ = '\0';
|
*s++ = '\0';
|
||||||
@ -433,7 +441,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
|
|||||||
|
|
||||||
sfp = open_socket(lsa);
|
sfp = open_socket(lsa);
|
||||||
if (ftpcmd(NULL, NULL, sfp, buf) != 220)
|
if (ftpcmd(NULL, NULL, sfp, buf) != 220)
|
||||||
bb_error_msg_and_die("%s", buf+4);
|
bb_error_msg_and_die("%s", sanitize_string(buf+4));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Splitting username:password pair,
|
* Splitting username:password pair,
|
||||||
@ -450,7 +458,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
|
|||||||
break;
|
break;
|
||||||
/* fall through (failed login) */
|
/* fall through (failed login) */
|
||||||
default:
|
default:
|
||||||
bb_error_msg_and_die("ftp login: %s", buf+4);
|
bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4));
|
||||||
}
|
}
|
||||||
|
|
||||||
ftpcmd("TYPE I", NULL, sfp, buf);
|
ftpcmd("TYPE I", NULL, sfp, buf);
|
||||||
@ -471,7 +479,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
|
|||||||
*/
|
*/
|
||||||
if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
|
if (ftpcmd("PASV", NULL, sfp, buf) != 227) {
|
||||||
pasv_error:
|
pasv_error:
|
||||||
bb_error_msg_and_die("bad response to %s: %s", "PASV", buf);
|
bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf));
|
||||||
}
|
}
|
||||||
// Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
|
// Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
|
||||||
// Server's IP is N1.N2.N3.N4 (we ignore it)
|
// Server's IP is N1.N2.N3.N4 (we ignore it)
|
||||||
@ -496,7 +504,7 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
|
if (ftpcmd("RETR ", target->path, sfp, buf) > 150)
|
||||||
bb_error_msg_and_die("bad response to %s: %s", "RETR", buf);
|
bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf));
|
||||||
|
|
||||||
return sfp;
|
return sfp;
|
||||||
}
|
}
|
||||||
@ -574,6 +582,7 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
struct host_info server, target;
|
struct host_info server, target;
|
||||||
len_and_sockaddr *lsa;
|
len_and_sockaddr *lsa;
|
||||||
unsigned opt;
|
unsigned opt;
|
||||||
|
int redir_limit;
|
||||||
char *proxy = NULL;
|
char *proxy = NULL;
|
||||||
char *dir_prefix = NULL;
|
char *dir_prefix = NULL;
|
||||||
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
||||||
@ -696,104 +705,91 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
|
|||||||
* We are not sure it exists on remote side */
|
* We are not sure it exists on remote side */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We want to do exactly _one_ DNS lookup, since some
|
redir_limit = 5;
|
||||||
* sites (i.e. ftp.us.debian.org) use round-robin DNS
|
resolve_lsa:
|
||||||
* and we want to connect to only one IP... */
|
|
||||||
lsa = xhost2sockaddr(server.host, server.port);
|
lsa = xhost2sockaddr(server.host, server.port);
|
||||||
if (!(opt & WGET_OPT_QUIET)) {
|
if (!(opt & WGET_OPT_QUIET)) {
|
||||||
fprintf(stderr, "Connecting to %s (%s)\n", server.host,
|
char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
|
||||||
xmalloc_sockaddr2dotted(&lsa->u.sa));
|
fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
|
||||||
/* We leak result of xmalloc_sockaddr2dotted */
|
free(s);
|
||||||
}
|
}
|
||||||
|
establish_session:
|
||||||
/* G.got_clen = 0; - already is */
|
|
||||||
sfp = NULL;
|
|
||||||
if (use_proxy || !target.is_ftp) {
|
if (use_proxy || !target.is_ftp) {
|
||||||
/*
|
/*
|
||||||
* HTTP session
|
* HTTP session
|
||||||
*/
|
*/
|
||||||
|
char *str;
|
||||||
int status;
|
int status;
|
||||||
int try = 5;
|
|
||||||
|
|
||||||
do {
|
/* Open socket to http server */
|
||||||
char *str;
|
sfp = open_socket(lsa);
|
||||||
|
|
||||||
G.got_clen = 0;
|
/* Send HTTP request */
|
||||||
G.chunked = 0;
|
if (use_proxy) {
|
||||||
|
fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
|
||||||
|
target.is_ftp ? "f" : "ht", target.host,
|
||||||
|
target.path);
|
||||||
|
} else {
|
||||||
|
if (opt & WGET_OPT_POST_DATA)
|
||||||
|
fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
|
||||||
|
else
|
||||||
|
fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
|
||||||
|
}
|
||||||
|
|
||||||
if (!--try)
|
fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
|
||||||
bb_error_msg_and_die("too many redirections");
|
target.host, user_agent);
|
||||||
|
|
||||||
/* Open socket to http server */
|
|
||||||
if (sfp) fclose(sfp);
|
|
||||||
sfp = open_socket(lsa);
|
|
||||||
|
|
||||||
/* Send HTTP request */
|
|
||||||
if (use_proxy) {
|
|
||||||
fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
|
|
||||||
target.is_ftp ? "f" : "ht", target.host,
|
|
||||||
target.path);
|
|
||||||
} else {
|
|
||||||
if (opt & WGET_OPT_POST_DATA)
|
|
||||||
fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
|
|
||||||
else
|
|
||||||
fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
|
|
||||||
target.host, user_agent);
|
|
||||||
|
|
||||||
#if ENABLE_FEATURE_WGET_AUTHENTICATION
|
#if ENABLE_FEATURE_WGET_AUTHENTICATION
|
||||||
if (target.user) {
|
if (target.user) {
|
||||||
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
|
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
|
||||||
base64enc_512(buf, target.user));
|
base64enc_512(buf, target.user));
|
||||||
}
|
}
|
||||||
if (use_proxy && server.user) {
|
if (use_proxy && server.user) {
|
||||||
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
|
fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
|
||||||
base64enc_512(buf, server.user));
|
base64enc_512(buf, server.user));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (beg_range)
|
if (beg_range)
|
||||||
fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
|
fprintf(sfp, "Range: bytes=%"OFF_FMT"d-\r\n", beg_range);
|
||||||
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
|
||||||
if (extra_headers)
|
if (extra_headers)
|
||||||
fputs(extra_headers, sfp);
|
fputs(extra_headers, sfp);
|
||||||
|
|
||||||
if (opt & WGET_OPT_POST_DATA) {
|
if (opt & WGET_OPT_POST_DATA) {
|
||||||
char *estr = URL_escape(post_data);
|
char *estr = URL_escape(post_data);
|
||||||
fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
|
fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
|
||||||
fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
|
fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
|
||||||
(int) strlen(estr), estr);
|
(int) strlen(estr), estr);
|
||||||
/*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
|
/*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
|
||||||
/*fprintf(sfp, "%s\r\n", estr);*/
|
/*fprintf(sfp, "%s\r\n", estr);*/
|
||||||
free(estr);
|
free(estr);
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{ /* If "Connection:" is needed, document why */
|
{ /* If "Connection:" is needed, document why */
|
||||||
fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
|
fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Retrieve HTTP response line and check for "200" status code.
|
* Retrieve HTTP response line and check for "200" status code.
|
||||||
*/
|
*/
|
||||||
read_response:
|
read_response:
|
||||||
if (fgets(buf, sizeof(buf), sfp) == NULL)
|
if (fgets(buf, sizeof(buf), sfp) == NULL)
|
||||||
bb_error_msg_and_die("no response from server");
|
bb_error_msg_and_die("no response from server");
|
||||||
|
|
||||||
str = buf;
|
str = buf;
|
||||||
str = skip_non_whitespace(str);
|
str = skip_non_whitespace(str);
|
||||||
str = skip_whitespace(str);
|
str = skip_whitespace(str);
|
||||||
// FIXME: no error check
|
// FIXME: no error check
|
||||||
// xatou wouldn't work: "200 OK"
|
// xatou wouldn't work: "200 OK"
|
||||||
status = atoi(str);
|
status = atoi(str);
|
||||||
switch (status) {
|
switch (status) {
|
||||||
case 0:
|
case 0:
|
||||||
case 100:
|
case 100:
|
||||||
while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
|
while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL)
|
||||||
/* eat all remaining headers */;
|
/* eat all remaining headers */;
|
||||||
goto read_response;
|
goto read_response;
|
||||||
case 200:
|
case 200:
|
||||||
/*
|
/*
|
||||||
Response 204 doesn't say "null file", it says "metadata
|
Response 204 doesn't say "null file", it says "metadata
|
||||||
has changed but data didn't":
|
has changed but data didn't":
|
||||||
@ -818,60 +814,66 @@ is always terminated by the first empty line after the header fields."
|
|||||||
However, in real world it was observed that some web servers
|
However, in real world it was observed that some web servers
|
||||||
(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
|
(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
|
||||||
*/
|
*/
|
||||||
case 204:
|
case 204:
|
||||||
|
break;
|
||||||
|
case 300: /* redirection */
|
||||||
|
case 301:
|
||||||
|
case 302:
|
||||||
|
case 303:
|
||||||
|
break;
|
||||||
|
case 206:
|
||||||
|
if (beg_range)
|
||||||
break;
|
break;
|
||||||
case 300: /* redirection */
|
/* fall through */
|
||||||
case 301:
|
default:
|
||||||
case 302:
|
bb_error_msg_and_die("server returned error: %s", sanitize_string(buf));
|
||||||
case 303:
|
}
|
||||||
break;
|
|
||||||
case 206:
|
|
||||||
if (beg_range)
|
|
||||||
break;
|
|
||||||
/* fall through */
|
|
||||||
default:
|
|
||||||
/* Show first line only and kill any ESC tricks */
|
|
||||||
buf[strcspn(buf, "\n\r\x1b")] = '\0';
|
|
||||||
bb_error_msg_and_die("server returned error: %s", buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Retrieve HTTP headers.
|
* Retrieve HTTP headers.
|
||||||
*/
|
*/
|
||||||
while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
|
while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) {
|
||||||
/* gethdr converted "FOO:" string to lowercase */
|
/* gethdr converted "FOO:" string to lowercase */
|
||||||
smalluint key = index_in_strings(keywords, buf) + 1;
|
smalluint key = index_in_strings(keywords, buf) + 1;
|
||||||
if (key == KEY_content_length) {
|
if (key == KEY_content_length) {
|
||||||
content_len = BB_STRTOOFF(str, NULL, 10);
|
content_len = BB_STRTOOFF(str, NULL, 10);
|
||||||
if (errno || content_len < 0) {
|
if (errno || content_len < 0) {
|
||||||
bb_error_msg_and_die("content-length %s is garbage", str);
|
bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
|
||||||
}
|
|
||||||
G.got_clen = 1;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
if (key == KEY_transfer_encoding) {
|
G.got_clen = 1;
|
||||||
if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
|
continue;
|
||||||
bb_error_msg_and_die("transfer encoding '%s' is not supported", str);
|
}
|
||||||
G.chunked = G.got_clen = 1;
|
if (key == KEY_transfer_encoding) {
|
||||||
}
|
if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
|
||||||
if (key == KEY_location) {
|
bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
|
||||||
if (str[0] == '/')
|
G.chunked = G.got_clen = 1;
|
||||||
/* free(target.allocated); */
|
}
|
||||||
target.path = /* target.allocated = */ xstrdup(str+1);
|
if (key == KEY_location && status >= 300) {
|
||||||
else {
|
if (--redir_limit == 0)
|
||||||
parse_url(str, &target);
|
bb_error_msg_and_die("too many redirections");
|
||||||
if (use_proxy == 0) {
|
fclose(sfp);
|
||||||
server.host = target.host;
|
G.got_clen = 0;
|
||||||
server.port = target.port;
|
G.chunked = 0;
|
||||||
}
|
if (str[0] == '/')
|
||||||
|
/* free(target.allocated); */
|
||||||
|
target.path = /* target.allocated = */ xstrdup(str+1);
|
||||||
|
/* lsa stays the same: it's on the same server */
|
||||||
|
else {
|
||||||
|
parse_url(str, &target);
|
||||||
|
if (!use_proxy) {
|
||||||
|
server.host = target.host;
|
||||||
|
server.port = target.port;
|
||||||
free(lsa);
|
free(lsa);
|
||||||
lsa = xhost2sockaddr(server.host, server.port);
|
goto resolve_lsa;
|
||||||
break;
|
} /* else: lsa stays the same: we use proxy */
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
goto establish_session;
|
||||||
}
|
}
|
||||||
} while (status >= 300);
|
}
|
||||||
|
// if (status >= 300)
|
||||||
|
// bb_error_msg_and_die("bad redirection (no Location: header from server)");
|
||||||
|
|
||||||
|
/* For HTTP, data is pumped over the same connection */
|
||||||
dfp = sfp;
|
dfp = sfp;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -897,10 +899,11 @@ However, in real world it was observed that some web servers
|
|||||||
|
|
||||||
retrieve_file_data(dfp, output_fd);
|
retrieve_file_data(dfp, output_fd);
|
||||||
|
|
||||||
if ((use_proxy == 0) && target.is_ftp) {
|
if (dfp != sfp) {
|
||||||
|
/* It's ftp. Close it properly */
|
||||||
fclose(dfp);
|
fclose(dfp);
|
||||||
if (ftpcmd(NULL, NULL, sfp, buf) != 226)
|
if (ftpcmd(NULL, NULL, sfp, buf) != 226)
|
||||||
bb_error_msg_and_die("ftp error: %s", buf+4);
|
bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4));
|
||||||
ftpcmd("QUIT", NULL, sfp, buf);
|
ftpcmd("QUIT", NULL, sfp, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user