Annotation of /trunk/mkinitrd-magellan/busybox/networking/wget.c
Parent Directory | Revision Log
Revision 1123 -
(hide annotations)
(download)
Wed Aug 18 21:56:57 2010 UTC (13 years, 9 months ago) by niro
File MIME type: text/plain
File size: 22619 byte(s)
Wed Aug 18 21:56:57 2010 UTC (13 years, 9 months ago) by niro
File MIME type: text/plain
File size: 22619 byte(s)
-updated to busybox-1.17.1
1 | niro | 532 | /* vi: set sw=4 ts=4: */ |
2 | /* | ||
3 | * wget - retrieve a file using HTTP or FTP | ||
4 | * | ||
5 | * Chip Rosenthal Covad Communications <chip@laserlink.net> | ||
6 | * | ||
7 | niro | 984 | * Licensed under GPLv2, see file LICENSE in this tarball for details. |
8 | niro | 532 | */ |
9 | niro | 816 | #include "libbb.h" |
10 | niro | 532 | |
11 | struct host_info { | ||
12 | // May be used if we ever will want to free() all xstrdup()s... | ||
13 | /* char *allocated; */ | ||
14 | niro | 816 | const char *path; |
15 | const char *user; | ||
16 | char *host; | ||
17 | int port; | ||
18 | smallint is_ftp; | ||
19 | niro | 532 | }; |
20 | |||
21 | |||
22 | niro | 816 | /* Globals (can be accessed from signal handlers) */ |
23 | struct globals { | ||
24 | off_t content_len; /* Content-length of the file */ | ||
25 | off_t beg_range; /* Range at which continue begins */ | ||
26 | niro | 532 | #if ENABLE_FEATURE_WGET_STATUSBAR |
27 | niro | 816 | off_t transferred; /* Number of bytes transferred so far */ |
28 | const char *curfile; /* Name of current file being transferred */ | ||
29 | niro | 984 | bb_progress_t pmt; |
30 | niro | 532 | #endif |
31 | niro | 984 | smallint chunked; /* chunked transfer encoding */ |
32 | smallint got_clen; /* got content-length: from server */ | ||
33 | niro | 1123 | } FIX_ALIASING; |
34 | niro | 816 | #define G (*(struct globals*)&bb_common_bufsiz1) |
35 | struct BUG_G_too_big { | ||
36 | char BUG_G_too_big[sizeof(G) <= COMMON_BUFSIZE ? 1 : -1]; | ||
37 | }; | ||
38 | #define INIT_G() do { } while (0) | ||
39 | |||
40 | |||
41 | niro | 532 | #if ENABLE_FEATURE_WGET_STATUSBAR |
42 | niro | 816 | |
43 | niro | 984 | static void progress_meter(int flag) |
44 | niro | 816 | { |
45 | /* We can be called from signal handler */ | ||
46 | int save_errno = errno; | ||
47 | |||
48 | niro | 984 | if (flag == -1) { /* first call to progress_meter */ |
49 | bb_progress_init(&G.pmt); | ||
50 | niro | 816 | } |
51 | |||
52 | niro | 984 | bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred, |
53 | niro | 1123 | G.chunked ? 0 : G.beg_range + G.transferred + G.content_len); |
54 | niro | 816 | |
55 | if (flag == 0) { | ||
56 | niro | 984 | /* last call to progress_meter */ |
57 | niro | 816 | alarm(0); |
58 | niro | 1123 | bb_putchar_stderr('\n'); |
59 | niro | 984 | G.transferred = 0; |
60 | niro | 816 | } else { |
61 | niro | 984 | if (flag == -1) { /* first call to progress_meter */ |
62 | signal_SA_RESTART_empty_mask(SIGALRM, progress_meter); | ||
63 | niro | 816 | } |
64 | alarm(1); | ||
65 | } | ||
66 | |||
67 | errno = save_errno; | ||
68 | } | ||
69 | niro | 984 | |
70 | niro | 816 | #else /* FEATURE_WGET_STATUSBAR */ |
71 | |||
72 | niro | 984 | static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { } |
73 | niro | 816 | |
74 | niro | 532 | #endif |
75 | |||
76 | niro | 816 | |
77 | niro | 984 | /* IPv6 knows scoped address types i.e. link and site local addresses. Link |
78 | * local addresses can have a scope identifier to specify the | ||
79 | * interface/link an address is valid on (e.g. fe80::1%eth0). This scope | ||
80 | * identifier is only valid on a single node. | ||
81 | * | ||
82 | * RFC 4007 says that the scope identifier MUST NOT be sent across the wire, | ||
83 | * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers | ||
84 | * in the Host header as invalid requests, see | ||
85 | * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122 | ||
86 | */ | ||
/*
 * Remove a "%zone" suffix from a bracketed IPv6 literal, in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that does not start with '[' or has no '%' is left alone,
 * as is a malformed address (']' missing, or followed by something
 * other than ':' or the end of the string).
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone;
	char *bracket;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */
	if (*host != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return; /* no zone identifier present */

	bracket = strchr(host, ']');
	if (bracket == NULL)
		return; /* malformed: no closing bracket */

	/* Only "[xx]:nn" and "[xx]" are accepted */
	if (bracket[1] == ':' || bracket[1] == '\0') {
		/* bracket points to "]...", zone points to "%eth0]..." */
		overlapping_strcpy(zone, bracket);
	}
}
112 | |||
113 | niro | 816 | /* Read NMEMB bytes into PTR from STREAM. Returns the number of bytes read, |
114 | * and a short count if an eof or non-interrupt error is encountered. */ | ||
/*
 * fread() wrapper that restarts after an interrupted read (EINTR).
 * Keeps reading until NMEMB bytes are stored in PTR, or until end of
 * file or a non-interrupt error stops it. Returns the byte count
 * actually stored, which may be short.
 */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const base = (char*)ptr;
	size_t done = 0;

	for (;;) {
		size_t got;

		/* Start each attempt with clean stream flags and errno */
		clearerr(stream);
		errno = 0;
		got = fread(base + done, 1, nmemb - done, stream);
		done += got;

		if (done == nmemb)
			break;  /* everything requested has arrived */
		if (!ferror(stream) || errno != EINTR)
			break;  /* eof or a real error: report the short count */
	}

	return done;
}
130 | |||
131 | niro | 816 | /* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM. |
132 | niro | 532 | * Returns S, or NULL if an eof or non-interrupt error is encountered. */ |
/*
 * fgets() wrapper that retries when the read was interrupted (EINTR).
 * Returns S on success, NULL on end of file or a non-interrupt error.
 */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* Start each attempt with clean stream flags and errno */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
145 | |||
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode STR into BUF and return BUF.
 * BUF is assumed to be char buf[512]; input longer than
 * 512/4*3 - 10 bytes is silently cut to that length.
 */
static char *base64enc_512(char buf[512], const char *str)
{
	enum { MAX_INPUT = 512/4*3 - 10 }; /* paranoia */
	unsigned len = strlen(str);

	bb_uuencode(buf, str, len > MAX_INPUT ? MAX_INPUT : len, bb_uuenc_tbl_base64);
	return buf;
}
#endif
157 | |||
/*
 * Cut S, in place, at the first byte below ' ' (control characters,
 * including CR, LF and TAB). Bytes >= 0x80 are kept because the
 * comparison is done on unsigned values. Returns S.
 */
static char* sanitize_string(char *s)
{
	size_t i;

	for (i = 0; (unsigned char)s[i] >= ' '; i++)
		continue;
	s[i] = '\0';
	return s;
}
166 | niro | 816 | |
167 | static FILE *open_socket(len_and_sockaddr *lsa) | ||
168 | niro | 532 | { |
169 | niro | 816 | FILE *fp; |
170 | |||
171 | /* glibc 2.4 seems to try seeking on it - ??! */ | ||
172 | /* hopefully it understands what ESPIPE means... */ | ||
173 | fp = fdopen(xconnect_stream(lsa), "r+"); | ||
174 | if (fp == NULL) | ||
175 | bb_perror_msg_and_die("fdopen"); | ||
176 | |||
177 | return fp; | ||
178 | } | ||
179 | |||
/*
 * Send an FTP command and wait for the final reply line.
 *
 * s1, s2: the command is "<s1><s2>\r\n"; s2 may be NULL (treated as "").
 *         If s1 is NULL nothing is sent and only a reply is awaited.
 * fp:     control connection.
 * buf:    receives the reply line (CRLF stripped); callers pass a
 *         512-byte buffer, at most 509 characters are read per line.
 *
 * Lines are skipped until one starts with a digit and has a space in
 * the fourth position ("NNN text"). Returns the numeric reply code;
 * on return buf still holds the complete line.
 * Dies if the connection yields no more lines.
 */
static int ftpcmd(const char *s1, const char *s2, FILE *fp, char *buf)
{
	int result;

	if (s1) {
		if (!s2)
			s2 = "";
		fprintf(fp, "%s%s\r\n", s1, s2);
		fflush(fp);
	}

	do {
		char *buf_ptr;

		if (fgets(buf, 510, fp) == NULL) {
			bb_perror_msg_and_die("error getting response");
		}
		buf_ptr = strstr(buf, "\r\n");
		if (buf_ptr) {
			*buf_ptr = '\0';
		}
		/* buf[3] is examined only when the line really has at least
		 * three characters; a shorter line would otherwise expose
		 * stale bytes left over from a previous reply. */
	} while (!isdigit((unsigned char)buf[0])
	      || buf[1] == '\0'
	      || buf[2] == '\0'
	      || buf[3] != ' ');

	/* Temporarily terminate after the three-digit code to convert it */
	buf[3] = '\0';
	result = xatoi_u(buf);
	buf[3] = ' ';
	return result;
}
206 | |||
207 | static void parse_url(char *src_url, struct host_info *h) | ||
208 | { | ||
209 | char *url, *p, *sp; | ||
210 | |||
211 | /* h->allocated = */ url = xstrdup(src_url); | ||
212 | |||
213 | if (strncmp(url, "http://", 7) == 0) { | ||
214 | h->port = bb_lookup_port("http", "tcp", 80); | ||
215 | h->host = url + 7; | ||
216 | h->is_ftp = 0; | ||
217 | } else if (strncmp(url, "ftp://", 6) == 0) { | ||
218 | h->port = bb_lookup_port("ftp", "tcp", 21); | ||
219 | h->host = url + 6; | ||
220 | h->is_ftp = 1; | ||
221 | } else | ||
222 | niro | 984 | bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url)); |
223 | niro | 816 | |
224 | // FYI: | ||
225 | // "Real" wget 'http://busybox.net?var=a/b' sends this request: | ||
226 | // 'GET /?var=a/b HTTP 1.0' | ||
227 | // and saves 'index.html?var=a%2Fb' (we save 'b') | ||
228 | // wget 'http://busybox.net?login=john@doe': | ||
229 | // request: 'GET /?login=john@doe HTTP/1.0' | ||
230 | // saves: 'index.html?login=john@doe' (we save '?login=john@doe') | ||
231 | // wget 'http://busybox.net#test/test': | ||
232 | // request: 'GET / HTTP/1.0' | ||
233 | // saves: 'index.html' (we save 'test') | ||
234 | // | ||
235 | // We also don't add unique .N suffix if file exists... | ||
236 | sp = strchr(h->host, '/'); | ||
237 | p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p; | ||
238 | p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p; | ||
239 | if (!sp) { | ||
240 | h->path = ""; | ||
241 | } else if (*sp == '/') { | ||
242 | *sp = '\0'; | ||
243 | h->path = sp + 1; | ||
244 | } else { // '#' or '?' | ||
245 | // http://busybox.net?login=john@doe is a valid URL | ||
246 | // memmove converts to: | ||
247 | // http:/busybox.nett?login=john@doe... | ||
248 | memmove(h->host - 1, h->host, sp - h->host); | ||
249 | h->host--; | ||
250 | sp[-1] = '\0'; | ||
251 | h->path = sp; | ||
252 | } | ||
253 | |||
254 | niro | 984 | // We used to set h->user to NULL here, but this interferes |
255 | // with handling of code 302 ("object was moved") | ||
256 | |||
257 | niro | 816 | sp = strrchr(h->host, '@'); |
258 | if (sp != NULL) { | ||
259 | h->user = h->host; | ||
260 | *sp = '\0'; | ||
261 | h->host = sp + 1; | ||
262 | } | ||
263 | |||
264 | sp = h->host; | ||
265 | } | ||
266 | |||
/*
 * Read one header line from FP into BUF (BUFSIZ bytes).
 *
 * Returns NULL at end of input or on the blank line that ends the
 * headers. Otherwise the header name is lower-cased and NUL-terminated
 * at the start of BUF, and a pointer to the value (leading whitespace
 * skipped, line ending removed) is returned. If the line did not fit
 * into BUF, the rest of it is read and discarded, so the returned
 * value is truncated.
 * A line whose name is not followed by ':' is fatal.
 */
static char *gethdr(char *buf, size_t bufsiz, FILE *fp /*, int *istrunc*/)
{
	char *cur, *value;
	int ch;

	/* *istrunc = 0; */

	/* retrieve header line */
	if (fgets(buf, bufsiz, fp) == NULL)
		return NULL;

	/* see if we are at the end of the headers */
	cur = buf;
	while (*cur == '\r')
		cur++;
	if (*cur == '\n')
		return NULL;

	/* convert the header name to lower case */
	cur = buf;
	while (isalnum(*cur) || *cur == '-' || *cur == '.') {
		/* tolower for "A-Z", no-op for "0-9a-z-." */
		*cur |= 0x20;
		cur++;
	}

	/* verify we are at the end of the header name */
	if (*cur != ':')
		bb_error_msg_and_die("bad header line: %s", sanitize_string(buf));

	/* terminate the name, then locate the start of the header value */
	*cur++ = '\0';
	value = skip_whitespace(cur);

	/* locate the end of header: first CR or LF, or the end of BUF */
	cur += strcspn(cur, "\r\n");
	if (*cur != '\0') {
		/* end of header found */
		*cur = '\0';
		return value;
	}

	/* Rats! The buffer isn't big enough to hold the entire header value */
	do {
		ch = getc(fp);
	} while (ch != EOF && ch != '\n');
	/* *istrunc = 1; */
	return value;
}
314 | |||
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * Return a newly allocated URL-encoded copy of STR (see RFC 2396).
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied
 * unchanged; every other byte becomes %XX with upper-case hex digits.
 * The caller owns the result.
 */
static char *URL_escape(const char *str)
{
	/* Worst case every byte expands to three, plus the terminator */
	char *const encoded = xmalloc(strlen(str) * 3 + 1);
	char *out = encoded;

	for (;;) {
		const unsigned char c = *str++;
		int verbatim;

		switch (c) {
		case '\0':
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (!verbatim) {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[c >> 4];
			*out++ = bb_hexdigits_upcase[c & 0xf];
			continue;
		}

		/* The terminating NUL is copied too, which ends the string */
		*out++ = c;
		if (c == '\0')
			return encoded;
	}
}
#endif
352 | |||
353 | static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa) | ||
354 | { | ||
355 | char buf[512]; | ||
356 | FILE *sfp; | ||
357 | char *str; | ||
358 | int port; | ||
359 | |||
360 | if (!target->user) | ||
361 | target->user = xstrdup("anonymous:busybox@"); | ||
362 | |||
363 | sfp = open_socket(lsa); | ||
364 | if (ftpcmd(NULL, NULL, sfp, buf) != 220) | ||
365 | bb_error_msg_and_die("%s", sanitize_string(buf+4)); | ||
366 | |||
367 | /* | ||
368 | * Splitting username:password pair, | ||
369 | * trying to log in | ||
370 | */ | ||
371 | str = strchr(target->user, ':'); | ||
372 | if (str) | ||
373 | *str++ = '\0'; | ||
374 | switch (ftpcmd("USER ", target->user, sfp, buf)) { | ||
375 | case 230: | ||
376 | break; | ||
377 | case 331: | ||
378 | if (ftpcmd("PASS ", str, sfp, buf) == 230) | ||
379 | break; | ||
380 | /* fall through (failed login) */ | ||
381 | default: | ||
382 | bb_error_msg_and_die("ftp login: %s", sanitize_string(buf+4)); | ||
383 | } | ||
384 | |||
385 | ftpcmd("TYPE I", NULL, sfp, buf); | ||
386 | |||
387 | /* | ||
388 | * Querying file size | ||
389 | */ | ||
390 | if (ftpcmd("SIZE ", target->path, sfp, buf) == 213) { | ||
391 | G.content_len = BB_STRTOOFF(buf+4, NULL, 10); | ||
392 | if (G.content_len < 0 || errno) { | ||
393 | bb_error_msg_and_die("SIZE value is garbage"); | ||
394 | } | ||
395 | G.got_clen = 1; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Entering passive mode | ||
400 | */ | ||
401 | if (ftpcmd("PASV", NULL, sfp, buf) != 227) { | ||
402 | pasv_error: | ||
403 | bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(buf)); | ||
404 | } | ||
405 | // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage] | ||
406 | // Server's IP is N1.N2.N3.N4 (we ignore it) | ||
407 | // Server's port for data connection is P1*256+P2 | ||
408 | str = strrchr(buf, ')'); | ||
409 | if (str) str[0] = '\0'; | ||
410 | str = strrchr(buf, ','); | ||
411 | if (!str) goto pasv_error; | ||
412 | port = xatou_range(str+1, 0, 255); | ||
413 | *str = '\0'; | ||
414 | str = strrchr(buf, ','); | ||
415 | if (!str) goto pasv_error; | ||
416 | port += xatou_range(str+1, 0, 255) * 256; | ||
417 | set_nport(lsa, htons(port)); | ||
418 | |||
419 | *dfpp = open_socket(lsa); | ||
420 | |||
421 | if (G.beg_range) { | ||
422 | sprintf(buf, "REST %"OFF_FMT"u", G.beg_range); | ||
423 | if (ftpcmd(buf, NULL, sfp, buf) == 350) | ||
424 | G.content_len -= G.beg_range; | ||
425 | } | ||
426 | |||
427 | if (ftpcmd("RETR ", target->path, sfp, buf) > 150) | ||
428 | bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(buf)); | ||
429 | |||
430 | return sfp; | ||
431 | } | ||
432 | |||
433 | /* Must match option string! */ | ||
434 | enum { | ||
435 | WGET_OPT_CONTINUE = (1 << 0), | ||
436 | WGET_OPT_SPIDER = (1 << 1), | ||
437 | WGET_OPT_QUIET = (1 << 2), | ||
438 | WGET_OPT_OUTNAME = (1 << 3), | ||
439 | WGET_OPT_PREFIX = (1 << 4), | ||
440 | WGET_OPT_PROXY = (1 << 5), | ||
441 | WGET_OPT_USER_AGENT = (1 << 6), | ||
442 | WGET_OPT_RETRIES = (1 << 7), | ||
443 | WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 8), | ||
444 | WGET_OPT_PASSIVE = (1 << 9), | ||
445 | WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS, | ||
446 | WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS, | ||
447 | }; | ||
448 | |||
449 | static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd) | ||
450 | { | ||
451 | char buf[512]; | ||
452 | |||
453 | if (!(option_mask32 & WGET_OPT_QUIET)) | ||
454 | progress_meter(-1); | ||
455 | |||
456 | if (G.chunked) | ||
457 | goto get_clen; | ||
458 | |||
459 | /* Loops only if chunked */ | ||
460 | while (1) { | ||
461 | while (1) { | ||
462 | int n; | ||
463 | unsigned rdsz; | ||
464 | |||
465 | rdsz = sizeof(buf); | ||
466 | if (G.got_clen) { | ||
467 | if (G.content_len < (off_t)sizeof(buf)) { | ||
468 | if ((int)G.content_len <= 0) | ||
469 | break; | ||
470 | rdsz = (unsigned)G.content_len; | ||
471 | } | ||
472 | } | ||
473 | n = safe_fread(buf, rdsz, dfp); | ||
474 | if (n <= 0) { | ||
475 | if (ferror(dfp)) { | ||
476 | /* perror will not work: ferror doesn't set errno */ | ||
477 | bb_error_msg_and_die(bb_msg_read_error); | ||
478 | } | ||
479 | break; | ||
480 | } | ||
481 | xwrite(output_fd, buf, n); | ||
482 | #if ENABLE_FEATURE_WGET_STATUSBAR | ||
483 | G.transferred += n; | ||
484 | #endif | ||
485 | if (G.got_clen) | ||
486 | G.content_len -= n; | ||
487 | } | ||
488 | |||
489 | if (!G.chunked) | ||
490 | break; | ||
491 | |||
492 | safe_fgets(buf, sizeof(buf), dfp); /* This is a newline */ | ||
493 | get_clen: | ||
494 | safe_fgets(buf, sizeof(buf), dfp); | ||
495 | G.content_len = STRTOOFF(buf, NULL, 16); | ||
496 | /* FIXME: error check? */ | ||
497 | if (G.content_len == 0) | ||
498 | break; /* all done! */ | ||
499 | G.got_clen = 1; | ||
500 | } | ||
501 | |||
502 | if (!(option_mask32 & WGET_OPT_QUIET)) | ||
503 | progress_meter(0); | ||
504 | } | ||
505 | |||
506 | niro | 816 | int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
507 | int wget_main(int argc UNUSED_PARAM, char **argv) | ||
508 | { | ||
509 | niro | 532 | char buf[512]; |
510 | struct host_info server, target; | ||
511 | len_and_sockaddr *lsa; | ||
512 | unsigned opt; | ||
513 | niro | 984 | int redir_limit; |
514 | char *proxy = NULL; | ||
515 | niro | 532 | char *dir_prefix = NULL; |
516 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | ||
517 | niro | 984 | char *post_data; |
518 | niro | 532 | char *extra_headers = NULL; |
519 | llist_t *headers_llist = NULL; | ||
520 | #endif | ||
521 | niro | 984 | FILE *sfp; /* socket to web/ftp server */ |
522 | niro | 816 | FILE *dfp; /* socket to ftp server (data) */ |
523 | char *fname_out; /* where to direct output (-O) */ | ||
524 | niro | 532 | int output_fd = -1; |
525 | niro | 984 | bool use_proxy; /* Use proxies if env vars are set */ |
526 | niro | 532 | const char *proxy_flag = "on"; /* Use proxies if env vars are set */ |
527 | niro | 816 | const char *user_agent = "Wget";/* "User-Agent" header field */ |
528 | niro | 532 | |
529 | niro | 816 | static const char keywords[] ALIGN1 = |
530 | "content-length\0""transfer-encoding\0""chunked\0""location\0"; | ||
531 | niro | 532 | enum { |
532 | niro | 816 | KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location |
533 | niro | 532 | }; |
534 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | ||
535 | niro | 816 | static const char wget_longopts[] ALIGN1 = |
536 | /* name, has_arg, val */ | ||
537 | "continue\0" No_argument "c" | ||
538 | "spider\0" No_argument "s" | ||
539 | "quiet\0" No_argument "q" | ||
540 | "output-document\0" Required_argument "O" | ||
541 | "directory-prefix\0" Required_argument "P" | ||
542 | "proxy\0" Required_argument "Y" | ||
543 | "user-agent\0" Required_argument "U" | ||
544 | /* Ignored: */ | ||
545 | // "tries\0" Required_argument "t" | ||
546 | // "timeout\0" Required_argument "T" | ||
547 | /* Ignored (we always use PASV): */ | ||
548 | "passive-ftp\0" No_argument "\xff" | ||
549 | "header\0" Required_argument "\xfe" | ||
550 | niro | 984 | "post-data\0" Required_argument "\xfd" |
551 | /* Ignored (we don't do ssl) */ | ||
552 | "no-check-certificate\0" No_argument "\xfc" | ||
553 | niro | 816 | ; |
554 | niro | 532 | #endif |
555 | niro | 816 | |
556 | INIT_G(); | ||
557 | |||
558 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | ||
559 | applet_long_options = wget_longopts; | ||
560 | #endif | ||
561 | /* server.allocated = target.allocated = NULL; */ | ||
562 | niro | 984 | opt_complementary = "-1" IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::"); |
563 | niro | 816 | opt = getopt32(argv, "csqO:P:Y:U:" /*ignored:*/ "t:T:", |
564 | niro | 532 | &fname_out, &dir_prefix, |
565 | niro | 816 | &proxy_flag, &user_agent, |
566 | NULL, /* -t RETRIES */ | ||
567 | NULL /* -T NETWORK_READ_TIMEOUT */ | ||
568 | niro | 984 | IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist) |
569 | IF_FEATURE_WGET_LONG_OPTIONS(, &post_data) | ||
570 | niro | 532 | ); |
571 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS | ||
572 | if (headers_llist) { | ||
573 | int size = 1; | ||
574 | char *cp; | ||
575 | niro | 816 | llist_t *ll = headers_llist; |
576 | niro | 532 | while (ll) { |
577 | size += strlen(ll->data) + 2; | ||
578 | ll = ll->link; | ||
579 | } | ||
580 | extra_headers = cp = xmalloc(size); | ||
581 | while (headers_llist) { | ||
582 | niro | 816 | cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist)); |
583 | niro | 532 | } |
584 | } | ||
585 | #endif | ||
586 | |||
587 | niro | 984 | /* TODO: compat issue: should handle "wget URL1 URL2..." */ |
588 | |||
589 | target.user = NULL; | ||
590 | niro | 532 | parse_url(argv[optind], &target); |
591 | |||
592 | niro | 816 | /* Use the proxy if necessary */ |
593 | niro | 984 | use_proxy = (strcmp(proxy_flag, "off") != 0); |
594 | niro | 532 | if (use_proxy) { |
595 | proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy"); | ||
596 | niro | 984 | if (proxy && proxy[0]) { |
597 | server.user = NULL; | ||
598 | niro | 532 | parse_url(proxy, &server); |
599 | } else { | ||
600 | use_proxy = 0; | ||
601 | } | ||
602 | } | ||
603 | niro | 984 | if (!use_proxy) { |
604 | server.port = target.port; | ||
605 | if (ENABLE_FEATURE_IPV6) { | ||
606 | server.host = xstrdup(target.host); | ||
607 | } else { | ||
608 | server.host = target.host; | ||
609 | } | ||
610 | } | ||
611 | niro | 532 | |
612 | niro | 984 | if (ENABLE_FEATURE_IPV6) |
613 | strip_ipv6_scope_id(target.host); | ||
614 | |||
615 | niro | 816 | /* Guess an output filename, if there was no -O FILE */ |
616 | if (!(opt & WGET_OPT_OUTNAME)) { | ||
617 | fname_out = bb_get_last_path_component_nostrip(target.path); | ||
618 | /* handle "wget http://kernel.org//" */ | ||
619 | if (fname_out[0] == '/' || !fname_out[0]) | ||
620 | fname_out = (char*)"index.html"; | ||
621 | /* -P DIR is considered only if there was no -O FILE */ | ||
622 | if (dir_prefix) | ||
623 | fname_out = concat_path_file(dir_prefix, fname_out); | ||
624 | } else { | ||
625 | if (LONE_DASH(fname_out)) { | ||
626 | /* -O - */ | ||
627 | output_fd = 1; | ||
628 | opt &= ~WGET_OPT_CONTINUE; | ||
629 | niro | 532 | } |
630 | niro | 816 | } |
631 | niro | 532 | #if ENABLE_FEATURE_WGET_STATUSBAR |
632 | niro | 984 | G.curfile = bb_get_last_path_component_nostrip(fname_out); |
633 | niro | 532 | #endif |
634 | niro | 816 | |
635 | niro | 532 | /* Impossible? |
636 | if ((opt & WGET_OPT_CONTINUE) && !fname_out) | ||
637 | niro | 984 | bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)"); |
638 | */ | ||
639 | niro | 532 | |
640 | niro | 816 | /* Determine where to start transfer */ |
641 | niro | 532 | if (opt & WGET_OPT_CONTINUE) { |
642 | output_fd = open(fname_out, O_WRONLY); | ||
643 | if (output_fd >= 0) { | ||
644 | niro | 984 | G.beg_range = xlseek(output_fd, 0, SEEK_END); |
645 | niro | 532 | } |
646 | /* File doesn't exist. We do not create file here yet. | ||
647 | niro | 984 | * We are not sure it exists on remove side */ |
648 | niro | 532 | } |
649 | |||
650 | niro | 984 | redir_limit = 5; |
651 | resolve_lsa: | ||
652 | niro | 816 | lsa = xhost2sockaddr(server.host, server.port); |
653 | niro | 532 | if (!(opt & WGET_OPT_QUIET)) { |
654 | niro | 984 | char *s = xmalloc_sockaddr2dotted(&lsa->u.sa); |
655 | fprintf(stderr, "Connecting to %s (%s)\n", server.host, s); | ||
656 | free(s); | ||
657 | niro | 532 | } |
658 | niro | 984 | establish_session: |
659 | niro | 532 | if (use_proxy || !target.is_ftp) { |
660 | /* | ||
661 | * HTTP session | ||
662 | */ | ||
663 | niro | 984 | char *str; |
664 | int status; | ||
665 | niro | 532 | |
666 | niro | 984 | /* Open socket to http server */ |
667 | sfp = open_socket(lsa); | ||
668 | niro | 532 | |
669 | niro | 984 | /* Send HTTP request */ |
670 | if (use_proxy) { | ||
671 | fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n", | ||
672 | target.is_ftp ? "f" : "ht", target.host, | ||
673 | target.path); | ||
674 | } else { | ||
675 | if (opt & WGET_OPT_POST_DATA) | ||
676 | fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path); | ||
677 | else | ||
678 | niro | 532 | fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path); |
679 | niro | 984 | } |
680 | niro | 532 | |
681 | niro | 984 | fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n", |
682 | target.host, user_agent); | ||
683 | niro | 532 | |
684 | #if ENABLE_FEATURE_WGET_AUTHENTICATION | ||
685 | niro | 984 | if (target.user) { |
686 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6, | ||
687 | base64enc_512(buf, target.user)); | ||
688 | } | ||
689 | if (use_proxy && server.user) { | ||
690 | fprintf(sfp, "Proxy-Authorization: Basic %s\r\n", | ||
691 | base64enc_512(buf, server.user)); | ||
692 | } | ||
693 | niro | 532 | #endif |
694 | |||
695 | niro | 984 | if (G.beg_range) |
696 | fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range); | ||
697 | niro | 532 | #if ENABLE_FEATURE_WGET_LONG_OPTIONS |
698 | niro | 984 | if (extra_headers) |
699 | fputs(extra_headers, sfp); | ||
700 | |||
701 | if (opt & WGET_OPT_POST_DATA) { | ||
702 | char *estr = URL_escape(post_data); | ||
703 | fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n"); | ||
704 | fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s", | ||
705 | (int) strlen(estr), estr); | ||
706 | /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/ | ||
707 | /*fprintf(sfp, "%s\r\n", estr);*/ | ||
708 | free(estr); | ||
709 | } else | ||
710 | niro | 532 | #endif |
711 | niro | 984 | { /* If "Connection:" is needed, document why */ |
712 | fprintf(sfp, /* "Connection: close\r\n" */ "\r\n"); | ||
713 | } | ||
714 | niro | 532 | |
715 | niro | 984 | /* |
716 | * Retrieve HTTP response line and check for "200" status code. | ||
717 | */ | ||
718 | niro | 532 | read_response: |
719 | niro | 984 | if (fgets(buf, sizeof(buf), sfp) == NULL) |
720 | bb_error_msg_and_die("no response from server"); | ||
721 | niro | 532 | |
722 | niro | 984 | str = buf; |
723 | str = skip_non_whitespace(str); | ||
724 | str = skip_whitespace(str); | ||
725 | // FIXME: no error check | ||
726 | // xatou wouldn't work: "200 OK" | ||
727 | status = atoi(str); | ||
728 | switch (status) { | ||
729 | case 0: | ||
730 | case 100: | ||
731 | while (gethdr(buf, sizeof(buf), sfp /*, &n*/) != NULL) | ||
732 | /* eat all remaining headers */; | ||
733 | goto read_response; | ||
734 | case 200: | ||
735 | niro | 816 | /* |
736 | Response 204 doesn't say "null file", it says "metadata | ||
737 | has changed but data didn't": | ||
738 | |||
739 | "10.2.5 204 No Content | ||
740 | The server has fulfilled the request but does not need to return | ||
741 | an entity-body, and might want to return updated metainformation. | ||
742 | The response MAY include new or updated metainformation in the form | ||
743 | of entity-headers, which if present SHOULD be associated with | ||
744 | the requested variant. | ||
745 | |||
746 | If the client is a user agent, it SHOULD NOT change its document | ||
747 | view from that which caused the request to be sent. This response | ||
748 | is primarily intended to allow input for actions to take place | ||
749 | without causing a change to the user agent's active document view, | ||
750 | although any new or updated metainformation SHOULD be applied | ||
751 | to the document currently in the user agent's active view. | ||
752 | |||
753 | The 204 response MUST NOT include a message-body, and thus | ||
754 | is always terminated by the first empty line after the header fields." | ||
755 | |||
756 | However, in real world it was observed that some web servers | ||
757 | (e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero. | ||
758 | */ | ||
759 | niro | 984 | case 204: |
760 | break; | ||
761 | case 300: /* redirection */ | ||
762 | case 301: | ||
763 | case 302: | ||
764 | case 303: | ||
765 | break; | ||
766 | case 206: | ||
767 | if (G.beg_range) | ||
768 | niro | 532 | break; |
769 | niro | 984 | /* fall through */ |
770 | default: | ||
771 | bb_error_msg_and_die("server returned error: %s", sanitize_string(buf)); | ||
772 | } | ||
773 | |||
774 | /* | ||
775 | * Retrieve HTTP headers. | ||
776 | */ | ||
777 | while ((str = gethdr(buf, sizeof(buf), sfp /*, &n*/)) != NULL) { | ||
778 | /* gethdr converted "FOO:" string to lowercase */ | ||
779 | smalluint key; | ||
780 | /* strip trailing whitespace */ | ||
781 | char *s = strchrnul(str, '\0') - 1; | ||
782 | while (s >= str && (*s == ' ' || *s == '\t')) { | ||
783 | *s = '\0'; | ||
784 | s--; | ||
785 | niro | 532 | } |
786 | niro | 984 | key = index_in_strings(keywords, buf) + 1; |
787 | if (key == KEY_content_length) { | ||
788 | G.content_len = BB_STRTOOFF(str, NULL, 10); | ||
789 | if (G.content_len < 0 || errno) { | ||
790 | bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str)); | ||
791 | niro | 532 | } |
792 | niro | 984 | G.got_clen = 1; |
793 | continue; | ||
794 | } | ||
795 | if (key == KEY_transfer_encoding) { | ||
796 | if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked) | ||
797 | bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str)); | ||
798 | G.chunked = G.got_clen = 1; | ||
799 | } | ||
800 | if (key == KEY_location && status >= 300) { | ||
801 | if (--redir_limit == 0) | ||
802 | bb_error_msg_and_die("too many redirections"); | ||
803 | fclose(sfp); | ||
804 | G.got_clen = 0; | ||
805 | G.chunked = 0; | ||
806 | if (str[0] == '/') | ||
807 | /* free(target.allocated); */ | ||
808 | target.path = /* target.allocated = */ xstrdup(str+1); | ||
809 | /* lsa stays the same: it's on the same server */ | ||
810 | else { | ||
811 | parse_url(str, &target); | ||
812 | if (!use_proxy) { | ||
813 | server.host = target.host; | ||
814 | /* strip_ipv6_scope_id(target.host); - no! */ | ||
815 | /* we assume remote never gives us IPv6 addr with scope id */ | ||
816 | server.port = target.port; | ||
817 | niro | 532 | free(lsa); |
818 | niro | 984 | goto resolve_lsa; |
819 | } /* else: lsa stays the same: we use proxy */ | ||
820 | niro | 532 | } |
821 | niro | 984 | goto establish_session; |
822 | niro | 532 | } |
823 | niro | 984 | } |
824 | // if (status >= 300) | ||
825 | // bb_error_msg_and_die("bad redirection (no Location: header from server)"); | ||
826 | niro | 532 | |
827 | niro | 984 | /* For HTTP, data is pumped over the same connection */ |
828 | niro | 532 | dfp = sfp; |
829 | |||
830 | } else { | ||
831 | /* | ||
832 | * FTP session | ||
833 | */ | ||
834 | niro | 984 | sfp = prepare_ftp_session(&dfp, &target, lsa); |
835 | niro | 532 | } |
836 | |||
837 | niro | 816 | if (opt & WGET_OPT_SPIDER) { |
838 | if (ENABLE_FEATURE_CLEAN_UP) | ||
839 | fclose(sfp); | ||
840 | return EXIT_SUCCESS; | ||
841 | } | ||
842 | niro | 532 | |
843 | niro | 816 | if (output_fd < 0) { |
844 | int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL; | ||
845 | /* compat with wget: -O FILE can overwrite */ | ||
846 | if (opt & WGET_OPT_OUTNAME) | ||
847 | o_flags = O_WRONLY | O_CREAT | O_TRUNC; | ||
848 | output_fd = xopen(fname_out, o_flags); | ||
849 | } | ||
850 | niro | 532 | |
851 | niro | 984 | retrieve_file_data(dfp, output_fd); |
852 | xclose(output_fd); | ||
853 | niro | 532 | |
854 | niro | 984 | if (dfp != sfp) { |
855 | /* It's ftp. Close it properly */ | ||
856 | niro | 532 | fclose(dfp); |
857 | if (ftpcmd(NULL, NULL, sfp, buf) != 226) | ||
858 | niro | 984 | bb_error_msg_and_die("ftp error: %s", sanitize_string(buf+4)); |
859 | /* ftpcmd("QUIT", NULL, sfp, buf); - why bother? */ | ||
860 | niro | 532 | } |
861 | |||
862 | niro | 816 | return EXIT_SUCCESS; |
863 | niro | 532 | } |