9 typedef struct URL URL;
24 typedef struct Range Range;
27 long start; /* only 2 gig supported, tdb */
31 typedef struct Out Out;
35 int offset; /* notional current offset in output */
36 int written; /* number of bytes successfully transferred to output */
37 DigestState *curr; /* digest state up to offset (if known) */
38 DigestState *hiwat; /* digest state of all bytes written */
61 int doftp(URL*, URL*, Range*, Out*, long);
62 int dohttp(URL*, URL*, Range*, Out*, long);
63 int crackurl(URL*, char*);
64 Range* crackrange(char*);
65 int getheader(int, char*, int);
66 int httpheaders(int, int, URL*, Range*);
68 int cistrncmp(char*, char*, int);
69 int cistrcmp(char*, char*);
71 int readline(int, char*, int);
72 int readibuf(int, char*, int);
73 int dfprint(int, char*, ...);
74 void unreadline(char*);
75 int output(Out*, char*, int);
76 void setoffset(Out*, int);
80 char tcpdir[NETPATHLEN];
85 int (*f)(URL*, URL*, Range*, Out*, long);
87 [Http] { "http", dohttp },
88 [Https] { "https", dohttp },
89 [Ftp] { "ftp", doftp },
90 [Other] { "_______", nil },
96 fprint(2, "usage: %s [-dhv] [-o outfile] [-p body] [-x netmtpt] [-r header] url\n", argv0);
101 main(int argc, char **argv)
108 char postbody[4096], *p, *e, *t, *hpx;
114 e = p + sizeof(postbody);
118 memset(&u, 0, sizeof(u));
119 memset(&px, 0, sizeof(px));
120 hpx = getenv("httpproxy");
124 ofile = EARGF(usage());
136 net = EARGF(usage());
139 u.rhead = EARGF(usage());
144 p = seprint(p, e, "&%s", t);
146 p = seprint(p, e, "%s", t);
147 u.postbody = postbody;
155 if(strlen(net) > sizeof(tcpdir)-5)
156 sysfatal("network mount point too long");
157 snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
159 snprint(tcpdir, sizeof(tcpdir), "tcp");
173 out.fd = create(ofile, OWRITE, 0664);
175 sysfatal("creating %s: %r", ofile);
177 out.fd = open(ofile, OWRITE);
179 sysfatal("can't open %s: %r", ofile);
188 if(crackurl(&u, argv[0]) < 0)
190 if(hpx && crackurl(&px, hpx) < 0)
197 n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
206 sysfatal("too many errors with no progress %r");
209 sysfatal("server returned: %r");
213 /* forward progress */
224 crackurl(URL *u, char *s)
238 if(u->method == Other){
239 werrstr("missing method");
243 werrstr("missing host");
249 if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
252 for(i = 0; i < nelem(method); i++){
253 if(cistrcmp(s, method[i].name) == 0){
262 if(u->method == Other){
263 werrstr("unsupported URL type %s", s);
273 u->page = strdup("/");
281 if(p = strchr(u->host, ':')) {
285 u->port = method[u->method].name;
288 werrstr("bad url, null host");
296 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
300 "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
315 d.mtime = note.mtime;
316 if(dirfwstat(note.fd, &d) < 0)
317 sysfatal("catch: can't dirfwstat: %r");
322 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
325 int redirect, auth, loop;
333 /* always move back to a previous 512 byte bound because some
334 * servers can't seem to deal with requests that start at the
338 r->start = ((r->start-1)/512)*512;
340 /* loop for redirects, requires reading both response code and headers */
342 for(loop = 0; loop < 32; loop++){
344 fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
346 fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
351 if(u->method == Https){
355 memset(&conn, 0, sizeof conn);
356 tfd = tlsClient(fd, &conn);
358 fprint(2, "tlsClient: %r\n");
362 /* BUG: check cert here? */
369 /* write request, use range if not start of file */
370 if(u->postbody == nil){
372 dfprint(fd, "GET %s HTTP/1.0\r\n"
374 "User-agent: Plan9/hget\r\n"
375 "Cache-Control: no-cache\r\n"
376 "Pragma: no-cache\r\n",
379 dfprint(fd, "GET http://%s%s HTTP/1.0\r\n"
381 "User-agent: Plan9/hget\r\n"
382 "Cache-Control: no-cache\r\n"
383 "Pragma: no-cache\r\n",
384 u->host, u->page, u->host);
387 dfprint(fd, "POST %s HTTP/1.0\r\n"
389 "Content-type: application/x-www-form-urlencoded\r\n"
390 "Content-length: %d\r\n"
391 "User-agent: Plan9/hget\r\n",
392 u->page, u->host, strlen(u->postbody));
395 dfprint(fd, "Authorization: Basic %s\r\n", u->cred);
397 dfprint(fd, "%s\r\n", u->rhead);
399 dfprint(fd, "Range: bytes=%d-\n", r->start);
401 dfprint(fd, "If-range: %s\n", u->etag);
404 dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
405 day[tm->wday], tm->mday, month[tm->mon],
406 tm->year+1900, tm->hour, tm->min, tm->sec);
409 if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
410 if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
411 while((n = read(cfd, buf, sizeof buf)) > 0){
422 dfprint(fd, "\r\n", u->host);
424 dfprint(fd, "%s", u->postbody);
429 code = httprcode(fd);
431 case Error: /* connection timed out */
438 case 201: /* Created */
439 case 202: /* Accepted */
440 if(ofile == nil && r->start != 0)
441 sysfatal("page changed underfoot");
444 case 204: /* No Content */
445 sysfatal("No Content");
447 case 206: /* Partial Content */
448 setoffset(out, r->start);
451 case 301: /* Moved Permanently */
452 case 302: /* Moved Temporarily (actually Found) */
453 case 303: /* See Other */
454 case 307: /* Temporary Redirect (HTTP/1.1) */
459 case 304: /* Not Modified */
462 case 400: /* Bad Request */
463 sysfatal("Bad Request");
465 case 401: /* Unauthorized */
467 sysfatal("Authentication failed");
472 sysfatal("Unauthorized");
474 case 403: /* Forbidden */
475 sysfatal("Forbidden by server");
477 case 404: /* Not Found */
478 sysfatal("Not found on server");
480 case 407: /* Proxy Authentication */
481 sysfatal("Proxy authentication required");
483 case 500: /* Internal server error */
484 sysfatal("Server choked");
486 case 501: /* Not implemented */
487 sysfatal("Server can't do it!");
489 case 502: /* Bad gateway */
490 sysfatal("Bad gateway");
492 case 503: /* Service unavailable */
493 sysfatal("Service unavailable");
496 sysfatal("Unknown response code %d", code);
499 if(u->redirect != nil){
504 rv = httpheaders(fd, cfd, u, r);
511 if(!redirect && !auth)
515 if(u->redirect == nil)
516 sysfatal("redirect: no URL");
517 if(crackurl(u, u->redirect) < 0)
518 sysfatal("redirect: %r");
522 /* transfer whatever you get */
523 if(ofile != nil && u->mtime != 0){
525 note.mtime = u->mtime;
532 n = readibuf(fd, buf, sizeof(buf));
535 if(output(out, buf, n) != n)
538 if(verbose && (vtime != time(0) || r->start == r->end)) {
540 fprint(2, "%ld %ld\n", r->start+tot, r->end);
546 if(ofile != nil && u->mtime != 0){
549 rerrstr(err, sizeof err);
552 if(dirfwstat(out->fd, &d) < 0)
553 fprint(2, "couldn't set mtime: %r\n");
554 errstr(err, sizeof err);
560 /* get the http response code */
568 n = readline(fd, buf, sizeof(buf)-1);
572 fprint(2, "%d <- %s\n", fd, buf);
573 p = strchr(buf, ' ');
574 if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
575 werrstr("bad response from server");
582 /* read in and crack the http headers, update u and r */
583 void hhetag(char*, URL*, Range*);
584 void hhmtime(char*, URL*, Range*);
585 void hhclen(char*, URL*, Range*);
586 void hhcrange(char*, URL*, Range*);
587 void hhuri(char*, URL*, Range*);
588 void hhlocation(char*, URL*, Range*);
589 void hhauth(char*, URL*, Range*);
593 void (*f)(char*, URL*, Range*);
596 { "last-modified:", hhmtime },
597 { "content-length:", hhclen },
598 { "content-range:", hhcrange },
600 { "location:", hhlocation },
601 { "WWW-Authenticate:", hhauth },
604 httpheaders(int fd, int cfd, URL *u, Range *r)
611 n = getheader(fd, buf, sizeof(buf));
615 fprint(cfd, "%s\n", buf);
616 for(i = 0; i < nelem(headers); i++){
617 n = strlen(headers[i].name);
618 if(cistrncmp(buf, headers[i].name, n) == 0){
619 /* skip field name and leading white */
621 while(*p == ' ' || *p == '\t')
624 (*headers[i].f)(p, u, r);
633 * read a single mime header, collect continuations.
635 * this routine assumes that there is a blank line twixt
636 * the header and the message body, otherwise bytes will
640 getheader(int fd, char *buf, int n)
647 for(e = p + n; ; p += i){
648 i = readline(fd, p, e-p);
654 if(strchr(buf, ':') == nil)
655 break; /* end of headers */
657 /* continuation line */
658 if(*p != ' ' && *p != '\t'){
661 break; /* end of this header */
669 fprint(2, "%d <- %s\n", fd, buf);
674 hhetag(char *p, URL *u, Range*)
677 if(strcmp(u->etag, p) != 0)
678 sysfatal("file changed underfoot");
683 char* monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
686 hhmtime(char *p, URL *u, Range*)
688 char *month, *day, *yr, *hms;
693 i = getfields(p, fields, 6, 1, " \t");
703 now = *gmtime(time(0));
707 /* convert ascii month to a number twixt 1 and 12 */
708 if(*month >= '0' && *month <= '9'){
709 tm.mon = atoi(month) - 1;
710 if(tm.mon < 0 || tm.mon > 11)
713 for(p = month; *p; p++)
715 for(i = 0; i < 12; i++)
716 if(strncmp(&monthchars[i*3], month, 3) == 0){
725 tm.hour = strtoul(hms, &p, 10);
728 tm.min = strtoul(p, &p, 10);
731 tm.sec = strtoul(p, &p, 10);
734 if(tolower(*p) == 'p')
743 if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
747 strcpy(tm.zone, "GMT");
748 /* convert to epoch seconds */
749 u->mtime = tm2sec(&tm);
753 hhclen(char *p, URL*, Range *r)
759 hhcrange(char *p, URL*, Range *r)
778 hhuri(char *p, URL *u, Range*)
782 u->redirect = strdup(p+1);
783 p = strchr(u->redirect, '>');
789 hhlocation(char *p, URL *u, Range*)
791 u->redirect = strdup(p);
795 hhauth(char *p, URL *u, Range*)
801 if (cistrncmp(p, "basic ", 6) != 0)
802 sysfatal("only Basic authentication supported");
804 if (gettokens(p, f, nelem(f), "\"") < 2)
805 sysfatal("garbled auth data");
807 if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http server=%q realm=%q",
808 u->host, f[1])) == nil)
809 sysfatal("cannot authenticate");
811 s = smprint("%s:%s", up->user, up->passwd);
812 if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
816 assert(u->cred = strdup(cred));
821 /* ftp return codes */
828 Nnetdir= 64, /* max length of network directory paths */
829 Ndialstr= 64, /* max length of dial strings */
832 int ftpcmd(int, char*, ...);
833 int ftprcode(int, char*, int);
836 int xfertype(int, char*);
837 int passive(int, URL*);
838 int active(int, URL*);
839 int ftpxfer(int, Out*, Range*);
840 int terminateftp(int, int);
841 int getaddrport(char*, uchar*, uchar*);
842 int ftprestart(int, Out*, URL*, Range*, long);
845 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
847 int pid, ctl, data, rv;
850 char conndir[NETPATHLEN];
853 /* untested, proxy doesn't work with ftp (I think) */
855 ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, conndir, 0);
857 ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, conndir, 0);
863 p = strrchr(conndir, '/');
865 snprint(tcpdir, sizeof(tcpdir), conndir);
872 return terminateftp(ctl, rv);
876 return terminateftp(ctl, rv);
878 rv = xfertype(ctl, "I");
880 return terminateftp(ctl, rv);
882 /* if file is up to date and the right size, stop */
883 if(ftprestart(ctl, out, u, r, mtime) > 0){
888 /* first try passive mode, then active */
889 data = passive(ctl, u);
891 data = active(ctl, u);
897 switch(pid = rfork(RFPROC|RFFDG|RFMEM)){
900 return terminateftp(ctl, Error);
902 ftpxfer(data, out, r);
910 /* wait for reply message */
911 rv = ftprcode(ctl, msg, sizeof(msg));
914 /* wait for process to terminate */
926 werrstr("xfer: %s", w->msg);
943 ftpcmd(int ctl, char *fmt, ...)
946 char buf[2*1024], *s;
949 s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
952 fprint(2, "%d -> %s\n", ctl, buf);
955 if(write(ctl, buf, s - buf) != s - buf)
961 ftprcode(int ctl, char *msg, int len)
967 len--; /* room for terminating null */
970 i = readline(ctl, msg, len);
974 fprint(2, "%d <- %s\n", ctl, msg);
976 /* stop if not a continuation */
977 rv = strtol(msg, &p, 10);
978 if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
991 /* wait for hello from other side */
992 if(ftprcode(ctl, msg, sizeof(msg)) != Success){
993 werrstr("HELLO: %s", msg);
1000 getdec(char *p, int n)
1005 for(i = 0; i < n; i++)
1006 x = x*10 + (*p++ - '0');
1011 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
1017 ftpcmd(ctl, "MDTM %s", u->page);
1018 if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1020 return 0; /* need to do something */
1023 /* decode modification time */
1024 if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
1026 return 0; /* need to do something */
1028 memset(&tm, 0, sizeof(tm));
1029 tm.year = getdec(msg+4, 4) - 1900;
1030 tm.mon = getdec(msg+4+4, 2) - 1;
1031 tm.mday = getdec(msg+4+4+2, 2);
1032 tm.hour = getdec(msg+4+4+2+2, 2);
1033 tm.min = getdec(msg+4+4+2+2+2, 2);
1034 tm.sec = getdec(msg+4+4+2+2+2+2, 2);
1035 strcpy(tm.zone, "GMT");
1036 rmtime = tm2sec(&tm);
1041 ftpcmd(ctl, "SIZE %s", u->page);
1042 if(ftprcode(ctl, msg, sizeof(msg)) == Success){
1045 return 1; /* we're up to date */
1049 /* seek to restart point */
1051 ftpcmd(ctl, "REST %lud", r->start);
1052 if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
1053 setoffset(out, r->start);
1058 return 0; /* need to do something */
1066 /* login anonymous */
1067 ftpcmd(ctl, "USER anonymous");
1068 switch(ftprcode(ctl, msg, sizeof(msg))){
1072 break; /* need password */
1074 werrstr("USER: %s", msg);
1078 /* send user id as password */
1079 sprint(msg, "%s@closedmind.org", getuser());
1080 ftpcmd(ctl, "PASS %s", msg);
1081 if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1082 werrstr("PASS: %s", msg);
1090 xfertype(int ctl, char *t)
1094 ftpcmd(ctl, "TYPE %s", t);
1095 if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1096 werrstr("TYPE %s: %s", t, msg);
1104 passive(int ctl, URL *u)
1114 ftpcmd(ctl, "PASV");
1115 if(ftprcode(ctl, msg, sizeof(msg)) != Success)
1118 /* get address and port number from reply, this is AI */
1119 p = strchr(msg, '(');
1121 for(p = msg+3; *p; p++)
1126 if(getfields(p, f, 6, 0, ",)") < 6){
1127 werrstr("ftp protocol botch");
1130 snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
1131 f[0], f[1], f[2], f[3]);
1132 port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
1133 sprint(aport, "%d", port);
1135 /* open data connection */
1136 fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
1138 werrstr("passive mode failed: %r");
1142 /* tell remote to send a file */
1143 ftpcmd(ctl, "RETR %s", u->page);
1144 if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1145 werrstr("RETR %s: %s", u->page, msg);
1152 active(int ctl, URL *u)
1155 char dir[40], ldir[40];
1160 /* announce a port for the call back */
1161 snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
1162 afd = announce(msg, dir);
1166 /* get a local address/port of the announcement */
1167 if(getaddrport(dir, ipaddr, port) < 0){
1172 /* tell remote side address and port */
1173 ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
1174 ipaddr[3], port[0], port[1]);
1175 if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1177 werrstr("active: %s", msg);
1181 /* tell remote to send a file */
1182 ftpcmd(ctl, "RETR %s", u->page);
1183 if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1185 werrstr("RETR: %s", msg);
1189 /* wait for a connection */
1190 lcfd = listen(dir, ldir);
1195 dfd = accept(lcfd, ldir);
1208 ftpxfer(int in, Out *out, Range *r)
1216 i = read(in, buf, sizeof(buf));
1221 if(output(out, buf, i) != i)
1224 if(verbose && (vtime != time(0) || r->start == r->end)) {
1226 fprint(2, "%ld %ld\n", r->start, r->end);
1233 terminateftp(int ctl, int rv)
1240 * case insensitive strcmp (why aren't these in libc?)
1243 cistrncmp(char *a, char *b, int n)
1246 if(tolower(*a++) != tolower(*b++))
1253 cistrcmp(char *a, char *b)
1256 if(tolower(*a++) != tolower(*b++))
1275 b.rp = b.wp = b.buf;
1279 * read a possibly buffered line, strip off trailing white
1282 readline(int fd, char *buf, int len)
1292 n = read(fd, b.wp, sizeof(b.buf)/2);
1310 /* drop trailing white */
1315 if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
1328 unreadline(char *line)
1334 memmove(&b.buf[i+1], b.rp, n);
1335 memmove(b.buf, line, i);
1338 b.wp = b.rp + i + 1 + n;
1342 readibuf(int fd, char *buf, int len)
1350 memmove(buf, b.rp, n);
1354 return read(fd, buf, len);
1358 dfprint(int fd, char *fmt, ...)
1364 vseprint(buf, buf+sizeof(buf), fmt, arg);
1367 fprint(2, "%d -> %s", fd, buf);
1368 return fprint(fd, "%s", buf);
1372 getaddrport(char *dir, uchar *ipaddr, uchar *port)
1378 snprint(buf, sizeof(buf), "%s/local", dir);
1379 fd = open(buf, OREAD);
1382 i = read(fd, buf, sizeof(buf)-1);
1387 p = strchr(buf, '!');
1390 v4parseip(ipaddr, buf);
1398 md5free(DigestState *state)
1401 md5(nil, 0, x, state);
1405 md5dup(DigestState *state)
1409 p = md5pickle(state);
1411 sysfatal("md5pickle: %r");
1412 state = md5unpickle(p);
1414 sysfatal("md5unpickle: %r");
1420 setoffset(Out *out, int offset)
1424 out->curr = md5(nil, 0, nil, nil);
1427 out->offset = offset;
1428 out->written = offset;
1430 if(seek(out->fd, offset, 0) != offset)
1431 sysfatal("seek: %r");
1435 * write some output, discarding it (but keeping track)
1436 * if we've already written it. if we've gone backwards,
1437 * verify that everything previously written matches
1438 * that which would have been written from the current
1442 output(Out *out, char *buf, int nb)
1445 uchar m0[MD5dlen], m1[MD5dlen];
1448 d = out->written - out->offset;
1452 if(out->curr != nil)
1453 md5((uchar*)buf, n, nil, out->curr);
1457 if(out->curr != nil){
1458 md5((uchar*)buf, d, m0, out->curr);
1460 md5(nil, 0, m1, md5dup(out->hiwat));
1461 if(memcmp(m0, m1, MD5dlen) != 0){
1462 fprint(2, "integrity check failure at offset %d\n", out->written);
1471 out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
1472 n = write(out->fd, buf, n);