2 * tar - `tape archiver', actually usable on any medium.
3 * POSIX "ustar" compliant when extracting, and by default when creating.
4 * this tar attempts to read and write multiple Tblock-byte blocks
5 * at once to and from the filesystem, and does not copy blocks
12 #include <fcall.h> /* for %M */
16 * modified versions of those in libc.h; scans only the first arg for
17 * keyletters and options.
20 (argv0 || (argv0 = *argv)), argv++, argc--;\
26 while(*_args && (_args += chartorune(&_argc, _args)))\
28 #define TARGEND SET(_argt); USED(_argt);USED(_argc);USED(_args); \
30 USED(argv); USED(argc); }
31 #define TARGC() (_argc)
33 #define ROUNDUP(a, b) (((a) + (b) - 1)/(b)) /* ceiling of a/b for non-negative a and positive b; yields a quotient, not a rounded to a multiple of b */
34 #define BYTES2TBLKS(bytes) ROUNDUP(bytes, Tblock) /* number of Tblock-byte blocks needed to hold `bytes' bytes */
36 /* read big-endian binary integers; args must be (uchar *) */
37 #define G2BEBYTE(x) (((x)[0]<<8) | (x)[1]) /* 2 bytes, most significant first */
38 #define G3BEBYTE(x) (((x)[0]<<16) | ((x)[1]<<8) | (x)[2]) /* 3 bytes, most significant first */
39 #define G4BEBYTE(x) (((x)[0]<<24) | ((x)[1]<<16) | ((x)[2]<<8) | (x)[3]) /* 4 bytes; NOTE(review): (x)[0] promotes to int, so a first byte >= 0x80 is shifted into the sign bit -- confirm the compiler in use tolerates this */
40 #define G8BEBYTE(x) (((vlong)G4BEBYTE(x)<<32) | (u32int)G4BEBYTE((x)+4)) /* 8 bytes as two 4-byte halves; the low half is cast to u32int so it cannot sign-extend over the high half */
43 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
45 enum { Stdin, Stdout, Stderr }; /* file descriptor numbers 0, 1, 2 */
46 enum { Rd, Wr }; /* pipe fd-array indices */
47 enum { Output, Input }; /* values for push()'s `input' argument */
48 enum { None, Toc, Xtract, Replace }; /* presumably the operation chosen by the key letter -- TODO confirm against main() */
49 enum { Alldata, Justnxthdr }; /* values for the `justhdr' argument of refill() and getblk*() */
53 Maxpfx = 155, /* from POSIX */
54 Maxname = Namsiz + 1 + Maxpfx,
55 Binsize = 0x80, /* flag in size[0], from gnu: positive binary size */
56 Binnegsz = 0xff, /* flag in size[0]: negative binary size */
58 Nblock = 40, /* maximum blocksize */
59 Dblock = 20, /* default blocksize */
63 /* POSIX link flags */
69 LF_SYMLINK2 = 's', /* 4BSD used this */
75 /* 'A' - 'Z' are reserved for custom implementations */
78 #define islink(lf) (isreallink(lf) || issymlink(lf)) /* link flag lf is LF_LINK or either symlink flag */
79 #define isreallink(lf) ((lf) == LF_LINK) /* link flag lf is exactly LF_LINK */
80 #define issymlink(lf) ((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2) /* accepts both symlink flags, including the 4BSD 's' (LF_SYMLINK2) */
93 char linkname[Namsiz];
95 /* rest are defined by POSIX's ustar format; see p1003.2b */
96 char magic[6]; /* "ustar" */
102 char prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
112 static Compress comps[] = {
113 "gzip", "gunzip", { ".tar.gz", ".tgz" }, /* default */
114 "compress", "uncompress", { ".tar.Z", ".tz" },
115 "bzip2", "bunzip2", { ".tar.bz", ".tbz",
116 ".tar.bz2",".tbz2" },
121 int fd; /* original fd */
122 int rfd; /* replacement fd */
127 #define OTHER(rdwr) ((rdwr) == Rd? Wr: Rd) /* the opposite pipe fd-array index: Rd gives Wr, anything else gives Rd */
130 static int fixednblock;
132 static int posix = 1;
136 static int relative = 1;
139 static int docompress;
140 static int keepexisting;
141 static int ignerrs; /* flag: ignore i/o errors if possible */
142 static Off blkoff; /* offset of the current archive block (not Tblock) */
145 static int nblock = Dblock;
147 static char *usefile, *arname = "archive";
148 static char origdir[Maxname*2];
149 static Hdr *tpblk, *endblk;
155 fprint(2, "usage: %s {crtx}[PRTfgikmpsuvz] [archive] [file1 file2...]\n",
160 /* I/O, with error retry or exit */
163 cope(char *name, int fd, void *buf, long len, Off off)
165 fprint(2, "%s: %serror reading %s: %r\n", argv0,
166 (ignerrs? "ignoring ": ""), name);
170 /* pretend we read len bytes of zeroes */
172 if (off >= 0) /* seekable? */
173 seek(fd, off + len, 0);
178 eread(char *name, int fd, void *buf, long len)
183 off = seek(fd, 0, 1); /* for coping with errors */
184 rd = read(fd, buf, len);
186 rd = cope(name, fd, buf, len, off);
191 ereadn(char *name, int fd, void *buf, long len)
196 off = seek(fd, 0, 1);
197 rd = readn(fd, buf, len);
199 rd = cope(name, fd, buf, len, off);
204 ewrite(char *name, int fd, void *buf, long len)
209 rd = write(fd, buf, len);
211 sysfatal("error writing %s: %r", name);
218 compmethod(char *name)
220 int i, nmlen = strlen(name), sfxlen;
223 for (cp = comps; cp < comps + nelem(comps); cp++)
224 for (i = 0; i < nelem(cp->sfx) && cp->sfx[i]; i++) {
225 sfxlen = strlen(cp->sfx[i]);
226 if (nmlen > sfxlen &&
227 strcmp(cp->sfx[i], name + nmlen - sfxlen) == 0)
230 return docompress? comps: nil;
234 * push a filter, cmd, onto fd. if input, it's an input descriptor.
235 * returns a descriptor to replace fd, or -1 on error.
238 push(int fd, char *cmd, int input, Pushstate *ps)
246 if (fd < 0 || pipe(pifds) < 0)
254 dup(pifds[Wr], Stdout);
256 dup(pifds[Rd], Stdin);
257 close(pifds[input? Rd: Wr]);
258 dup(fd, (input? Stdin: Stdout));
261 s_append(s, "/bin/");
263 execl(s_to_c(s), cmd, nil);
264 sysfatal("can't exec %s: %r", cmd);
266 nfd = pifds[input? Rd: Wr];
267 close(pifds[input? Wr: Rd]);
276 pushclose(Pushstate *ps)
280 if (ps->fd < 0 || ps->rfd < 0 || !ps->open)
285 while ((wm = wait()) != nil && wm->pid != ps->kid)
287 return wm? wm->msg: nil;
291 * block-buffer management
298 tpblk = malloc(Tblock * nblock);
299 assert(tpblk != nil);
300 endblk = tpblk + nblock;
304 * (re)fill block buffers from archive. `justhdr' means we don't care
305 * about the data before the next header block.
308 refill(int ar, char *bufs, int justhdr)
311 unsigned bytes = Tblock * nblock;
312 static int done, first = 1, seekable;
317 blkoff = seek(ar, 0, 1); /* note position for `tar r' */
319 seekable = blkoff >= 0;
320 /* try to size non-pipe input at first read */
321 if (first && usefile && !fixednblock) {
322 n = eread(arname, ar, bufs, bytes);
324 sysfatal("EOF reading archive %s: %r", arname);
327 sysfatal("%s: archive block size (%d) error", arname, i);
331 fprint(2, "%s: blocking = %d\n", argv0, nblock);
332 endblk = (Hdr *)bufs + nblock;
335 } else if (justhdr && seekable && nexthdr - blkoff >= bytes) {
336 /* optimisation for huge archive members on seekable media */
337 if (seek(ar, bytes, 1) < 0)
338 sysfatal("can't seek on archive %s: %r", arname);
341 n = ereadn(arname, ar, bufs, bytes);
345 sysfatal("unexpected EOF reading archive %s", arname);
347 sysfatal("partial block read from archive %s", arname);
350 memset(bufs + n, 0, bytes - n);
356 getblk(int ar, Refill rfp, int justhdr)
358 if (curblk == nil || curblk >= endblk) { /* input block exhausted? */
359 if (rfp != nil && (*rfp)(ar, (char *)tpblk, justhdr) == nil)
367 getblkrd(int ar, int justhdr)
369 return getblk(ar, refill, justhdr);
375 return getblk(ar, nil, Alldata);
381 Hdr *hp = getblke(ar);
384 memset(hp->data, 0, Tblock);
389 * how many block buffers are available, starting at the address
390 * just returned by getblk*?
395 int n = endblk - (curblk - 1);
397 return n > max? max: n;
401 * indicate that one is done with the last block obtained from getblke
402 * and it is now available to be written into the archive.
407 unsigned bytes = Tblock * nblock;
409 /* if writing end-of-archive, aid compression (good hygiene too) */
411 memset(curblk, 0, (char *)endblk - (char *)curblk);
412 ewrite(arname, ar, tpblk, bytes);
418 if (curblk >= endblk)
430 putreadblks(int ar, int blks)
437 putblkmany(int ar, int blks)
449 * modifies hp->chksum but restores it; important for the last block of the
450 * old archive when updating with `tar rf archive'
457 uchar *cp = hp->data;
458 char oldsum[sizeof hp->chksum];
460 memmove(oldsum, hp->chksum, sizeof oldsum);
461 memset(hp->chksum, ' ', sizeof hp->chksum);
464 memmove(hp->chksum, oldsum, sizeof oldsum);
471 return strcmp(hp->magic, "ustar") == 0;
475 * s is at most n bytes long, but need not be NUL-terminated.
476 * if shorter than n bytes, all bytes after the first NUL must also
480 strnlen(char *s, int n) /* length of s, capped at n; s need not be NUL-terminated */
482 return s[n - 1] != '\0'? n: strlen(s); /* a non-NUL last byte means all n bytes are in use; otherwise a NUL lies within the first n bytes, so strlen stops in bounds. n must be at least 1 */
485 /* set fullname from header */
491 static char fullnamebuf[2+Maxname+1]; /* 2+ for ./ on relative names */
493 fullname = fullnamebuf+2;
494 namlen = strnlen(hp->name, sizeof hp->name);
495 if (hp->prefix[0] == '\0' || !isustar(hp)) { /* old-style name? */
496 memmove(fullname, hp->name, namlen);
497 fullname[namlen] = '\0';
501 /* name is in two pieces */
502 pfxlen = strnlen(hp->prefix, sizeof hp->prefix);
503 memmove(fullname, hp->prefix, pfxlen);
504 fullname[pfxlen] = '/';
505 memmove(fullname + pfxlen + 1, hp->name, namlen);
506 fullname[pfxlen + 1 + namlen] = '\0';
513 /* the mode test is ugly but sometimes necessary */
514 return hp->linkflag == LF_DIR ||
515 strrchr(name(hp), '\0')[-1] == '/' ||
516 (strtoul(hp->mode, nil, 8)&0170000) == 040000;
522 return name(hp)[0] == '\0';
527 getbe(uchar *src, int size)
540 putbe(uchar *dest, uvlong vl, int size)
542 for (dest += size; size-- > 0; vl >>= 8)
547 * cautious parsing of octal numbers as ascii strings in
548 * a tar header block. this is particularly important for
549 * trusting the checksum when trying to resync.
552 hdrotoull(char *st, char *end, uvlong errval, char *name, char *field)
556 for (numb = st; (*numb == ' ' || *numb == '\0') && numb < end; numb++)
558 if (numb < end && isascii(*numb) && isdigit(*numb))
559 return strtoull(numb, nil, 8);
560 else if (numb >= end)
561 fprint(2, "%s: %s: empty %s in header\n", argv0, name, field);
563 fprint(2, "%s: %s: %s: non-numeric %s in header\n",
564 argv0, name, numb, field);
569 * return the nominal size from the header block, which is not always the
570 * size in the archive (the archive size may be zero for some file types
571 * regardless of the nominal size).
573 * gnu and freebsd tars are now recording vlongs as big-endian binary
574 * with a flag in byte 0 to indicate this, which permits file sizes up to
575 * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
582 if((uchar)hp->size[0] == Binnegsz) {
583 fprint(2, "%s: %s: negative length, which is insane\n",
586 } else if((uchar)hp->size[0] == Binsize) {
587 p = (uchar *)hp->size + sizeof hp->size - 1 -
588 sizeof(vlong); /* -1 for terminating space */
592 return hdrotoull(hp->size, hp->size + sizeof hp->size, 0,
597 * return the number of bytes recorded in the archive.
602 if(isdir(hp) || islink(hp->linkflag))
608 parsecksum(char *cksum, char *name)
612 return hdrotoull(cksum, cksum + sizeof hp->chksum, (uvlong)-1LL,
622 hp = getblkrd(ar, Alldata);
624 sysfatal("unexpected EOF instead of archive header in %s",
626 if (eotar(hp)) /* end-of-archive block? */
629 hdrcksum = parsecksum(hp->chksum, name(hp));
630 if (hdrcksum == -1 || chksum(hp) != hdrcksum) {
632 sysfatal("bad archive header checksum in %s: "
633 "name %.100s...; expected %#luo got %#luo",
634 arname, hp->name, hdrcksum, chksum(hp));
635 fprint(2, "%s: skipping past archive header with bad checksum in %s...",
638 hp = getblkrd(ar, Alldata);
640 sysfatal("unexpected EOF looking for archive header in %s",
642 hdrcksum = parsecksum(hp->chksum, name(hp));
643 } while (hdrcksum == -1 || chksum(hp) != hdrcksum);
644 fprint(2, "found %s\n", name(hp));
646 nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
655 * if name is longer than Namsiz bytes, try to split it at a slash and fit the
656 * pieces into hp->prefix and hp->name.
659 putfullname(Hdr *hp, char *name)
663 String *slname = nil;
667 s_append(slname, name);
668 s_append(slname, "/"); /* posix requires this */
669 name = s_to_c(slname);
672 namlen = strlen(name);
673 if (namlen <= Namsiz) {
674 strncpy(hp->name, name, Namsiz);
675 hp->prefix[0] = '\0'; /* ustar paranoia */
679 if (!posix || namlen > Maxname) {
680 fprint(2, "%s: name too long for tar header: %s\n",
685 * try various splits until one results in pieces that fit into the
686 * appropriate fields of the header. look for slashes from right
687 * to left, in the hopes of putting the largest part of the name into
688 * hp->prefix, which is larger than hp->name.
690 sl = strrchr(name, '/');
693 if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
697 sl = strrchr(name, '/');
701 fprint(2, "%s: name can't be split to fit tar header: %s\n",
706 strncpy(hp->prefix, name, sizeof hp->prefix);
708 strncpy(hp->name, sl, sizeof hp->name);
715 mkhdr(Hdr *hp, Dir *dir, char *file)
720 * some of these fields run together, so we format them left-to-right
721 * and don't use snprint.
723 sprint(hp->mode, "%6lo ", dir->mode & 0777);
724 sprint(hp->uid, "%6o ", aruid);
725 sprint(hp->gid, "%6o ", argid);
726 if (dir->length >= (Off)1<<32) {
731 fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
733 hp->size[0] = Binsize;
734 /* emit so-called `base 256' representation of size */
735 putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
736 hp->size[sizeof hp->size - 1] = ' ';
738 sprint(hp->size, "%11lluo ", dir->length);
739 sprint(hp->mtime, "%11luo ", dir->mtime);
740 hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
741 r = putfullname(hp, file);
743 strncpy(hp->magic, "ustar", sizeof hp->magic);
744 strncpy(hp->version, "00", sizeof hp->version);
745 strncpy(hp->uname, dir->uid, sizeof hp->uname);
746 strncpy(hp->gname, dir->gid, sizeof hp->gname);
748 sprint(hp->chksum, "%6luo", chksum(hp));
752 static void addtoar(int ar, char *file, char *shortf);
755 addtreetoar(int ar, char *file, char *shortf, int fd)
759 String *name = s_new();
761 n = dirreadall(fd, &dirents);
763 fprint(2, "%s: dirreadall %s: %r\n", argv0, file);
768 if (chdir(shortf) < 0)
769 sysfatal("chdir %s: %r", file);
771 fprint(2, "chdir %s\t# %s\n", shortf, file);
773 for (dent = dirents; dent < dirents + n; dent++) {
775 s_append(name, file);
777 s_append(name, dent->name);
778 addtoar(ar, s_to_c(name), dent->name);
784 * this assumes that shortf is just one component, which is true
785 * during directory descent, but not necessarily true of command-line
786 * arguments. Our caller (or addtoar's) must reset the working
787 * directory if necessary.
790 sysfatal("chdir %s/..: %r", file);
792 fprint(2, "chdir ..\n");
796 addtoar(int ar, char *file, char *shortf)
799 long bytes, blksread;
805 if (shortf[0] == '#') {
807 s_append(name, "./");
808 s_append(name, shortf);
809 shortf = s_to_c(name);
813 fprint(2, "opening %s # %s\n", shortf, file);
814 fd = open(shortf, OREAD);
816 fprint(2, "%s: can't open %s: %r\n", argv0, file);
823 sysfatal("can't fstat %s: %r", file);
826 isdir = (dir->qid.type & QTDIR) != 0;
827 if (mkhdr(hbp, dir, file) < 0) {
837 blksleft = BYTES2TBLKS(dir->length);
841 addtreetoar(ar, file, shortf, fd);
843 for (; blksleft > 0; blksleft -= blksread) {
845 blksread = gothowmany(blksleft);
846 assert(blksread >= 0);
847 bytes = blksread * Tblock;
848 n = ereadn(file, fd, hbp->data, bytes);
851 * ignore EOF. zero any partial block to aid
852 * compression and emergency recovery of data.
855 memset(hbp->data + n, 0, bytes - n);
856 putblkmany(ar, blksread);
860 fprint(2, "%s\n", file);
870 ulong blksleft, blksread;
873 Compress *comp = nil;
876 if (usefile && docreate) {
877 ar = create(usefile, OWRITE, 0666);
879 comp = compmethod(usefile);
881 ar = open(usefile, ORDWR);
885 ar = push(ar, comp->comp, Output, &ps);
887 sysfatal("can't open archive %s: %r", usefile);
889 if (usefile && !docreate) {
890 /* skip quickly to the end */
891 while ((hp = readhdr(ar)) != nil) {
893 for (blksleft = BYTES2TBLKS(bytes);
894 blksleft > 0 && getblkrd(ar, Justnxthdr) != nil;
895 blksleft -= blksread) {
896 blksread = gothowmany(blksleft);
897 putreadblks(ar, blksread);
901 * we have just read the end-of-archive Tblock.
902 * now seek back over the (big) archive block containing it,
903 * and back up curblk ptr over end-of-archive Tblock in memory.
905 if (seek(ar, blkoff, 0) < 0)
906 sysfatal("can't seek back over end-of-archive in %s: %r",
911 for (i = 0; argv[i] != nil; i++) {
912 addtoar(ar, argv[i], argv[i]);
913 chdir(origdir); /* for correctness & profiling */
916 /* write end-of-archive marker */
923 return pushclose(&ps);
933 /* is pfx a file-name prefix of name? */
935 prefix(char *name, char *pfx)
937 int pfxlen = strlen(pfx);
938 char clpfx[Maxname+1];
940 if (pfxlen > Maxname)
944 return strncmp(clpfx, name, pfxlen) == 0 &&
945 (name[pfxlen] == '\0' || name[pfxlen] == '/');
949 match(char *name, char **argv)
952 char clname[Maxname+1];
956 strcpy(clname, name);
958 for (i = 0; argv[i] != nil; i++)
959 if (prefix(clname, argv[i]))
965 cantcreate(char *s, int mode)
971 * Always print about files. Only print about directories
972 * we haven't printed about. (Assumes archive is ordered
977 /* already printed this directory */
978 if(strcmp(s, last) == 0)
980 /* printed a higher directory, so printed this one */
982 if(memcmp(s, last, len) == 0 && last[len] == '/')
989 fprint(2, "%s: can't create %s: %r\n", argv0, s);
997 if (access(s, AEXIST) == 0)
999 f = create(s, OREAD, DMDIR | 0777);
1003 cantcreate(s, DMDIR);
1015 while (!err && (p = strchr(p+1, '/')) != nil) {
1017 err = (access(s, AEXIST) < 0 && makedir(s) < 0);
1023 /* Call access but preserve the error string. */
1025 xaccess(char *name, int mode)
1031 errstr(err, sizeof err);
1032 rv = access(name, mode);
1033 errstr(err, sizeof err);
1038 openfname(Hdr *hp, char *fname, int dir, int mode)
1044 switch (hp->linkflag) {
1048 fprint(2, "%s: can't make (sym)link %s\n",
1052 fprint(2, "%s: can't make fifo %s\n", argv0, fname);
1055 if (!keepexisting || access(fname, AEXIST) < 0) {
1056 int rw = (dir? OREAD: OWRITE);
1058 fd = create(fname, rw, mode);
1061 fd = create(fname, rw, mode);
1063 if (fd < 0 && (!dir || xaccess(fname, AEXIST) < 0))
1064 cantcreate(fname, mode);
1066 if (fd >= 0 && verbose)
1067 fprint(2, "%s\n", fname);
1073 /* copy from archive to file system (or nowhere for table-of-contents) */
1075 copyfromar(int ar, int fd, char *fname, ulong blksleft, Off bytes)
1081 if (blksleft == 0 || bytes < 0)
1083 for (; blksleft > 0; blksleft -= blksread) {
1084 hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
1086 sysfatal("unexpected EOF on archive extracting %s from %s",
1088 blksread = gothowmany(blksleft);
1089 if (blksread <= 0) {
1090 fprint(2, "%s: got %ld blocks reading %s!\n",
1091 argv0, blksread, fname);
1094 wrbytes = Tblock*blksread;
1098 assert(wrbytes >= 0);
1100 ewrite(fname, fd, hbp->data, wrbytes);
1101 putreadblks(ar, blksread);
1106 fprint(2, "%s: %lld bytes uncopied at EOF on archive %s; "
1107 "%s not fully extracted\n", argv0, bytes, arname, fname);
1111 wrmeta(int fd, Hdr *hp, long mtime, int mode) /* update metadata */
1130 * copy a file from the archive into the filesystem.
1131 * fname is result of name(), so has two extra bytes at beginning.
1134 extract1(int ar, Hdr *hp, char *fname)
1136 int fd = -1, dir = 0;
1137 long mtime = strtol(hp->mtime, nil, 8);
1138 ulong mode = strtoul(hp->mode, nil, 8) & 0777;
1139 Off bytes = hdrsize(hp); /* for printing */
1140 ulong blksleft = BYTES2TBLKS(arsize(hp));
1142 /* fiddle name, figure out mode and blocks */
1147 switch (hp->linkflag) {
1158 else if(fname[0] == '#'){
1164 fd = openfname(hp, fname, dir, mode);
1166 char *cp = ctime(mtime);
1168 print("%M %8lld %-12.12s %-4.4s %s\n",
1169 mode, bytes, cp+4, cp+24, fname);
1171 print("%s\n", fname);
1173 copyfromar(ar, fd, fname, blksleft, bytes);
1175 /* touch up meta data and close */
1178 * directories should be wstated *after* we're done
1179 * creating files in them, but we don't do that.
1182 wrmeta(fd, hp, mtime, mode);
1188 skip(int ar, Hdr *hp, char *fname)
1190 ulong blksleft, blksread;
1193 for (blksleft = BYTES2TBLKS(arsize(hp)); blksleft > 0;
1194 blksleft -= blksread) {
1195 hbp = getblkrd(ar, Justnxthdr);
1197 sysfatal("unexpected EOF on archive extracting %s from %s",
1199 blksread = gothowmany(blksleft);
1200 putreadblks(ar, blksread);
1205 extract(char **argv)
1210 Compress *comp = nil;
1214 ar = open(usefile, OREAD);
1215 comp = compmethod(usefile);
1219 ar = push(ar, comp->decomp, Input, &ps);
1221 sysfatal("can't open archive %s: %r", usefile);
1223 while ((hp = readhdr(ar)) != nil) {
1224 longname = name(hp);
1225 if (match(longname, argv))
1226 extract1(ar, hp, longname);
1228 skip(ar, hp, longname);
1232 return pushclose(&ps);
1239 main(int argc, char *argv[])
1244 fmtinstall('M', dirmodefmt);
1252 usefile = arname = EARGF(usage());
1255 argid = strtoul(EARGF(usage()), 0, 0);
1263 case 'm': /* compatibility */
1288 aruid = strtoul(EARGF(usage()), 0, 0);
1302 fprint(2, "tar: unknown letter %C\n", TARGC());
1307 if (argc < 0 || errflg)
1314 ret = extract(argv);
1317 if (getwd(origdir, sizeof origdir) == nil)
1318 strcpy(origdir, "/tmp");
1319 ret = replace(argv);