X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=sys%2Fsrc%2Fcmd%2Ffile.c;h=bd9d1afb5b492146277b6c53411a34177dbc2026;hb=ac88ce4f7f19be1aae1fdf390fa870b4be8dc3f5;hp=5d1547840145d80b703b160276355f64177e4770;hpb=f21719ea46e6c060cc8fadaffb41b7dfb89be62e;p=plan9front.git diff --git a/sys/src/cmd/file.c b/sys/src/cmd/file.c index 5d1547840..bd9d1afb5 100644 --- a/sys/src/cmd/file.c +++ b/sys/src/cmd/file.c @@ -150,6 +150,9 @@ int ishtml(void); int isrfc822(void); int ismbox(void); int islimbo(void); +int istga(void); +int ismp3(void); +int ismp4(void); int ismung(void); int isp9bit(void); int isp9font(void); @@ -165,7 +168,8 @@ int longoff(void); int istar(void); int isface(void); int isexec(void); -int p9bitnum(uchar*); +int isudiff(void); +int p9bitnum(char*, int*); int p9subfont(uchar*); void print_utf(void); void type(char*, int); @@ -181,11 +185,12 @@ int (*call[])(void) = iff, /* interchange file format (strings) */ longoff, /* recognizable by 4 bytes at some offset */ isoffstr, /* recognizable by string at some offset */ + isudiff, /* unified diff output */ isrfc822, /* email file */ ismbox, /* mail box */ istar, /* recognizable by tar checksum */ - ishtml, /* html keywords */ iscint, /* compiler/assembler intermediate */ + ishtml, /* html keywords */ islimbo, /* limbo source */ isc, /* c & alef compiler key words */ isas, /* assembler key words */ @@ -195,6 +200,9 @@ int (*call[])(void) = ismsdos, /* msdos exe (virus file attachement) */ isicocur, /* windows icon or cursor file */ isface, /* ascii face file */ + istga, + ismp4, + ismp3, /* last resorts */ ismung, /* entropy compressed/encrypted */ @@ -204,8 +212,8 @@ int (*call[])(void) = int mime; -char OCTET[] = "application/octet-stream\n"; -char PLAIN[] = "text/plain\n"; +char OCTET[] = "application/octet-stream"; +char PLAIN[] = "text/plain"; void main(int argc, char *argv[]) @@ -262,101 +270,113 @@ type(char *file, int nlen) } fname = file; if ((fd = open(file, OREAD)) < 0) { - print("cannot open: %r\n"); + fprint(2, "cannot open: %r\n"); return; } filetype(fd); close(fd); } -/* - * Unicode 4.0 4-byte runes. - */ -typedef int Rune1; - -enum { - UTFmax1 = 4, -}; - -int -fullrune1(char *p, int n) -{ - int c; - - if(n >= 1) { - c = *(uchar*)p; - if(c < 0x80) - return 1; - if(n >= 2 && c < 0xE0) - return 1; - if(n >= 3 && c < 0xF0) - return 1; - if(n >= 4) - return 1; - } - return 0; -} - -int -chartorune1(Rune1 *rune, char *str) +void +utfconv(void) { - int c, c1, c2, c3, n; Rune r; + uchar *rb; + char *p, *e; + int i; - c = *(uchar*)str; - if(c < 0xF0){ - r = 0; - n = chartorune(&r, str); - *rune = r; - return n; - } - c &= ~0xF0; - c1 = *(uchar*)(str+1) & ~0x80; - c2 = *(uchar*)(str+2) & ~0x80; - c3 = *(uchar*)(str+3) & ~0x80; - n = (c<<18) | (c1<<12) | (c2<<6) | c3; - if(n < 0x10000 || n > 0x10FFFF){ - *rune = Runeerror; - return 1; + if(nbuf < 4) + return; + + if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){ + if(!mime) + print("utf-32be "); + return; + } else + if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){ + if(!mime) + print("utf-32le "); + return; + } else + if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){ + memmove(buf, buf+3, nbuf-3); + nbuf -= 3; + return; + } else + if(memcmp(buf, "\xFE\xFF", 2) == 0){ + if(!mime) + print("utf-16be "); + + nbuf -= 2; + rb = malloc(nbuf+1); + memmove(rb, buf+2, nbuf); + p = (char*)buf; + e = p+sizeof(buf)-UTFmax-1; + for(i=0; imode & DMDIR) { - print(mime ? OCTET : "directory\n"); + print("%s\n", mime ? OCTET : "directory"); return; } if(mbuf->type != 'M' && mbuf->type != '|') { - print(mime ? OCTET : "special file #%C/%s\n", - mbuf->type, mbuf->name); + if(mime) + print("%s\n", OCTET); + else + print("special file #%C/%s\n", mbuf->type, mbuf->name); return; } /* may be reading a pipe on standard input */ nbuf = readn(fd, buf, sizeof(buf)-1); if(nbuf < 0) { - print("cannot read: %r\n"); + fprint(2, "cannot read: %r\n"); return; } if(nbuf == 0) { - print(mime ? PLAIN : "empty file\n"); + print("%s\n", mime ? PLAIN : "empty file"); return; } buf[nbuf] = 0; + utfconv(); + /* * build histogram table */ @@ -365,9 +385,9 @@ filetype(int fd) language[i].count = 0; eob = (char *)buf+nbuf; for(n = 0, p = (char *)buf; p < eob; n++) { - if (!fullrune1(p, eob-p) && eob-p < UTFmax1) + if (!fullrune(p, eob-p) && eob-p < UTFmax) break; - p += chartorune1(&r, p); + p += chartorune(&r, p); if (r == 0) f = Cnull; else if (r <= 0x7f) { @@ -422,14 +442,14 @@ filetype(int fd) if (nbuf < 100 && !mime) print(mime ? PLAIN : "short "); if (guess == Fascii) - print(mime ? PLAIN : "Ascii\n"); + print("%s\n", mime ? PLAIN : "Ascii"); else if (guess == Feascii) - print(mime ? PLAIN : "extended ascii\n"); + print("%s\n", mime ? PLAIN : "extended ascii"); else if (guess == Flatin) - print(mime ? PLAIN : "latin ascii\n"); + print("%s\n", mime ? PLAIN : "latin ascii"); else if (guess == Futf && utf_count() < 4) print_utf(); - else print(mime ? OCTET : "binary\n"); + else print("%s\n", mime ? OCTET : "binary"); } void @@ -500,7 +520,7 @@ print_utf(void) int i, printed, j; if(mime){ - print(PLAIN); + print("%s\n", PLAIN); return; } if (chkascii()) { @@ -582,37 +602,41 @@ struct Filemagic { * when read from a file. */ Filemagic long0tab[] = { - 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, + 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET, /* "pac1" */ - 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, + 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET, /* "pXc2 */ - 0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET, - 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, - 0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET, - 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, - 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", - 070707, 0xFFFF, "cpio archive\n", "application/x-cpio", - 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", - 0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg", - 0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be", - 0xfffe, 0xffffffff, "utf-32le\n", "text/plain charset=utf-32le", - 0xfeff, 0xffff, "utf-16be\n", "text/plain charset=utf-16be", - 0xfffe, 0xffff, "utf-16le\n", "text/plain charset=utf-16le", + 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET, + 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET, + 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac", + 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET, + 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip", + 070707, 0xFFFF, "cpio archive", "application/x-cpio", + 0x2F7, 0xFFFF, "tex dvi", "application/dvi", + 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg", + 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg", /* 0xfeedface: this could alternately be a Next Plan 9 boot image */ - 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable\n", OCTET, + 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET, /* 0xfeedfacf */ - 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable\n", OCTET, + 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET, /* 0xcefaedfe */ - 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable\n", OCTET, + 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET, /* 0xcffaedfe */ - 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable\n", OCTET, + 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET, /* 0xcafebabe */ - 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable\n", OCTET, + 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET, /* * venti & fossil magic numbers are stored big-endian on disk, * thus the numbers appear reversed in this table. */ - 0xad4e5cd1, 0xFFFFFFFF, "venti arena\n", OCTET, + 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET, + 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET, + 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET, + /* tcpdump pcap file */ + 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap", + 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap", + 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap", + 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap", }; int @@ -622,7 +646,7 @@ filemagic(Filemagic *tab, int ntab, ulong x) for(i=0; imask) == tp->x){ - print(mime? tp->mime: tp->desc); + print("%s\n", mime ? tp->mime : tp->desc); return 1; } } @@ -690,7 +716,7 @@ isexec(void) seek(fd, 0, 0); /* reposition to start of file */ if(crackhdr(fd, &f)) { - print(mime ? OCTET : "%s\n", f.name); + print("%s\n", mime ? OCTET : f.name); return 1; } return 0; @@ -776,11 +802,11 @@ struct FILE_STRING "\x1f\x9d", "compressed", 2, "application/x-compress", "\x1f\x8b", "gzip compressed", 2, "application/x-gzip", "BZh", "bzip2 compressed", 3, "application/x-bzip2", - "!\n__.SYMDEF", "archive random library", 16, "application/octet-stream", - "!\n", "archive", 8, "application/octet-stream", - "070707", "cpio archive - ascii header", 6, "application/octet-stream", - "#!/bin/rc", "rc executable file", 9, "text/plain", - "#!/bin/sh", "sh executable file", 9, "text/plain", + "!\n__.SYMDEF", "archive random library", 16, OCTET, + "!\n", "archive", 8, OCTET, + "070707", "cpio archive - ascii header", 6, OCTET, + "#!/bin/rc", "rc executable file", 9, PLAIN, + "#!/bin/sh", "sh executable file", 9, PLAIN, "%!", "postscript", 2, "application/postscript", "\004%!", "postscript", 3, "application/postscript", "x T post", "troff output for post", 8, "application/troff", @@ -788,6 +814,7 @@ struct FILE_STRING "x T utf", "troff output for UTF", 7, "application/troff", "x T 202", "troff output for 202", 7, "application/troff", "x T aps", "troff output for aps", 7, "application/troff", + "x T ", "troff output", 4, "application/troff", "GIF", "GIF image", 3, "image/gif", "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", "%PDF", "PDF", 4, "application/pdf", @@ -802,15 +829,24 @@ struct FILE_STRING "\377\330\377\340", "jpeg", 4, "image/jpeg", "\377\330\377\341", "jpeg", 4, "image/jpeg", "\377\330\377\333", "jpeg", 4, "image/jpeg", + "\xff\xd8", "jpeg", 2, "image/jpeg", "BM", "bmp", 2, "image/bmp", "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc", "key); if(nbuf >= l && memcmp(buf, p->key, l) == 0) { - if(mime) - print("%s\n", p->mime); - else - print("%s\n", p->filetype); + print("%s\n", mime ? p->mime : p->filetype); return 1; } } @@ -860,7 +909,7 @@ istring(void) if(buf[i] == '\n') break; if(mime) - print(OCTET); + print("%s\n", OCTET); else print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); return 1; @@ -874,6 +923,7 @@ struct offstr struct FILE_STRING; } offstrs[] = { 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image", + 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom", 0, 0, 0, 0, 0 }; @@ -892,10 +942,7 @@ isoffstr(void) if (readn(fd, buf, n) != n) continue; if(memcmp(buf, p->key, n) == 0) { - if(mime) - print("%s\n", p->mime); - else - print("%s\n", p->filetype); + print("%s\n", mime ? p->mime : p->filetype); return 1; } } @@ -916,8 +963,7 @@ iff(void) else if (strncmp((char*)buf+8, "AVI ", 4) == 0) print("%s\n", mime? "video/avi": "avi video"); else - print("%s\n", mime? "application/octet-stream": - "riff file"); + print("%s\n", mime? OCTET : "riff file"); return 1; } return 0; @@ -940,6 +986,25 @@ char* html_string[] = { 0, }; +int +isudiff(void) +{ + char *p; + + p = (char*)buf; + if((p = strstr(p, "diff")) != nil) + if((p = strchr(p, '\n')) != nil) + if(strncmp(++p, "--- ", 4) == 0) + if((p = strchr(p, '\n')) != nil) + if(strncmp(++p, "+++ ", 4) == 0) + if((p = strchr(p, '\n')) != nil) + if(strncmp(++p, "@@ ", 3) == 0){ + print("%s\n", mime ? "text/plain" : "unified diff output"); + return 1; + } + return 0; +} + int ishtml(void) { @@ -966,7 +1031,7 @@ ishtml(void) p += n; if(p < buf+nbuf && strchr("\t\r\n />", *p)){ if(++count > 2) { - print(mime ? "text/html\n" : "HTML file\n"); + print("%s\n", mime ? "text/html" : "HTML file"); return 1; } } @@ -1024,7 +1089,7 @@ isrfc822(void) p = q+1; } if(count >= 3){ - print(mime ? "message/rfc822\n" : "email file\n"); + print("%s\n", mime ? "message/rfc822" : "email file"); return 1; } return 0; @@ -1041,7 +1106,7 @@ ismbox(void) return 0; *q = 0; if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ - print(mime ? "text/plain\n" : "mail box\n"); + print("%s\n", mime ? PLAIN : "mail box"); return 1; } *q = '\n'; @@ -1062,7 +1127,7 @@ iscint(void) if(type < 0) return 0; if(mime) - print(OCTET); + print("%s\n", OCTET); else print("%s intermediate\n", name); return 1; @@ -1095,7 +1160,7 @@ isc(void) yes: if(mime){ - print(PLAIN); + print("%s\n", PLAIN); return 1; } if(wfreq[Alword] > 0) @@ -1108,29 +1173,95 @@ yes: int islimbo(void) { - /* * includes */ if(wfreq[Lword] < 4) return 0; - print(mime ? PLAIN : "limbo program\n"); + print("%s\n", mime ? PLAIN : "limbo program"); return 1; } int isas(void) { - /* * includes */ if(wfreq[Aword] < 2) return 0; - print(mime ? PLAIN : "as program\n"); + print("%s\n", mime ? PLAIN : "as program"); return 1; } +int +istga(void) +{ + uchar *p; + + p = buf; + if(nbuf < 18) + return 0; + if((p[12] | p[13]<<8) == 0) /* width */ + return 0; + if((p[14] | p[15]<<8) == 0) /* height */ + return 0; + if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */ + return 0; + if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */ + return 0; + if(p[1] == 0){ /* non color-mapped */ + if((p[2]&3) != 2 && (p[2]&3) != 3) + return 0; + if((p[5] | p[6]<<8) != 0) /* palette length */ + return 0; + } else + if(p[1] == 1){ /* color-mapped */ + if((p[2]&3) != 1 || p[7] == 0) + return 0; + if((p[5] | p[6]<<8) == 0) /* palette length */ + return 0; + } else + return 0; + print("%s\n", mime ? "image/tga" : "targa image"); + return 1; +} + +int +ismp3(void) +{ + uchar *p, *e; + + p = buf; + e = p + nbuf-1; + while((p < e) && (p = memchr(p, 0xFF, e - p))){ + if((p[1] & 0xFE) == 0xFA){ + print("%s\n", mime ? "audio/mpeg" : "mp3 audio"); + return 1; + } + p++; + } + return 0; +} + +int +ismp4(void) +{ + if(nbuf <= 12) + return 0; + if(memcmp(&buf[4], "ftyp", 4) != 0) + return 0; + if(memcmp(&buf[8], "isom", 4) == 0){ + print("%s\n", mime ? "video/mp4" : "mp4 video"); + return 1; + } + if(memcmp(&buf[8], "M4A ", 4) == 0){ + print("%s\n", mime ? "audio/m4a" : "m4a audio"); + return 1; + } + return 0; +} + /* * low entropy means encrypted */ @@ -1152,18 +1283,18 @@ ismung(void) cs /= 8.; if(cs <= 24.322) { if(buf[0]==0x1f && buf[1]==0x9d) - print(mime ? "application/x-compress" : "compressed\n"); + print("%s\n", mime ? "application/x-compress" : "compressed"); else if(buf[0]==0x1f && buf[1]==0x8b) - print(mime ? "application/x-gzip" : "gzip compressed\n"); + print("%s\n", mime ? "application/x-gzip" : "gzip compressed"); else if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h') - print(mime ? "application/x-bzip2" : "bzip2 compressed\n"); + print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed"); else if(buf[0]==0x78 && buf[1]==0x9c) - print(mime ? "application/x-deflate" : "zlib compressed\n"); + print("%s\n", mime ? "application/x-deflate" : "zlib compressed"); else - print(mime ? OCTET : "encrypted\n"); + print("%s\n", mime ? OCTET : "encrypted"); return 1; } return 0; @@ -1218,7 +1349,7 @@ isenglish(void) rare += cfreq[tolower(*p)]; } if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { - print(mime ? PLAIN : "English text\n"); + print("%s\n", mime ? PLAIN : "English text"); return 1; } return 0; @@ -1230,28 +1361,18 @@ isenglish(void) */ #define P9BITLEN 12 int -p9bitnum(uchar *bp) +p9bitnum(char *s, int *v) { - int n, c, len; + char *es; - len = P9BITLEN; - while(*bp == ' ') { - bp++; - len--; - if(len <= 0) - return -1; - } - n = 0; - while(len > 1) { - c = *bp++; - if(!isdigit(c)) - return -1; - n = n*10 + c-'0'; - len--; - } - if(*bp != ' ') + if(s[P9BITLEN-1] != ' ') return -1; - return n; + s[P9BITLEN-1] = '\0'; + *v = strtol(s, &es, 10); + s[P9BITLEN-1] = ' '; + if(es != &s[P9BITLEN-1]) + return -1; + return 0; } int @@ -1267,13 +1388,18 @@ depthof(char *s, int *newp) if(s == es) return -1; if('0'<=*s && *s<='9') - return 1<= 0) - len = (hix+px-1)/px - lox/px; - else{ /* make positive before divide */ - t = (-lox)+px-1; - t = (t/px)*px; - len = (t+hix+px-1)/px; - } + len = (hix+px-1)/px; }else - len = (hix-lox)*dep/8; - len *= hiy - loy; /* col length */ + len = hix*dep/8; + len *= hiy; /* col length */ len += 5 * P9BITLEN; /* size of initial ascii */ /* @@ -1332,8 +1456,8 @@ isp9bit(void) * for subfont, the subfont header should follow immediately. */ if (cmpr) { - print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n", - newlabel, dep); + print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n", + newlabel, dep, hix, hiy); return 1; } /* @@ -1342,11 +1466,13 @@ isp9bit(void) */ if (len != 0 && (mbuf->length == 0 || mbuf->length == len || mbuf->length > len && mbuf->length < len+P9BITLEN)) { - print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep); + print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n", + newlabel, dep, hix, hiy); return 1; } if (p9subfont(buf+len)) { - print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep); + print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n", + newlabel, dep, hix, hiy); return 1; } return 0; @@ -1361,16 +1487,15 @@ p9subfont(uchar *p) if (p+3*P9BITLEN > buf+sizeof(buf)) return 1; - n = p9bitnum(p + 0*P9BITLEN); /* char count */ - if (n < 0) + if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */ return 0; - h = p9bitnum(p + 1*P9BITLEN); /* height */ - if (h < 0) + if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */ return 0; - a = p9bitnum(p + 2*P9BITLEN); /* ascent */ - if (a < 0) + if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */ return 0; - return 1; + if(n > 0 && h > 0 && a >= 0) + return 1; + return 0; } #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') @@ -1417,7 +1542,7 @@ isp9font(void) } } if (i) { - print(mime ? "text/plain\n" : "font file\n"); + print("%s\n", mime ? PLAIN : "font file"); return 1; } return 0; @@ -1535,7 +1660,7 @@ iself(void) print("%s ELF %s\n", p, t); } else - print("application/x-elf-executable"); + print("application/x-elf-executable\n"); return 1; } @@ -1579,4 +1704,3 @@ isface(void) print("face image depth %d\n", ldepth); return 1; } -