8 * file - determine type of file
/*
 * Assemble a 32-bit value from the four bytes at p, least significant
 * byte first.  Each byte is widened to unsigned long before shifting:
 * a byte promoted to int and shifted left by 24 would run into the
 * sign bit when it is >= 0x80, which is undefined behaviour and yields
 * a sign-extended result wherever the value is widened past 32 bits.
 * p is expanded four times, so it must not have side effects.
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
			((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(uchar*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 ishtml, /* html keywords */
190 iscint, /* compiler/assembler intermediate */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
/*
 * MIME type strings shared across the classifiers: printed in place of
 * the descriptive text when `mime' is non-zero, and OCTET also serves
 * as the mime field of magic-number table entries.
 */
211 char OCTET[] = "application/octet-stream";
212 char PLAIN[] = "text/plain";
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 print("cannot open: %r\n");
277 * Unicode 4.0 4-byte runes.
286 fullrune1(char *p, int n)
294 if(n >= 2 && c < 0xE0)
296 if(n >= 3 && c < 0xF0)
305 chartorune1(Rune1 *rune, char *str)
307 int c, c1, c2, c3, n;
313 n = chartorune(&r, str);
318 c1 = *(uchar*)(str+1) & ~0x80;
319 c2 = *(uchar*)(str+2) & ~0x80;
320 c3 = *(uchar*)(str+3) & ~0x80;
321 n = (c<<18) | (c1<<12) | (c2<<6) | c3;
322 if(n < 0x10000 || n > 0x10FFFF){
341 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
346 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
351 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
352 memmove(buf, buf+3, nbuf-3);
356 if(memcmp(buf, "\xFE\xFF", 2) == 0){
362 memmove(rb, buf+2, nbuf);
364 e = p+sizeof(buf)-UTFmax-1;
365 for(i=0; i<nbuf && p < e; i+=2){
366 r = rb[i+1] | rb[i]<<8;
367 p += runetochar(p, &r);
371 nbuf = p - (char*)buf;
373 if(memcmp(buf, "\xFF\xFE", 2) == 0){
379 memmove(rb, buf+2, nbuf);
381 e = p+sizeof(buf)-UTFmax-1;
382 for(i=0; i<nbuf && p < e; i+=2){
383 r = rb[i] | rb[i+1]<<8;
384 p += runetochar(p, &r);
388 nbuf = p - (char*)buf;
402 print("cannot stat: %r\n");
405 if(mbuf->mode & DMDIR) {
406 print("%s\n", mime ? OCTET : "directory");
409 if(mbuf->type != 'M' && mbuf->type != '|') {
411 print("%s\n", OCTET);
413 print("special file #%C/%s\n", mbuf->type, mbuf->name);
416 /* may be reading a pipe on standard input */
417 nbuf = readn(fd, buf, sizeof(buf)-1);
419 print("cannot read: %r\n");
423 print("%s\n", mime ? PLAIN : "empty file");
431 * build histogram table
433 memset(cfreq, 0, sizeof(cfreq));
434 for (i = 0; language[i].name; i++)
435 language[i].count = 0;
436 eob = (char *)buf+nbuf;
437 for(n = 0, p = (char *)buf; p < eob; n++) {
438 if (!fullrune1(p, eob-p) && eob-p < UTFmax1)
440 p += chartorune1(&r, p);
443 else if (r <= 0x7f) {
444 if (!isprint(r) && !isspace(r))
445 f = Ceascii; /* ASCII control char */
447 } else if (r == 0x80) {
451 f = Cbinary; /* Invalid Runes */
453 f = Clatin; /* Latin 1 */
456 f = Cutf; /* UTF extension */
458 cfreq[f]++; /* ASCII chars peg directly */
465 else if (cfreq[Cutf])
467 else if (cfreq[Clatin])
469 else if (cfreq[Ceascii])
471 else if (cfreq[Cnull])
476 * lookup dictionary words
478 memset(wfreq, 0, sizeof(wfreq));
479 if(guess == Fascii || guess == Flatin || guess == Futf)
482 * call individual classify routines
484 for(i=0; call[i]; i++)
490 * print out gross classification
492 if (nbuf < 100 && !mime)
493 print(mime ? PLAIN : "short ");
495 print("%s\n", mime ? PLAIN : "Ascii");
496 else if (guess == Feascii)
497 print("%s\n", mime ? PLAIN : "extended ascii");
498 else if (guess == Flatin)
499 print("%s\n", mime ? PLAIN : "latin ascii");
500 else if (guess == Futf && utf_count() < 4)
502 else print("%s\n", mime ? OCTET : "binary");
506 bump_utf_count(Rune r)
510 high = sizeof(language)/sizeof(language[0])-1;
511 for (low = 0; low < high;) {
513 if (r >= language[mid].low) {
514 if (r <= language[mid].high) {
515 language[mid].count++;
528 for (i = 0; language[i].name; i++)
529 if (language[i].count > 0)
530 switch (language[i].mode) {
546 for (i = 'a'; i < 'z'; i++)
549 for (i = 'A'; i < 'Z'; i++)
556 find_first(char *name)
560 for (i = 0; language[i].name != 0; i++)
561 if (language[i].mode == First
562 && strcmp(language[i].name, name) == 0)
573 print("%s\n", PLAIN);
581 for (i = 0; language[i].name; i++)
582 if (language[i].count) {
583 switch(language[i].mode) {
585 j = find_first(language[i].name);
588 if (language[j].count > 0)
596 print("%s", language[i].name);
611 int low, high, mid, r;
616 while (p < buf+nbuf && !isalpha(*p))
621 while(p < buf+nbuf && isalpha(*p))
625 high = sizeof(dict)/sizeof(dict[0]);
626 for(low = 0;low < high;) {
628 r = strcmp(dict[mid].word, (char*)p2);
630 wfreq[dict[mid].class]++;
642 typedef struct Filemagic Filemagic;
651 * integers in this table must be as seen on a little-endian machine
652 * when read from a file.
654 Filemagic long0tab[] = {
655 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
657 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
659 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
660 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
661 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
662 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
663 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
664 070707, 0xFFFF, "cpio archive", "application/x-cpio",
665 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
666 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
667 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
668 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
670 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
672 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
674 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
676 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
678 * venti & fossil magic numbers are stored big-endian on disk,
679 * thus the numbers appear reversed in this table.
681 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
682 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
686 filemagic(Filemagic *tab, int ntab, ulong x)
690 for(i=0; i<ntab; i++)
691 if((x&tab[i].mask) == tab[i].x){
692 print("%s\n", mime ? tab[i].mime : tab[i].desc);
701 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
704 typedef struct Fileoffmag Fileoffmag;
711 * integers in this table must be as seen on a little-endian machine
712 * when read from a file.
714 Fileoffmag longofftab[] = {
716 * venti & fossil magic numbers are stored big-endian on disk,
717 * thus the numbers appear reversed in this table.
719 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
720 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
721 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
722 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
726 fileoffmagic(Fileoffmag *tab, int ntab)
731 uchar buf[sizeof(long)];
733 for(i=0; i<ntab; i++) {
735 seek(fd, tp->off, 0);
736 if (readn(fd, buf, sizeof buf) != sizeof buf)
739 if((x&tp->mask) == tp->x){
740 print("%s\n", mime ? tp->mime : tp->desc);
750 return fileoffmagic(longofftab, nelem(longofftab));
758 seek(fd, 0, 0); /* reposition to start of file */
759 if(crackhdr(fd, &f)) {
760 print("%s\n", mime ? OCTET : f.name);
/*
 * Tar header sizes: NAMSIZ dimensions the header's linkname field and
 * TBLOCK bounds the dummy[] byte view of a header block.
 */
768 enum { NAMSIZ = 100, TBLOCK = 512 };
783 char linkname[NAMSIZ];
784 /* rest are defined by POSIX's ustar format; see p1003.2b */
785 char magic[6]; /* "ustar" */
791 char prefix[155]; /* if non-null, path = prefix "/" name */
796 checksum(union hblock *hp)
800 struct header *hdr = &hp->dbuf;
802 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
805 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
815 union hblock *hp = (union hblock *)tblock;
816 struct header *hdr = &hp->dbuf;
818 seek(fd, 0, 0); /* reposition to start of file */
819 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
821 chksum = strtol(hdr->chksum, 0, 8);
822 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
823 if (strcmp(hdr->magic, "ustar") == 0)
824 print(mime? "application/x-ustar\n": "posix tar archive\n");
826 print(mime? "application/x-tar\n": "tar archive\n");
833 * initial words to classify file
843 "\x1f\x9d", "compressed", 2, "application/x-compress",
844 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
845 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
846 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
847 "!<arch>\n", "archive", 8, "application/octet-stream",
848 "070707", "cpio archive - ascii header", 6, "application/octet-stream",
849 "#!/bin/rc", "rc executable file", 9, "text/plain",
850 "#!/bin/sh", "sh executable file", 9, "text/plain",
851 "%!", "postscript", 2, "application/postscript",
852 "\004%!", "postscript", 3, "application/postscript",
853 "x T post", "troff output for post", 8, "application/troff",
854 "x T Latin1", "troff output for Latin1", 10, "application/troff",
855 "x T utf", "troff output for UTF", 7, "application/troff",
856 "x T 202", "troff output for 202", 7, "application/troff",
857 "x T aps", "troff output for aps", 7, "application/troff",
858 "x T ", "troff output", 4, "application/troff",
859 "GIF", "GIF image", 3, "image/gif",
860 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
861 "%PDF", "PDF", 4, "application/pdf",
862 "<!DOCTYPE", "HTML file", 9, "text/html",
863 "<!doctype", "HTML file", 9, "text/html",
864 "<!--", "HTML file", 4, "text/html",
865 "<html>", "HTML file", 6, "text/html",
866 "<HTML>", "HTML file", 6, "text/html",
867 "<?xml", "HTML file", 5, "text/html",
868 "\111\111\052\000", "tiff", 4, "image/tiff",
869 "\115\115\000\052", "tiff", 4, "image/tiff",
870 "\377\330\377\340", "jpeg", 4, "image/jpeg",
871 "\377\330\377\341", "jpeg", 4, "image/jpeg",
872 "\377\330\377\333", "jpeg", 4, "image/jpeg",
873 "\xff\xd8", "jpeg", 2, "image/jpeg",
874 "BM", "bmp", 2, "image/bmp",
875 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
876 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
877 "\033E\033", "HP PCL printer data", 3, OCTET,
878 "\033&", "HP PCL printer data", 2, OCTET,
879 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
880 "\033Lua", "Lua bytecode", 4, OCTET,
881 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
882 "OggS", "ogg audio", 4, "audio/ogg",
883 ".snd", "sun audio", 4, "audio/basic",
884 "\211PNG", "PNG image", 4, "image/png",
885 "P3\n", "ppm", 3, "image/ppm",
886 "P6\n", "ppm", 3, "image/ppm",
887 "/* XPM */\n", "xbm", 10, "image/xbm",
888 ".HTML ", "troff -ms input", 6, "text/troff",
889 ".LP", "troff -ms input", 3, "text/troff",
890 ".ND", "troff -ms input", 3, "text/troff",
891 ".PP", "troff -ms input", 3, "text/troff",
892 ".TL", "troff -ms input", 3, "text/troff",
893 ".TR", "troff -ms input", 3, "text/troff",
894 ".TH", "manual page", 3, "text/troff",
895 ".\\\"", "troff input", 3, "text/troff",
896 ".de", "troff input", 3, "text/troff",
897 ".if", "troff input", 3, "text/troff",
898 ".nr", "troff input", 3, "text/troff",
899 ".tr", "troff input", 3, "text/troff",
900 "vac:", "venti score", 4, "text/plain",
901 "-----BEGIN CERTIFICATE-----\n",
902 "pem certificate", -1, "text/plain",
903 "-----BEGIN TRUSTED CERTIFICATE-----\n",
904 "pem trusted certificate", -1, "text/plain",
905 "-----BEGIN X509 CERTIFICATE-----\n",
906 "pem x.509 certificate", -1, "text/plain",
907 "subject=/C=", "pem certificate with header", -1, "text/plain",
908 "process snapshot ", "process snapshot", -1, "application/snapfs",
909 "d8:announce", "torrent file", 11, "application/x-bittorrent",
910 "[playlist]", "playlist", 10, "application/x-scpls",
911 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
912 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
913 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
921 struct FILE_STRING *p;
923 for(p = file_string; p->key; p++) {
927 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
928 print("%s\n", mime ? p->mime : p->filetype);
932 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
933 for(i = 5; i < nbuf; i++)
937 print("%s\n", OCTET);
939 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
950 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
961 for(p = offstrs; p->key; p++) {
966 if (readn(fd, buf, n) != n)
968 if(memcmp(buf, p->key, n) == 0) {
969 print("%s\n", mime ? p->mime : p->filetype);
979 if (strncmp((char*)buf, "FORM", 4) == 0 &&
980 strncmp((char*)buf+8, "AIFF", 4) == 0) {
981 print("%s\n", mime? "audio/x-aiff": "aiff audio");
984 if (strncmp((char*)buf, "RIFF", 4) == 0) {
985 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
986 print("%s\n", mime? "audio/wave": "wave audio");
987 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
988 print("%s\n", mime? "video/avi": "avi video");
990 print("%s\n", mime? "application/octet-stream": "riff file");
996 char* html_string[] = {
998 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
1000 "button", "center", "iframe", "object", "option", "script",
1002 "blink", "embed", "frame", "input", "label", "param", "small",
1003 "style", "table", "tbody", "tfoot", "thead", "title",
1004 "?xml", "body", "code", "font", "form", "head", "html",
1005 "link", "menu", "meta", "span",
1006 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
1007 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
1008 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
1009 "a", "b", "i", "p", "q", "u",
1022 while(p < buf+nbuf && *p != '<')
1031 for(i = 0; html_string[i]; i++){
1032 n = strlen(html_string[i]);
1033 if(p + n > buf+nbuf)
1035 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1037 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1039 print("%s\n", mime ? "text/html" : "HTML file");
1050 char* rfc822_string[] =
1072 q = strchr(p, '\n');
1076 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1083 if(*p != '\t' && *p != ' '){
1087 for(i = 0; rfc822_string[i]; i++) {
1088 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1097 print("%s\n", mime ? "message/rfc822" : "email file");
1109 q = strchr(p, '\n');
1113 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1114 print("%s\n", mime ? "text/plain" : "mail box");
1128 if(Binit(&b, fd, OREAD) == Beof)
1131 type = objtype(&b, &name);
1135 print("%s\n", OCTET);
1137 print("%s intermediate\n", name);
1150 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1152 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1157 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1162 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1168 print("%s\n", PLAIN);
1171 if(wfreq[Alword] > 0)
1172 print("alef program\n");
1174 print("c program\n");
1184 if(wfreq[Lword] < 4)
1186 print("%s\n", mime ? PLAIN : "limbo program");
1196 if(wfreq[Aword] < 2)
1198 print("%s\n", mime ? PLAIN : "as program");
1210 if((p[12] | p[13]<<8) == 0) /* width */
1212 if((p[14] | p[15]<<8) == 0) /* height */
1214 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1216 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1218 if(p[1] == 0){ /* non color-mapped */
1219 if((p[2]&3) != 2 && (p[2]&3) != 3)
1221 if((p[5] | p[6]<<8) != 0) /* palette length */
1224 if(p[1] == 1){ /* color-mapped */
1225 if((p[2]&3) != 1 || p[7] == 0)
1227 if((p[5] | p[6]<<8) == 0) /* palette length */
1231 print("%s\n", mime ? "image/tga" : "targa image");
1242 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1243 if((p[1] & 0xFE) == 0xFA){
1244 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1253 * low entropy means encrypted
1263 memset(bucket, 0, sizeof(bucket));
1264 for(i=nbuf-64; i<nbuf; i++)
1265 bucket[(buf[i]>>5)&07] += 1;
1269 cs += (bucket[i]-8)*(bucket[i]-8);
1272 if(buf[0]==0x1f && buf[1]==0x9d)
1273 print("%s\n", mime ? "application/x-compress" : "compressed");
1275 if(buf[0]==0x1f && buf[1]==0x8b)
1276 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1278 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1279 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1281 if(buf[0]==0x78 && buf[1]==0x9c)
1282 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1284 print("%s\n", mime ? OCTET : "encrypted");
1291 * english by punctuation and frequencies
1296 int vow, comm, rare, badpun, punct;
1299 if(guess != Fascii && guess != Feascii)
1303 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1313 if(p[1] != ' ' && p[1] != '\n')
1316 if(badpun*5 > punct)
1318 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1320 if(2*cfreq[';'] > cfreq['e'])
1324 for(p="AEIOU"; *p; p++) {
1326 vow += cfreq[tolower(*p)];
1329 for(p="ETAION"; *p; p++) {
1331 comm += cfreq[tolower(*p)];
1334 for(p="VJKQXZ"; *p; p++) {
1336 rare += cfreq[tolower(*p)];
1338 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1339 print("%s\n", mime ? PLAIN : "English text");
1346 * pick up a number with
1376 depthof(char *s, int *newp)
1383 while(s<es && *s==' ')
1387 if('0'<=*s && *s<='9')
1388 return 1<<strtol(s, 0, 0);
1392 while(s<es && *s!=' '){
1393 s++; /* skip letter */
1394 d += strtoul(s, &s, 10);
1397 if(d % 8 == 0 || 8 % d == 0)
1406 int dep, lox, loy, hix, hiy, px, new, cmpr;
1416 if(memcmp(cp, "compressed\n", 11) == 0) {
1421 dep = depthof((char*)cp + 0*P9BITLEN, &new);
1424 lox = p9bitnum(cp + 1*P9BITLEN);
1425 loy = p9bitnum(cp + 2*P9BITLEN);
1426 hix = p9bitnum(cp + 3*P9BITLEN);
1427 hiy = p9bitnum(cp + 4*P9BITLEN);
1428 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1432 px = 8/dep; /* pixels per byte */
1433 /* set l to number of bytes of data per scan line */
1435 len = (hix+px-1)/px - lox/px;
1436 else{ /* make positive before divide */
1439 len = (t+hix+px-1)/px;
1442 len = (hix-lox)*dep/8;
1443 len *= hiy - loy; /* col length */
1444 len += 5 * P9BITLEN; /* size of initial ascii */
1447 * for compressed images, don't look any further. otherwise:
1448 * for image file, length is non-zero and must match calculation above.
1449 * for /dev/window and /dev/screen the length is always zero.
1450 * for subfont, the subfont header should follow immediately.
1453 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1458 * mbuf->length == 0 probably indicates reading a pipe.
1459 * Ghostscript sometimes produces a little extra on the end.
1461 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1462 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1463 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1466 if (p9subfont(buf+len)) {
1467 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1478 /* if image too big, assume it's a subfont */
1479 if (p+3*P9BITLEN > buf+sizeof(buf))
1482 n = p9bitnum(p + 0*P9BITLEN); /* char count */
1485 h = p9bitnum(p + 1*P9BITLEN); /* height */
1488 a = p9bitnum(p + 2*P9BITLEN); /* ascent */
/*
 * True when c is a space, tab or newline.  The argument is expanded
 * up to three times, so it must not have side effects.
 */
1494 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1501 char pathname[1024];
1504 if (!getfontnum(cp, &cp)) /* height */
1506 if (!getfontnum(cp, &cp)) /* ascent */
1508 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1509 if (!getfontnum(cp, &cp)) /* min */
1511 if (!getfontnum(cp, &cp)) /* max */
1513 getfontnum(cp, &cp); /* optional offset */
1514 while (WHITESPACE(*cp))
1516 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1518 /* construct a path name, if needed */
1520 if (*p != '/' && slash) {
1522 if (n < sizeof(pathname))
1523 memcpy(pathname, fname, n);
1526 if (n+cp-p+4 < sizeof(pathname)) {
1527 memcpy(pathname+n, p, cp-p);
1530 if (access(pathname, AEXIST) < 0) {
1531 strcpy(pathname+n, ".0");
1532 if (access(pathname, AEXIST) < 0)
1538 print(mime ? "text/plain\n" : "font file\n");
1545 getfontnum(uchar *cp, uchar **rp)
1547 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1549 if (*cp < '0' || *cp > '9')
1551 strtoul((char *)cp, (char **)rp, 0);
1552 if (!WHITESPACE(**rp)) {
1562 if(strstr((char *)buf, "\\rtf1")){
1563 print(mime ? "application/rtf\n" : "rich text format\n");
1572 if (buf[0] == 0x4d && buf[1] == 0x5a){
1573 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1582 if(buf[0] || buf[1] || buf[3] || buf[9])
1584 if(buf[4] == 0x00 && buf[5] == 0x00)
1588 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1591 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1600 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1623 static char *type[] = {
1624 [1] "relocatable object",
1626 [3] "shared library",
1630 if (memcmp(buf, "\x7fELF", 4) == 0){
1633 int n = (buf[19] << 8) | buf[18];
1634 char *p = "unknown";
1635 char *t = "unknown";
1637 if (n > 0 && n < nelem(cpu) && cpu[n])
1640 /* try the other byte order */
1642 n = (buf[18] << 8) | buf[19];
1643 if (n > 0 && n < nelem(cpu) && cpu[n])
1647 n = (buf[16]<< 8) | buf[17];
1649 n = (buf[17]<< 8) | buf[16];
1651 if(n>0 && n < nelem(type) && type[n])
1653 print("%s ELF %s\n", p, t);
1656 print("application/x-elf-executable\n");
1666 int i, j, ldepth, l;
1670 for(j = 0; j < 3; j++){
1671 for(p = (char*)buf, i=0; i<3; i++){
1672 if(p[0] != '0' || p[1] != 'x')
1676 else if(buf[2+4] == ',')
1687 while(*p == ' ' || *p == '\t')
1695 print("application/x-face\n");
1697 print("face image depth %d\n", ldepth);