8 * file - determine type of file
10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(char*, int*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 iscint, /* compiler/assembler intermediate */
190 ishtml, /* html keywords */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
211 char OCTET[] = "application/octet-stream";
212 char PLAIN[] = "text/plain";
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 fprint(2, "cannot open: %r\n");
287 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298 memmove(buf, buf+3, nbuf-3);
302 if(memcmp(buf, "\xFE\xFF", 2) == 0){
308 memmove(rb, buf+2, nbuf);
310 e = p+sizeof(buf)-UTFmax-1;
311 for(i=0; i<nbuf && p < e; i+=2){
312 r = rb[i+1] | rb[i]<<8;
313 p += runetochar(p, &r);
317 nbuf = p - (char*)buf;
319 if(memcmp(buf, "\xFF\xFE", 2) == 0){
325 memmove(rb, buf+2, nbuf);
327 e = p+sizeof(buf)-UTFmax-1;
328 for(i=0; i<nbuf && p < e; i+=2){
329 r = rb[i] | rb[i+1]<<8;
330 p += runetochar(p, &r);
334 nbuf = p - (char*)buf;
348 fprint(2, "cannot stat: %r\n");
351 if(mbuf->mode & DMDIR) {
352 print("%s\n", mime ? OCTET : "directory");
355 if(mbuf->type != 'M' && mbuf->type != '|') {
357 print("%s\n", OCTET);
359 print("special file #%C/%s\n", mbuf->type, mbuf->name);
362 /* may be reading a pipe on standard input */
363 nbuf = readn(fd, buf, sizeof(buf)-1);
365 fprint(2, "cannot read: %r\n");
369 print("%s\n", mime ? PLAIN : "empty file");
377 * build histogram table
379 memset(cfreq, 0, sizeof(cfreq));
380 for (i = 0; language[i].name; i++)
381 language[i].count = 0;
382 eob = (char *)buf+nbuf;
383 for(n = 0, p = (char *)buf; p < eob; n++) {
384 if (!fullrune(p, eob-p) && eob-p < UTFmax)
386 p += chartorune(&r, p);
389 else if (r <= 0x7f) {
390 if (!isprint(r) && !isspace(r))
391 f = Ceascii; /* ASCII control char */
393 } else if (r == 0x80) {
397 f = Cbinary; /* Invalid Runes */
399 f = Clatin; /* Latin 1 */
402 f = Cutf; /* UTF extension */
404 cfreq[f]++; /* ASCII chars peg directly */
411 else if (cfreq[Cutf])
413 else if (cfreq[Clatin])
415 else if (cfreq[Ceascii])
417 else if (cfreq[Cnull])
422 * lookup dictionary words
424 memset(wfreq, 0, sizeof(wfreq));
425 if(guess == Fascii || guess == Flatin || guess == Futf)
428 * call individual classify routines
430 for(i=0; call[i]; i++)
436 * print out gross classification
438 if (nbuf < 100 && !mime)
439 print(mime ? PLAIN : "short ");
441 print("%s\n", mime ? PLAIN : "Ascii");
442 else if (guess == Feascii)
443 print("%s\n", mime ? PLAIN : "extended ascii");
444 else if (guess == Flatin)
445 print("%s\n", mime ? PLAIN : "latin ascii");
446 else if (guess == Futf && utf_count() < 4)
448 else print("%s\n", mime ? OCTET : "binary");
452 bump_utf_count(Rune r)
456 high = sizeof(language)/sizeof(language[0])-1;
457 for (low = 0; low < high;) {
459 if (r >= language[mid].low) {
460 if (r <= language[mid].high) {
461 language[mid].count++;
474 for (i = 0; language[i].name; i++)
475 if (language[i].count > 0)
476 switch (language[i].mode) {
492 for (i = 'a'; i < 'z'; i++)
495 for (i = 'A'; i < 'Z'; i++)
502 find_first(char *name)
506 for (i = 0; language[i].name != 0; i++)
507 if (language[i].mode == First
508 && strcmp(language[i].name, name) == 0)
519 print("%s\n", PLAIN);
527 for (i = 0; language[i].name; i++)
528 if (language[i].count) {
529 switch(language[i].mode) {
531 j = find_first(language[i].name);
534 if (language[j].count > 0)
542 print("%s", language[i].name);
557 int low, high, mid, r;
562 while (p < buf+nbuf && !isalpha(*p))
567 while(p < buf+nbuf && isalpha(*p))
571 high = sizeof(dict)/sizeof(dict[0]);
572 for(low = 0;low < high;) {
574 r = strcmp(dict[mid].word, (char*)p2);
576 wfreq[dict[mid].class]++;
588 typedef struct Filemagic Filemagic;
597 * integers in this table must be as seen on a little-endian machine
598 * when read from a file.
600 Filemagic long0tab[] = {
601 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
603 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
605 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
606 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
607 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
608 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
609 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
610 070707, 0xFFFF, "cpio archive", "application/x-cpio",
611 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
612 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
613 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",
614 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
617 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
619 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
621 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
623 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
625 * venti & fossil magic numbers are stored big-endian on disk,
626 * thus the numbers appear reversed in this table.
628 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
629 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
630 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
631 /* tcpdump pcap file */
632 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
633 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
634 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
635 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
639 filemagic(Filemagic *tab, int ntab, ulong x)
643 for(i=0; i<ntab; i++)
644 if((x&tab[i].mask) == tab[i].x){
645 print("%s\n", mime ? tab[i].mime : tab[i].desc);
654 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
657 typedef struct Fileoffmag Fileoffmag;
664 * integers in this table must be as seen on a little-endian machine
665 * when read from a file.
667 Fileoffmag longofftab[] = {
669 * venti & fossil magic numbers are stored big-endian on disk,
670 * thus the numbers appear reversed in this table.
672 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
673 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
674 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
675 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
676 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
677 0x1fc, 0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
681 fileoffmagic(Fileoffmag *tab, int ntab)
686 uchar buf[sizeof(long)];
688 for(i=0; i<ntab; i++) {
690 seek(fd, tp->off, 0);
691 if (readn(fd, buf, sizeof buf) != sizeof buf)
694 if((x&tp->mask) == tp->x){
695 print("%s\n", mime ? tp->mime : tp->desc);
705 return fileoffmagic(longofftab, nelem(longofftab));
713 seek(fd, 0, 0); /* reposition to start of file */
714 if(crackhdr(fd, &f)) {
715 print("%s\n", mime ? OCTET : f.name);
723 enum { NAMSIZ = 100, TBLOCK = 512 };
738 char linkname[NAMSIZ];
739 /* rest are defined by POSIX's ustar format; see p1003.2b */
740 char magic[6]; /* "ustar" */
746 char prefix[155]; /* if non-null, path = prefix "/" name */
751 checksum(union hblock *hp)
755 struct header *hdr = &hp->dbuf;
757 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
760 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
770 union hblock *hp = (union hblock *)tblock;
771 struct header *hdr = &hp->dbuf;
773 seek(fd, 0, 0); /* reposition to start of file */
774 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
776 chksum = strtol(hdr->chksum, 0, 8);
777 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
778 if (strcmp(hdr->magic, "ustar") == 0)
779 print(mime? "application/x-ustar\n": "posix tar archive\n");
781 print(mime? "application/x-tar\n": "tar archive\n");
788 * initial words to classify file
798 "\x1f\x9d", "compressed", 2, "application/x-compress",
799 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
800 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
801 "!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
802 "!<arch>\n", "archive", 8, OCTET,
803 "070707", "cpio archive - ascii header", 6, OCTET,
804 "#!/bin/rc", "rc executable file", 9, PLAIN,
805 "#!/bin/sh", "sh executable file", 9, PLAIN,
806 "%!", "postscript", 2, "application/postscript",
807 "\004%!", "postscript", 3, "application/postscript",
808 "x T post", "troff output for post", 8, "application/troff",
809 "x T Latin1", "troff output for Latin1", 10, "application/troff",
810 "x T utf", "troff output for UTF", 7, "application/troff",
811 "x T 202", "troff output for 202", 7, "application/troff",
812 "x T aps", "troff output for aps", 7, "application/troff",
813 "x T ", "troff output", 4, "application/troff",
814 "GIF", "GIF image", 3, "image/gif",
815 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
816 "%PDF", "PDF", 4, "application/pdf",
817 "<!DOCTYPE", "HTML file", 9, "text/html",
818 "<!doctype", "HTML file", 9, "text/html",
819 "<!--", "HTML file", 4, "text/html",
820 "<html>", "HTML file", 6, "text/html",
821 "<HTML>", "HTML file", 6, "text/html",
822 "<?xml", "HTML file", 5, "text/html",
823 "\111\111\052\000", "tiff", 4, "image/tiff",
824 "\115\115\000\052", "tiff", 4, "image/tiff",
825 "\377\330\377\340", "jpeg", 4, "image/jpeg",
826 "\377\330\377\341", "jpeg", 4, "image/jpeg",
827 "\377\330\377\333", "jpeg", 4, "image/jpeg",
828 "\xff\xd8", "jpeg", 2, "image/jpeg",
829 "BM", "bmp", 2, "image/bmp",
830 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
831 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
832 "\033E\033", "HP PCL printer data", 3, OCTET,
833 "\033&", "HP PCL printer data", 2, OCTET,
834 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
835 "\033Lua", "Lua bytecode", 4, OCTET,
836 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
837 "OggS", "ogg audio", 4, "audio/ogg",
838 ".snd", "sun audio", 4, "audio/basic",
839 "\211PNG", "PNG image", 4, "image/png",
840 "P1\n", "ppm", 3, "image/ppm",
841 "P2\n", "ppm", 3, "image/ppm",
842 "P3\n", "ppm", 3, "image/ppm",
843 "P4\n", "ppm", 3, "image/ppm",
844 "P5\n", "ppm", 3, "image/ppm",
845 "P6\n", "ppm", 3, "image/ppm",
846 "/* XPM */\n", "xbm", 10, "image/xbm",
847 ".HTML ", "troff -ms input", 6, "text/troff",
848 ".LP", "troff -ms input", 3, "text/troff",
849 ".ND", "troff -ms input", 3, "text/troff",
850 ".PP", "troff -ms input", 3, "text/troff",
851 ".TL", "troff -ms input", 3, "text/troff",
852 ".TR", "troff -ms input", 3, "text/troff",
853 ".TH", "manual page", 3, "text/troff",
854 ".\\\"", "troff input", 3, "text/troff",
855 ".de", "troff input", 3, "text/troff",
856 ".if", "troff input", 3, "text/troff",
857 ".nr", "troff input", 3, "text/troff",
858 ".tr", "troff input", 3, "text/troff",
859 "vac:", "venti score", 4, PLAIN,
860 "-----BEGIN CERTIFICATE-----\n",
861 "pem certificate", -1, PLAIN,
862 "-----BEGIN TRUSTED CERTIFICATE-----\n",
863 "pem trusted certificate", -1, PLAIN,
864 "-----BEGIN X509 CERTIFICATE-----\n",
865 "pem x.509 certificate", -1, PLAIN,
866 "subject=/C=", "pem certificate with header", -1, PLAIN,
867 "process snapshot ", "process snapshot", -1, "application/snapfs",
868 "d8:announce", "torrent file", 11, "application/x-bittorrent",
869 "[playlist]", "playlist", 10, "application/x-scpls",
870 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
871 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
872 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
873 "AT&T", "DjVu document", 4, "image/vnd.djvu",
874 "Extended module: ", "XM audio", 17, "audio/xm",
875 "MThd", "midi audio", 4, "audio/midi",
876 "MUS\x1a", "mus audio", 4, "audio/mus",
877 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
878 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
879 "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
880 "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
881 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
882 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
883 "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
884 "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
892 struct FILE_STRING *p;
894 for(p = file_string; p->key; p++) {
898 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
899 print("%s\n", mime ? p->mime : p->filetype);
903 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
904 for(i = 5; i < nbuf; i++)
908 print("%s\n", OCTET);
910 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
921 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
922 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom",
933 for(p = offstrs; p->key; p++) {
938 if (readn(fd, buf, n) != n)
940 if(memcmp(buf, p->key, n) == 0) {
941 print("%s\n", mime ? p->mime : p->filetype);
951 if (strncmp((char*)buf, "FORM", 4) == 0 &&
952 strncmp((char*)buf+8, "AIFF", 4) == 0) {
953 print("%s\n", mime? "audio/x-aiff": "aiff audio");
956 if (strncmp((char*)buf, "RIFF", 4) == 0) {
957 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
958 print("%s\n", mime? "audio/wave": "wave audio");
959 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
960 print("%s\n", mime? "video/avi": "avi video");
962 print("%s\n", mime? OCTET : "riff file");
968 char* html_string[] = {
970 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
972 "button", "center", "iframe", "object", "option", "script",
974 "blink", "embed", "frame", "input", "label", "param", "small",
975 "style", "table", "tbody", "tfoot", "thead", "title",
976 "?xml", "body", "code", "font", "form", "head", "html",
977 "link", "menu", "meta", "span",
978 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
979 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
980 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
981 "a", "b", "i", "p", "q", "u",
994 while(p < buf+nbuf && *p != '<')
1003 for(i = 0; html_string[i]; i++){
1004 n = strlen(html_string[i]);
1005 if(p + n > buf+nbuf)
1007 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1009 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1011 print("%s\n", mime ? "text/html" : "HTML file");
1022 char* rfc822_string[] =
1044 q = strchr(p, '\n');
1048 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1055 if(*p != '\t' && *p != ' '){
1059 for(i = 0; rfc822_string[i]; i++) {
1060 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1069 print("%s\n", mime ? "message/rfc822" : "email file");
1081 q = strchr(p, '\n');
1085 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1086 print("%s\n", mime ? PLAIN : "mail box");
1100 if(Binit(&b, fd, OREAD) == Beof)
1103 type = objtype(&b, &name);
1107 print("%s\n", OCTET);
1109 print("%s intermediate\n", name);
1122 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1124 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1129 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1134 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1140 print("%s\n", PLAIN);
1143 if(wfreq[Alword] > 0)
1144 print("alef program\n");
1146 print("c program\n");
1156 if(wfreq[Lword] < 4)
1158 print("%s\n", mime ? PLAIN : "limbo program");
1168 if(wfreq[Aword] < 2)
1170 print("%s\n", mime ? PLAIN : "as program");
1182 if((p[12] | p[13]<<8) == 0) /* width */
1184 if((p[14] | p[15]<<8) == 0) /* height */
1186 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1188 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1190 if(p[1] == 0){ /* non color-mapped */
1191 if((p[2]&3) != 2 && (p[2]&3) != 3)
1193 if((p[5] | p[6]<<8) != 0) /* palette length */
1196 if(p[1] == 1){ /* color-mapped */
1197 if((p[2]&3) != 1 || p[7] == 0)
1199 if((p[5] | p[6]<<8) == 0) /* palette length */
1203 print("%s\n", mime ? "image/tga" : "targa image");
1214 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1215 if((p[1] & 0xFE) == 0xFA){
1216 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1225 * high entropy (near-uniform byte distribution) means compressed or encrypted
1235 memset(bucket, 0, sizeof(bucket));
1236 for(i=nbuf-64; i<nbuf; i++)
1237 bucket[(buf[i]>>5)&07] += 1;
1241 cs += (bucket[i]-8)*(bucket[i]-8);
1244 if(buf[0]==0x1f && buf[1]==0x9d)
1245 print("%s\n", mime ? "application/x-compress" : "compressed");
1247 if(buf[0]==0x1f && buf[1]==0x8b)
1248 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1250 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1251 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1253 if(buf[0]==0x78 && buf[1]==0x9c)
1254 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1256 print("%s\n", mime ? OCTET : "encrypted");
1263 * english by punctuation and frequencies
1268 int vow, comm, rare, badpun, punct;
1271 if(guess != Fascii && guess != Feascii)
1275 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1285 if(p[1] != ' ' && p[1] != '\n')
1288 if(badpun*5 > punct)
1290 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1292 if(2*cfreq[';'] > cfreq['e'])
1296 for(p="AEIOU"; *p; p++) {
1298 vow += cfreq[tolower(*p)];
1301 for(p="ETAION"; *p; p++) {
1303 comm += cfreq[tolower(*p)];
1306 for(p="VJKQXZ"; *p; p++) {
1308 rare += cfreq[tolower(*p)];
1310 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1311 print("%s\n", mime ? PLAIN : "English text");
1318 * pick up a number with
1323 p9bitnum(char *s, int *v)
1327 if(s[P9BITLEN-1] != ' ')
1329 s[P9BITLEN-1] = '\0';
1330 *v = strtol(s, &es, 10);
1331 s[P9BITLEN-1] = ' ';
1332 if(es != &s[P9BITLEN-1])
1338 depthof(char *s, int *newp)
1345 while(s<es && *s==' ')
1349 if('0'<=*s && *s<='9')
1350 return 1<<strtol(s, nil, 0);
1354 while(s<es && *s!=' '){
1355 if(strchr("rgbkamx", *s) == nil)
1358 if('0'<=*s && *s<='9')
1359 d += strtoul(s, &s, 10);
1364 if(d % 8 == 0 || 8 % d == 0)
1373 int dep, lox, loy, hix, hiy, px, new, cmpr;
1380 if(memcmp(cp, "compressed\n", 11) == 0) {
1385 if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1387 newlabel = new ? "" : "old ";
1388 if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1390 if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1392 if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1394 if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1399 if(hix <= 0 || hiy <= 0)
1403 px = 8/dep; /* pixels per byte */
1404 /* set len to number of bytes of data per scan line */
1405 len = (hix+px-1)/px;
1408 len *= hiy; /* col length */
1409 len += 5 * P9BITLEN; /* size of initial ascii */
1412 * for compressed images, don't look any further. otherwise:
1413 * for image file, length is non-zero and must match calculation above.
1414 * for /dev/window and /dev/screen the length is always zero.
1415 * for subfont, the subfont header should follow immediately.
1418 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1419 newlabel, dep, hix, hiy);
1423 * mbuf->length == 0 probably indicates reading a pipe.
1424 * Ghostscript sometimes produces a little extra on the end.
1426 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1427 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1428 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1429 newlabel, dep, hix, hiy);
1432 if (p9subfont(buf+len)) {
1433 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1434 newlabel, dep, hix, hiy);
1445 /* if image too big, assume it's a subfont */
1446 if (p+3*P9BITLEN > buf+sizeof(buf))
1449 if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */
1451 if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */
1453 if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */
1455 if(n > 0 && h > 0 && a >= 0)
1460 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1467 char pathname[1024];
1470 if (!getfontnum(cp, &cp)) /* height */
1472 if (!getfontnum(cp, &cp)) /* ascent */
1474 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1475 if (!getfontnum(cp, &cp)) /* min */
1477 if (!getfontnum(cp, &cp)) /* max */
1479 getfontnum(cp, &cp); /* optional offset */
1480 while (WHITESPACE(*cp))
1482 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1484 /* construct a path name, if needed */
1486 if (*p != '/' && slash) {
1488 if (n < sizeof(pathname))
1489 memcpy(pathname, fname, n);
1492 if (n+cp-p+4 < sizeof(pathname)) {
1493 memcpy(pathname+n, p, cp-p);
1496 if (access(pathname, AEXIST) < 0) {
1497 strcpy(pathname+n, ".0");
1498 if (access(pathname, AEXIST) < 0)
1504 print("%s\n", mime ? PLAIN : "font file");
1511 getfontnum(uchar *cp, uchar **rp)
1513 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1515 if (*cp < '0' || *cp > '9')
1517 strtoul((char *)cp, (char **)rp, 0);
1518 if (!WHITESPACE(**rp)) {
1528 if(strstr((char *)buf, "\\rtf1")){
1529 print(mime ? "application/rtf\n" : "rich text format\n");
1538 if (buf[0] == 0x4d && buf[1] == 0x5a){
1539 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1548 if(buf[0] || buf[1] || buf[3] || buf[9])
1550 if(buf[4] == 0x00 && buf[5] == 0x00)
1554 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1557 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1566 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1589 static char *type[] = {
1590 [1] "relocatable object",
1592 [3] "shared library",
1596 if (memcmp(buf, "\x7fELF", 4) == 0){
1599 int n = (buf[19] << 8) | buf[18];
1600 char *p = "unknown";
1601 char *t = "unknown";
1603 if (n > 0 && n < nelem(cpu) && cpu[n])
1606 /* try the other byte order */
1608 n = (buf[18] << 8) | buf[19];
1609 if (n > 0 && n < nelem(cpu) && cpu[n])
1613 n = (buf[16]<< 8) | buf[17];
1615 n = (buf[17]<< 8) | buf[16];
1617 if(n>0 && n < nelem(type) && type[n])
1619 print("%s ELF %s\n", p, t);
1622 print("application/x-elf-executable\n");
1632 int i, j, ldepth, l;
1636 for(j = 0; j < 3; j++){
1637 for(p = (char*)buf, i=0; i<3; i++){
1638 if(p[0] != '0' || p[1] != 'x')
1642 else if(buf[2+4] == ',')
1653 while(*p == ' ' || *p == '\t')
1661 print("application/x-face\n");
1663 print("face image depth %d\n", ldepth);