8 * file - determine type of file
/*
 * Assemble an unsigned 32-bit value from the four bytes at p, taking
 * p[0] as the least significant byte (little-endian order).
 *
 * Each byte is widened to unsigned long before it is shifted.  Without
 * the cast a byte promotes to int, and shifting a value >= 0x80 left by
 * 24 moves a one into the sign bit: undefined behaviour, and in practice
 * a negative result that sign-extends when stored in a wider unsigned.
 * p is evaluated four times, so it must be free of side effects.
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
			((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
172 int p9bitnum(char*, int*);
173 int p9subfont(uchar*);
174 void print_utf(void);
175 void type(char*, int);
179 int (*call[])(void) =
181 long0, /* recognizable by first 4 bytes */
182 istring, /* recognizable by first string */
183 iself, /* ELF (foreign) executable */
184 isexec, /* native executables */
185 iff, /* interchange file format (strings) */
186 longoff, /* recognizable by 4 bytes at some offset */
187 isoffstr, /* recognizable by string at some offset */
188 isudiff, /* unified diff output */
189 isrfc822, /* email file */
190 ismbox, /* mail box */
191 istar, /* recognizable by tar checksum */
192 iscint, /* compiler/assembler intermediate */
193 ishtml, /* html keywords */
194 islimbo, /* limbo source */
195 isc, /* c & alef compiler key words */
196 isas, /* assembler key words */
197 isp9font, /* plan 9 font */
198 isp9bit, /* plan 9 image (as from /dev/window) */
199 isrtf, /* rich text format */
200 ismsdos, /* msdos exe (virus file attachment) */
201 isicocur, /* windows icon or cursor file */
202 isface, /* ascii face file */
208 ismung, /* entropy compressed/encrypted */
209 isenglish, /* char frequency English */
215 char OCTET[] = "application/octet-stream";
216 char PLAIN[] = "text/plain";
219 main(int argc, char *argv[])
230 fprint(2, "usage: file [-m] [file...]\n");
235 if(mime == 0 || argc > 1){
236 for(i = 0; i < argc; i++) {
237 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
249 for(i = 0; i < argc; i++)
250 type(argv[i], maxlen);
256 type(char *file, int nlen)
264 for (i = 0, p = file; *p; i++) {
265 if (*p == '/') /* find rightmost slash */
267 p += chartorune(&r, p); /* count runes */
269 print("%s:%*s",file, nlen-i+1, "");
272 if ((fd = open(file, OREAD)) < 0) {
273 fprint(2, "cannot open: %r\n");
291 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
296 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
301 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
302 memmove(buf, buf+3, nbuf-3);
306 if(memcmp(buf, "\xFE\xFF", 2) == 0){
312 memmove(rb, buf+2, nbuf);
314 e = p+sizeof(buf)-UTFmax-1;
315 for(i=0; i<nbuf && p < e; i+=2){
316 r = rb[i+1] | rb[i]<<8;
317 p += runetochar(p, &r);
321 nbuf = p - (char*)buf;
323 if(memcmp(buf, "\xFF\xFE", 2) == 0){
329 memmove(rb, buf+2, nbuf);
331 e = p+sizeof(buf)-UTFmax-1;
332 for(i=0; i<nbuf && p < e; i+=2){
333 r = rb[i] | rb[i+1]<<8;
334 p += runetochar(p, &r);
338 nbuf = p - (char*)buf;
352 fprint(2, "cannot stat: %r\n");
355 if(mbuf->mode & DMDIR) {
356 print("%s\n", mime ? OCTET : "directory");
359 if(mbuf->type != 'M' && mbuf->type != '|') {
361 print("%s\n", OCTET);
363 print("special file #%C/%s\n", mbuf->type, mbuf->name);
366 /* may be reading a pipe on standard input */
367 nbuf = readn(fd, buf, sizeof(buf)-1);
369 fprint(2, "cannot read: %r\n");
373 print("%s\n", mime ? PLAIN : "empty file");
381 * build histogram table
383 memset(cfreq, 0, sizeof(cfreq));
384 for (i = 0; language[i].name; i++)
385 language[i].count = 0;
386 eob = (char *)buf+nbuf;
387 for(n = 0, p = (char *)buf; p < eob; n++) {
388 if (!fullrune(p, eob-p) && eob-p < UTFmax)
390 p += chartorune(&r, p);
393 else if (r <= 0x7f) {
394 if (!isprint(r) && !isspace(r))
395 f = Ceascii; /* ASCII control char */
397 } else if (r == 0x80) {
401 f = Cbinary; /* Invalid Runes */
403 f = Clatin; /* Latin 1 */
406 f = Cutf; /* UTF extension */
408 cfreq[f]++; /* ASCII chars peg directly */
415 else if (cfreq[Cutf])
417 else if (cfreq[Clatin])
419 else if (cfreq[Ceascii])
421 else if (cfreq[Cnull])
426 * lookup dictionary words
428 memset(wfreq, 0, sizeof(wfreq));
429 if(guess == Fascii || guess == Flatin || guess == Futf)
432 * call individual classify routines
434 for(i=0; call[i]; i++)
440 * print out gross classification
442 if (nbuf < 100 && !mime)
443 print(mime ? PLAIN : "short ");
445 print("%s\n", mime ? PLAIN : "Ascii");
446 else if (guess == Feascii)
447 print("%s\n", mime ? PLAIN : "extended ascii");
448 else if (guess == Flatin)
449 print("%s\n", mime ? PLAIN : "latin ascii");
450 else if (guess == Futf && utf_count() < 4)
452 else print("%s\n", mime ? OCTET : "binary");
456 bump_utf_count(Rune r)
460 high = sizeof(language)/sizeof(language[0])-1;
461 for (low = 0; low < high;) {
463 if (r >= language[mid].low) {
464 if (r <= language[mid].high) {
465 language[mid].count++;
478 for (i = 0; language[i].name; i++)
479 if (language[i].count > 0)
480 switch (language[i].mode) {
496 for (i = 'a'; i < 'z'; i++)
499 for (i = 'A'; i < 'Z'; i++)
506 find_first(char *name)
510 for (i = 0; language[i].name != 0; i++)
511 if (language[i].mode == First
512 && strcmp(language[i].name, name) == 0)
523 print("%s\n", PLAIN);
531 for (i = 0; language[i].name; i++)
532 if (language[i].count) {
533 switch(language[i].mode) {
535 j = find_first(language[i].name);
538 if (language[j].count > 0)
546 print("%s", language[i].name);
561 int low, high, mid, r;
566 while (p < buf+nbuf && !isalpha(*p))
571 while(p < buf+nbuf && isalpha(*p))
575 high = sizeof(dict)/sizeof(dict[0]);
576 for(low = 0;low < high;) {
578 r = strcmp(dict[mid].word, (char*)p2);
580 wfreq[dict[mid].class]++;
592 typedef struct Filemagic Filemagic;
601 * integers in this table must be as seen on a little-endian machine
602 * when read from a file.
604 Filemagic long0tab[] = {
605 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
607 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
609 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
610 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
611 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
612 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
613 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
614 070707, 0xFFFF, "cpio archive", "application/x-cpio",
615 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
616 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
617 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",
618 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
619 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
621 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
623 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
625 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
627 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
629 * venti & fossil magic numbers are stored big-endian on disk,
630 * thus the numbers appear reversed in this table.
632 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
633 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
634 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
635 /* tcpdump pcap file */
636 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
637 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
638 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
639 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
643 filemagic(Filemagic *tab, int ntab, ulong x)
647 for(i=0; i<ntab; i++)
648 if((x&tab[i].mask) == tab[i].x){
649 print("%s\n", mime ? tab[i].mime : tab[i].desc);
658 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
661 typedef struct Fileoffmag Fileoffmag;
668 * integers in this table must be as seen on a little-endian machine
669 * when read from a file.
671 Fileoffmag longofftab[] = {
673 * venti & fossil magic numbers are stored big-endian on disk,
674 * thus the numbers appear reversed in this table.
676 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
677 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
678 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
679 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
680 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
681 0x1fc, 0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
685 fileoffmagic(Fileoffmag *tab, int ntab)
690 uchar buf[sizeof(long)];
692 for(i=0; i<ntab; i++) {
694 seek(fd, tp->off, 0);
695 if (readn(fd, buf, sizeof buf) != sizeof buf)
698 if((x&tp->mask) == tp->x){
699 print("%s\n", mime ? tp->mime : tp->desc);
709 return fileoffmagic(longofftab, nelem(longofftab));
717 seek(fd, 0, 0); /* reposition to start of file */
718 if(crackhdr(fd, &f)) {
719 print("%s\n", mime ? OCTET : f.name);
/*
 * Sizes used by the tar header check:
 *	NAMSIZ	length of the fixed-size name fields (e.g. linkname)
 *	TBLOCK	number of bytes in one header block
 */
enum {
	NAMSIZ	= 100,
	TBLOCK	= 512
};
742 char linkname[NAMSIZ];
743 /* rest are defined by POSIX's ustar format; see p1003.2b */
744 char magic[6]; /* "ustar" */
750 char prefix[155]; /* if non-null, path = prefix "/" name */
755 checksum(union hblock *hp)
759 struct header *hdr = &hp->dbuf;
761 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
764 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
774 union hblock *hp = (union hblock *)tblock;
775 struct header *hdr = &hp->dbuf;
777 seek(fd, 0, 0); /* reposition to start of file */
778 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
780 chksum = strtol(hdr->chksum, 0, 8);
781 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
782 if (strcmp(hdr->magic, "ustar") == 0)
783 print(mime? "application/x-ustar\n": "posix tar archive\n");
785 print(mime? "application/x-tar\n": "tar archive\n");
792 * initial words to classify file
802 "\x1f\x9d", "compressed", 2, "application/x-compress",
803 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
804 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
805 "!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
806 "!<arch>\n", "archive", 8, OCTET,
807 "070707", "cpio archive - ascii header", 6, OCTET,
808 "#!/bin/rc", "rc executable file", 9, PLAIN,
809 "#!/bin/sh", "sh executable file", 9, PLAIN,
810 "%!", "postscript", 2, "application/postscript",
811 "\004%!", "postscript", 3, "application/postscript",
812 "x T post", "troff output for post", 8, "application/troff",
813 "x T Latin1", "troff output for Latin1", 10, "application/troff",
814 "x T utf", "troff output for UTF", 7, "application/troff",
815 "x T 202", "troff output for 202", 7, "application/troff",
816 "x T aps", "troff output for aps", 7, "application/troff",
817 "x T ", "troff output", 4, "application/troff",
818 "GIF", "GIF image", 3, "image/gif",
819 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
820 "%PDF", "PDF", 4, "application/pdf",
821 "<!DOCTYPE", "HTML file", 9, "text/html",
822 "<!doctype", "HTML file", 9, "text/html",
823 "<!--", "HTML file", 4, "text/html",
824 "<html>", "HTML file", 6, "text/html",
825 "<HTML>", "HTML file", 6, "text/html",
826 "<?xml", "HTML file", 5, "text/html",
827 "\111\111\052\000", "tiff", 4, "image/tiff",
828 "\115\115\000\052", "tiff", 4, "image/tiff",
829 "\377\330\377\340", "jpeg", 4, "image/jpeg",
830 "\377\330\377\341", "jpeg", 4, "image/jpeg",
831 "\377\330\377\333", "jpeg", 4, "image/jpeg",
832 "\xff\xd8", "jpeg", 2, "image/jpeg",
833 "BM", "bmp", 2, "image/bmp",
834 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
835 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
836 "\033E\033", "HP PCL printer data", 3, OCTET,
837 "\033&", "HP PCL printer data", 2, OCTET,
838 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
839 "\033Lua", "Lua bytecode", 4, OCTET,
840 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
841 "OggS", "ogg audio", 4, "audio/ogg",
842 ".snd", "sun audio", 4, "audio/basic",
843 "\211PNG", "PNG image", 4, "image/png",
844 "P1\n", "ppm", 3, "image/ppm",
845 "P2\n", "ppm", 3, "image/ppm",
846 "P3\n", "ppm", 3, "image/ppm",
847 "P4\n", "ppm", 3, "image/ppm",
848 "P5\n", "ppm", 3, "image/ppm",
849 "P6\n", "ppm", 3, "image/ppm",
850 "/* XPM */\n", "xbm", 10, "image/xbm",
851 ".HTML ", "troff -ms input", 6, "text/troff",
852 ".LP", "troff -ms input", 3, "text/troff",
853 ".ND", "troff -ms input", 3, "text/troff",
854 ".PP", "troff -ms input", 3, "text/troff",
855 ".TL", "troff -ms input", 3, "text/troff",
856 ".TR", "troff -ms input", 3, "text/troff",
857 ".TH", "manual page", 3, "text/troff",
858 ".\\\"", "troff input", 3, "text/troff",
859 ".de", "troff input", 3, "text/troff",
860 ".if", "troff input", 3, "text/troff",
861 ".nr", "troff input", 3, "text/troff",
862 ".tr", "troff input", 3, "text/troff",
863 "vac:", "venti score", 4, PLAIN,
864 "-----BEGIN CERTIFICATE-----\n",
865 "pem certificate", -1, PLAIN,
866 "-----BEGIN TRUSTED CERTIFICATE-----\n",
867 "pem trusted certificate", -1, PLAIN,
868 "-----BEGIN X509 CERTIFICATE-----\n",
869 "pem x.509 certificate", -1, PLAIN,
870 "subject=/C=", "pem certificate with header", -1, PLAIN,
871 "process snapshot ", "process snapshot", -1, "application/snapfs",
872 "d8:announce", "torrent file", 11, "application/x-bittorrent",
873 "[playlist]", "playlist", 10, "application/x-scpls",
874 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
875 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
876 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
877 "AT&T", "DjVu document", 4, "image/vnd.djvu",
878 "Extended module: ", "XM audio", 17, "audio/xm",
879 "MThd", "midi audio", 4, "audio/midi",
880 "MUS\x1a", "mus audio", 4, "audio/mus",
881 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
882 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
883 "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
884 "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
885 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
886 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
887 "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
888 "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
896 struct FILE_STRING *p;
898 for(p = file_string; p->key; p++) {
902 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
903 print("%s\n", mime ? p->mime : p->filetype);
907 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
908 for(i = 5; i < nbuf; i++)
912 print("%s\n", OCTET);
914 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
925 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
926 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom",
937 for(p = offstrs; p->key; p++) {
942 if (readn(fd, buf, n) != n)
944 if(memcmp(buf, p->key, n) == 0) {
945 print("%s\n", mime ? p->mime : p->filetype);
955 if (strncmp((char*)buf, "FORM", 4) == 0 &&
956 strncmp((char*)buf+8, "AIFF", 4) == 0) {
957 print("%s\n", mime? "audio/x-aiff": "aiff audio");
960 if (strncmp((char*)buf, "RIFF", 4) == 0) {
961 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
962 print("%s\n", mime? "audio/wave": "wave audio");
963 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
964 print("%s\n", mime? "video/avi": "avi video");
966 print("%s\n", mime? OCTET : "riff file");
972 char* html_string[] = {
974 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
976 "button", "center", "iframe", "object", "option", "script",
978 "blink", "embed", "frame", "input", "label", "param", "small",
979 "style", "table", "tbody", "tfoot", "thead", "title",
980 "?xml", "body", "code", "font", "form", "head", "html",
981 "link", "menu", "meta", "span",
982 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
983 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
984 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
985 "a", "b", "i", "p", "q", "u",
995 if((p = strstr(p, "diff")) != nil)
996 if((p = strchr(p, '\n')) != nil)
997 if(strncmp(++p, "--- ", 4) == 0)
998 if((p = strchr(p, '\n')) != nil)
999 if(strncmp(++p, "+++ ", 4) == 0)
1000 if((p = strchr(p, '\n')) != nil)
1001 if(strncmp(++p, "@@ ", 3) == 0){
1002 print("%s\n", mime ? "text/plain" : "unified diff output");
1017 while(p < buf+nbuf && *p != '<')
1026 for(i = 0; html_string[i]; i++){
1027 n = strlen(html_string[i]);
1028 if(p + n > buf+nbuf)
1030 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1032 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1034 print("%s\n", mime ? "text/html" : "HTML file");
1045 char* rfc822_string[] =
1067 q = strchr(p, '\n');
1071 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1078 if(*p != '\t' && *p != ' '){
1082 for(i = 0; rfc822_string[i]; i++) {
1083 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1092 print("%s\n", mime ? "message/rfc822" : "email file");
1104 q = strchr(p, '\n');
1108 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1109 print("%s\n", mime ? PLAIN : "mail box");
1123 if(Binit(&b, fd, OREAD) == Beof)
1126 type = objtype(&b, &name);
1130 print("%s\n", OCTET);
1132 print("%s intermediate\n", name);
1145 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1147 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1152 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1157 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1163 print("%s\n", PLAIN);
1166 if(wfreq[Alword] > 0)
1167 print("alef program\n");
1169 print("c program\n");
1179 if(wfreq[Lword] < 4)
1181 print("%s\n", mime ? PLAIN : "limbo program");
1191 if(wfreq[Aword] < 2)
1193 print("%s\n", mime ? PLAIN : "as program");
1205 if((p[12] | p[13]<<8) == 0) /* width */
1207 if((p[14] | p[15]<<8) == 0) /* height */
1209 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1211 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1213 if(p[1] == 0){ /* non color-mapped */
1214 if((p[2]&3) != 2 && (p[2]&3) != 3)
1216 if((p[5] | p[6]<<8) != 0) /* palette length */
1219 if(p[1] == 1){ /* color-mapped */
1220 if((p[2]&3) != 1 || p[7] == 0)
1222 if((p[5] | p[6]<<8) == 0) /* palette length */
1226 print("%s\n", mime ? "image/tga" : "targa image");
1237 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1238 if((p[1] & 0xFE) == 0xFA){
1239 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1252 if(memcmp(&buf[4], "ftyp", 4) != 0)
1254 if(memcmp(&buf[8], "isom", 4) == 0){
1255 print("%s\n", mime ? "video/mp4" : "mp4 video");
1258 if(memcmp(&buf[8], "M4A ", 4) == 0){
1259 print("%s\n", mime ? "audio/m4a" : "m4a audio");
1266 * low entropy means encrypted
1276 memset(bucket, 0, sizeof(bucket));
1277 for(i=nbuf-64; i<nbuf; i++)
1278 bucket[(buf[i]>>5)&07] += 1;
1282 cs += (bucket[i]-8)*(bucket[i]-8);
1285 if(buf[0]==0x1f && buf[1]==0x9d)
1286 print("%s\n", mime ? "application/x-compress" : "compressed");
1288 if(buf[0]==0x1f && buf[1]==0x8b)
1289 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1291 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1292 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1294 if(buf[0]==0x78 && buf[1]==0x9c)
1295 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1297 print("%s\n", mime ? OCTET : "encrypted");
1304 * english by punctuation and frequencies
1309 int vow, comm, rare, badpun, punct;
1312 if(guess != Fascii && guess != Feascii)
1316 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1326 if(p[1] != ' ' && p[1] != '\n')
1329 if(badpun*5 > punct)
1331 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1333 if(2*cfreq[';'] > cfreq['e'])
1337 for(p="AEIOU"; *p; p++) {
1339 vow += cfreq[tolower(*p)];
1342 for(p="ETAION"; *p; p++) {
1344 comm += cfreq[tolower(*p)];
1347 for(p="VJKQXZ"; *p; p++) {
1349 rare += cfreq[tolower(*p)];
1351 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1352 print("%s\n", mime ? PLAIN : "English text");
1359 * pick up a number with
1364 p9bitnum(char *s, int *v)
1368 if(s[P9BITLEN-1] != ' ')
1370 s[P9BITLEN-1] = '\0';
1371 *v = strtol(s, &es, 10);
1372 s[P9BITLEN-1] = ' ';
1373 if(es != &s[P9BITLEN-1])
1379 depthof(char *s, int *newp)
1386 while(s<es && *s==' ')
1390 if('0'<=*s && *s<='9')
1391 return 1<<strtol(s, nil, 0);
1395 while(s<es && *s!=' '){
1396 if(strchr("rgbkamx", *s) == nil)
1399 if('0'<=*s && *s<='9')
1400 d += strtoul(s, &s, 10);
1405 if(d % 8 == 0 || 8 % d == 0)
1414 int dep, lox, loy, hix, hiy, px, new, cmpr;
1421 if(memcmp(cp, "compressed\n", 11) == 0) {
1426 if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1428 newlabel = new ? "" : "old ";
1429 if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1431 if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1433 if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1435 if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1440 if(hix <= 0 || hiy <= 0)
1444 px = 8/dep; /* pixels per byte */
1445 /* set l to number of bytes of data per scan line */
1446 len = (hix+px-1)/px;
1449 len *= hiy; /* col length */
1450 len += 5 * P9BITLEN; /* size of initial ascii */
1453 * for compressed images, don't look any further. otherwise:
1454 * for image file, length is non-zero and must match calculation above.
1455 * for /dev/window and /dev/screen the length is always zero.
1456 * for subfont, the subfont header should follow immediately.
1459 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1460 newlabel, dep, hix, hiy);
1464 * mbuf->length == 0 probably indicates reading a pipe.
1465 * Ghostscript sometimes produces a little extra on the end.
1467 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1468 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1469 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1470 newlabel, dep, hix, hiy);
1473 if (p9subfont(buf+len)) {
1474 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1475 newlabel, dep, hix, hiy);
1486 /* if image too big, assume it's a subfont */
1487 if (p+3*P9BITLEN > buf+sizeof(buf))
1490 if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */
1492 if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */
1494 if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */
1496 if(n > 0 && h > 0 && a >= 0)
/*
 * Non-zero (1) when c is a space, tab or newline, otherwise 0.
 * c may be evaluated up to three times; pass an expression
 * without side effects.
 */
#define WHITESPACE(c)	(!((c) != ' ' && (c) != '\t' && (c) != '\n'))
1508 char pathname[1024];
1511 if (!getfontnum(cp, &cp)) /* height */
1513 if (!getfontnum(cp, &cp)) /* ascent */
1515 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1516 if (!getfontnum(cp, &cp)) /* min */
1518 if (!getfontnum(cp, &cp)) /* max */
1520 getfontnum(cp, &cp); /* optional offset */
1521 while (WHITESPACE(*cp))
1523 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1525 /* construct a path name, if needed */
1527 if (*p != '/' && slash) {
1529 if (n < sizeof(pathname))
1530 memcpy(pathname, fname, n);
1533 if (n+cp-p+4 < sizeof(pathname)) {
1534 memcpy(pathname+n, p, cp-p);
1537 if (access(pathname, AEXIST) < 0) {
1538 strcpy(pathname+n, ".0");
1539 if (access(pathname, AEXIST) < 0)
1545 print("%s\n", mime ? PLAIN : "font file");
1552 getfontnum(uchar *cp, uchar **rp)
1554 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1556 if (*cp < '0' || *cp > '9')
1558 strtoul((char *)cp, (char **)rp, 0);
1559 if (!WHITESPACE(**rp)) {
1569 if(strstr((char *)buf, "\\rtf1")){
1570 print(mime ? "application/rtf\n" : "rich text format\n");
1579 if (buf[0] == 0x4d && buf[1] == 0x5a){
1580 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1589 if(buf[0] || buf[1] || buf[3] || buf[9])
1591 if(buf[4] == 0x00 && buf[5] == 0x00)
1595 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1598 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1607 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1630 static char *type[] = {
1631 [1] "relocatable object",
1633 [3] "shared library",
1637 if (memcmp(buf, "\x7fELF", 4) == 0){
1640 int n = (buf[19] << 8) | buf[18];
1641 char *p = "unknown";
1642 char *t = "unknown";
1644 if (n > 0 && n < nelem(cpu) && cpu[n])
1647 /* try the other byte order */
1649 n = (buf[18] << 8) | buf[19];
1650 if (n > 0 && n < nelem(cpu) && cpu[n])
1654 n = (buf[16]<< 8) | buf[17];
1656 n = (buf[17]<< 8) | buf[16];
1658 if(n>0 && n < nelem(type) && type[n])
1660 print("%s ELF %s\n", p, t);
1663 print("application/x-elf-executable\n");
1673 int i, j, ldepth, l;
1677 for(j = 0; j < 3; j++){
1678 for(p = (char*)buf, i=0; i<3; i++){
1679 if(p[0] != '0' || p[1] != 'x')
1683 else if(buf[2+4] == ',')
1694 while(*p == ' ' || *p == '\t')
1702 print("application/x-face\n");
1704 print("face image depth %d\n", ldepth);