8 * file - determine type of file
/*
 * LENDIAN(p): assemble the four bytes p[0]..p[3] into one value,
 * least-significant byte first (p[0] is bits 0-7, p[3] is bits 24-31).
 *
 * Each byte is widened to unsigned long before it is shifted.  Without
 * the cast the byte is promoted to int, and shifting a top byte >= 0x80
 * left by 24 lands in the sign bit: undefined in standard C, and the
 * negative result sign-extends when stored in a wider unsigned type.
 *
 * p is expected to address unsigned bytes and is evaluated four times,
 * so it must not have side effects.
 */
#define LENDIAN(p) ((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
	((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries for a language spanning several ranges */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
171 int p9bitnum(char*, int*);
172 int p9subfont(uchar*);
173 void print_utf(void);
174 void type(char*, int);
178 int (*call[])(void) =
180 long0, /* recognizable by first 4 bytes */
181 istring, /* recognizable by first string */
182 iself, /* ELF (foreign) executable */
183 isexec, /* native executables */
184 iff, /* interchange file format (strings) */
185 longoff, /* recognizable by 4 bytes at some offset */
186 isoffstr, /* recognizable by string at some offset */
187 isudiff, /* unified diff output */
188 isrfc822, /* email file */
189 ismbox, /* mail box */
190 istar, /* recognizable by tar checksum */
191 iscint, /* compiler/assembler intermediate */
192 ishtml, /* html keywords */
193 islimbo, /* limbo source */
194 isc, /* c & alef compiler key words */
195 isas, /* assembler key words */
196 isp9font, /* plan 9 font */
197 isp9bit, /* plan 9 image (as from /dev/window) */
198 isrtf, /* rich text format */
199 ismsdos, /* msdos exe (virus file attachment) */
200 isicocur, /* windows icon or cursor file */
201 isface, /* ascii face file */
206 ismung, /* entropy compressed/encrypted */
207 isenglish, /* char frequency English */
213 char OCTET[] = "application/octet-stream"; /* MIME type printed, when mime is set, for binary or otherwise unclassified data */
214 char PLAIN[] = "text/plain"; /* MIME type printed, when mime is set, for textual data */
217 main(int argc, char *argv[])
228 fprint(2, "usage: file [-m] [file...]\n");
233 if(mime == 0 || argc > 1){
234 for(i = 0; i < argc; i++) {
235 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
247 for(i = 0; i < argc; i++)
248 type(argv[i], maxlen);
254 type(char *file, int nlen)
262 for (i = 0, p = file; *p; i++) {
263 if (*p == '/') /* find rightmost slash */
265 p += chartorune(&r, p); /* count runes */
267 print("%s:%*s",file, nlen-i+1, "");
270 if ((fd = open(file, OREAD)) < 0) {
271 fprint(2, "cannot open: %r\n");
289 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
294 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
299 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
300 memmove(buf, buf+3, nbuf-3);
304 if(memcmp(buf, "\xFE\xFF", 2) == 0){
310 memmove(rb, buf+2, nbuf);
312 e = p+sizeof(buf)-UTFmax-1;
313 for(i=0; i<nbuf && p < e; i+=2){
314 r = rb[i+1] | rb[i]<<8;
315 p += runetochar(p, &r);
319 nbuf = p - (char*)buf;
321 if(memcmp(buf, "\xFF\xFE", 2) == 0){
327 memmove(rb, buf+2, nbuf);
329 e = p+sizeof(buf)-UTFmax-1;
330 for(i=0; i<nbuf && p < e; i+=2){
331 r = rb[i] | rb[i+1]<<8;
332 p += runetochar(p, &r);
336 nbuf = p - (char*)buf;
350 fprint(2, "cannot stat: %r\n");
353 if(mbuf->mode & DMDIR) {
354 print("%s\n", mime ? OCTET : "directory");
357 if(mbuf->type != 'M' && mbuf->type != '|') {
359 print("%s\n", OCTET);
361 print("special file #%C/%s\n", mbuf->type, mbuf->name);
364 /* may be reading a pipe on standard input */
365 nbuf = readn(fd, buf, sizeof(buf)-1);
367 fprint(2, "cannot read: %r\n");
371 print("%s\n", mime ? PLAIN : "empty file");
379 * build histogram table
381 memset(cfreq, 0, sizeof(cfreq));
382 for (i = 0; language[i].name; i++)
383 language[i].count = 0;
384 eob = (char *)buf+nbuf;
385 for(n = 0, p = (char *)buf; p < eob; n++) {
386 if (!fullrune(p, eob-p) && eob-p < UTFmax)
388 p += chartorune(&r, p);
391 else if (r <= 0x7f) {
392 if (!isprint(r) && !isspace(r))
393 f = Ceascii; /* ASCII control char */
395 } else if (r == 0x80) {
399 f = Cbinary; /* Invalid Runes */
401 f = Clatin; /* Latin 1 */
404 f = Cutf; /* UTF extension */
406 cfreq[f]++; /* ASCII chars peg directly */
413 else if (cfreq[Cutf])
415 else if (cfreq[Clatin])
417 else if (cfreq[Ceascii])
419 else if (cfreq[Cnull])
424 * lookup dictionary words
426 memset(wfreq, 0, sizeof(wfreq));
427 if(guess == Fascii || guess == Flatin || guess == Futf)
430 * call individual classify routines
432 for(i=0; call[i]; i++)
438 * print out gross classification
440 if (nbuf < 100 && !mime)
441 print(mime ? PLAIN : "short ");
443 print("%s\n", mime ? PLAIN : "Ascii");
444 else if (guess == Feascii)
445 print("%s\n", mime ? PLAIN : "extended ascii");
446 else if (guess == Flatin)
447 print("%s\n", mime ? PLAIN : "latin ascii");
448 else if (guess == Futf && utf_count() < 4)
450 else print("%s\n", mime ? OCTET : "binary");
454 bump_utf_count(Rune r)
458 high = sizeof(language)/sizeof(language[0])-1;
459 for (low = 0; low < high;) {
461 if (r >= language[mid].low) {
462 if (r <= language[mid].high) {
463 language[mid].count++;
476 for (i = 0; language[i].name; i++)
477 if (language[i].count > 0)
478 switch (language[i].mode) {
494 for (i = 'a'; i < 'z'; i++)
497 for (i = 'A'; i < 'Z'; i++)
504 find_first(char *name)
508 for (i = 0; language[i].name != 0; i++)
509 if (language[i].mode == First
510 && strcmp(language[i].name, name) == 0)
521 print("%s\n", PLAIN);
529 for (i = 0; language[i].name; i++)
530 if (language[i].count) {
531 switch(language[i].mode) {
533 j = find_first(language[i].name);
536 if (language[j].count > 0)
544 print("%s", language[i].name);
559 int low, high, mid, r;
564 while (p < buf+nbuf && !isalpha(*p))
569 while(p < buf+nbuf && isalpha(*p))
573 high = sizeof(dict)/sizeof(dict[0]);
574 for(low = 0;low < high;) {
576 r = strcmp(dict[mid].word, (char*)p2);
578 wfreq[dict[mid].class]++;
590 typedef struct Filemagic Filemagic;
599 * integers in this table must be as seen on a little-endian machine
600 * when read from a file.
602 Filemagic long0tab[] = {
603 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
605 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
607 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
608 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
609 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
610 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
611 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
612 070707, 0xFFFF, "cpio archive", "application/x-cpio",
613 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
614 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
615 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",
616 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
617 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
619 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
621 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
623 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
625 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
627 * venti & fossil magic numbers are stored big-endian on disk,
628 * thus the numbers appear reversed in this table.
630 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
631 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
632 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
633 /* tcpdump pcap file */
634 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
635 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
636 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
637 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
641 filemagic(Filemagic *tab, int ntab, ulong x)
645 for(i=0; i<ntab; i++)
646 if((x&tab[i].mask) == tab[i].x){
647 print("%s\n", mime ? tab[i].mime : tab[i].desc);
656 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
659 typedef struct Fileoffmag Fileoffmag;
666 * integers in this table must be as seen on a little-endian machine
667 * when read from a file.
669 Fileoffmag longofftab[] = {
671 * venti & fossil magic numbers are stored big-endian on disk,
672 * thus the numbers appear reversed in this table.
674 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
675 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
676 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
677 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
678 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
679 0x1fc, 0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
683 fileoffmagic(Fileoffmag *tab, int ntab)
688 uchar buf[sizeof(long)];
690 for(i=0; i<ntab; i++) {
692 seek(fd, tp->off, 0);
693 if (readn(fd, buf, sizeof buf) != sizeof buf)
696 if((x&tp->mask) == tp->x){
697 print("%s\n", mime ? tp->mime : tp->desc);
707 return fileoffmagic(longofftab, nelem(longofftab));
715 seek(fd, 0, 0); /* reposition to start of file */
716 if(crackhdr(fd, &f)) {
717 print("%s\n", mime ? OCTET : f.name);
725 enum { NAMSIZ = 100, TBLOCK = 512 }; /* NAMSIZ: size of the header's linkname field; TBLOCK: number of header-block bytes summed by checksum() */
740 char linkname[NAMSIZ];
741 /* rest are defined by POSIX's ustar format; see p1003.2b */
742 char magic[6]; /* "ustar" */
748 char prefix[155]; /* if non-null, path = prefix "/" name */
753 checksum(union hblock *hp)
757 struct header *hdr = &hp->dbuf;
759 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
762 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
772 union hblock *hp = (union hblock *)tblock;
773 struct header *hdr = &hp->dbuf;
775 seek(fd, 0, 0); /* reposition to start of file */
776 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
778 chksum = strtol(hdr->chksum, 0, 8);
779 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
780 if (strcmp(hdr->magic, "ustar") == 0)
781 print(mime? "application/x-ustar\n": "posix tar archive\n");
783 print(mime? "application/x-tar\n": "tar archive\n");
790 * initial words to classify file
800 "\x1f\x9d", "compressed", 2, "application/x-compress",
801 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
802 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
803 "!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
804 "!<arch>\n", "archive", 8, OCTET,
805 "070707", "cpio archive - ascii header", 6, OCTET,
806 "#!/bin/rc", "rc executable file", 9, PLAIN,
807 "#!/bin/sh", "sh executable file", 9, PLAIN,
808 "%!", "postscript", 2, "application/postscript",
809 "\004%!", "postscript", 3, "application/postscript",
810 "x T post", "troff output for post", 8, "application/troff",
811 "x T Latin1", "troff output for Latin1", 10, "application/troff",
812 "x T utf", "troff output for UTF", 7, "application/troff",
813 "x T 202", "troff output for 202", 7, "application/troff",
814 "x T aps", "troff output for aps", 7, "application/troff",
815 "x T ", "troff output", 4, "application/troff",
816 "GIF", "GIF image", 3, "image/gif",
817 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
818 "%PDF", "PDF", 4, "application/pdf",
819 "<!DOCTYPE", "HTML file", 9, "text/html",
820 "<!doctype", "HTML file", 9, "text/html",
821 "<!--", "HTML file", 4, "text/html",
822 "<html>", "HTML file", 6, "text/html",
823 "<HTML>", "HTML file", 6, "text/html",
824 "<?xml", "HTML file", 5, "text/html",
825 "\111\111\052\000", "tiff", 4, "image/tiff",
826 "\115\115\000\052", "tiff", 4, "image/tiff",
827 "\377\330\377\340", "jpeg", 4, "image/jpeg",
828 "\377\330\377\341", "jpeg", 4, "image/jpeg",
829 "\377\330\377\333", "jpeg", 4, "image/jpeg",
830 "\xff\xd8", "jpeg", 2, "image/jpeg",
831 "BM", "bmp", 2, "image/bmp",
832 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
833 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
834 "\033E\033", "HP PCL printer data", 3, OCTET,
835 "\033&", "HP PCL printer data", 2, OCTET,
836 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
837 "\033Lua", "Lua bytecode", 4, OCTET,
838 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
839 "OggS", "ogg audio", 4, "audio/ogg",
840 ".snd", "sun audio", 4, "audio/basic",
841 "\211PNG", "PNG image", 4, "image/png",
842 "P1\n", "ppm", 3, "image/ppm",
843 "P2\n", "ppm", 3, "image/ppm",
844 "P3\n", "ppm", 3, "image/ppm",
845 "P4\n", "ppm", 3, "image/ppm",
846 "P5\n", "ppm", 3, "image/ppm",
847 "P6\n", "ppm", 3, "image/ppm",
848 "/* XPM */\n", "xbm", 10, "image/xbm",
849 ".HTML ", "troff -ms input", 6, "text/troff",
850 ".LP", "troff -ms input", 3, "text/troff",
851 ".ND", "troff -ms input", 3, "text/troff",
852 ".PP", "troff -ms input", 3, "text/troff",
853 ".TL", "troff -ms input", 3, "text/troff",
854 ".TR", "troff -ms input", 3, "text/troff",
855 ".TH", "manual page", 3, "text/troff",
856 ".\\\"", "troff input", 3, "text/troff",
857 ".de", "troff input", 3, "text/troff",
858 ".if", "troff input", 3, "text/troff",
859 ".nr", "troff input", 3, "text/troff",
860 ".tr", "troff input", 3, "text/troff",
861 "vac:", "venti score", 4, PLAIN,
862 "-----BEGIN CERTIFICATE-----\n",
863 "pem certificate", -1, PLAIN,
864 "-----BEGIN TRUSTED CERTIFICATE-----\n",
865 "pem trusted certificate", -1, PLAIN,
866 "-----BEGIN X509 CERTIFICATE-----\n",
867 "pem x.509 certificate", -1, PLAIN,
868 "subject=/C=", "pem certificate with header", -1, PLAIN,
869 "process snapshot ", "process snapshot", -1, "application/snapfs",
870 "d8:announce", "torrent file", 11, "application/x-bittorrent",
871 "[playlist]", "playlist", 10, "application/x-scpls",
872 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
873 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
874 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
875 "AT&T", "DjVu document", 4, "image/vnd.djvu",
876 "Extended module: ", "XM audio", 17, "audio/xm",
877 "MThd", "midi audio", 4, "audio/midi",
878 "MUS\x1a", "mus audio", 4, "audio/mus",
879 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
880 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
881 "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
882 "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
883 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
884 "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
885 "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
886 "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
894 struct FILE_STRING *p;
896 for(p = file_string; p->key; p++) {
900 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
901 print("%s\n", mime ? p->mime : p->filetype);
905 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
906 for(i = 5; i < nbuf; i++)
910 print("%s\n", OCTET);
912 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
923 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
924 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom",
935 for(p = offstrs; p->key; p++) {
940 if (readn(fd, buf, n) != n)
942 if(memcmp(buf, p->key, n) == 0) {
943 print("%s\n", mime ? p->mime : p->filetype);
953 if (strncmp((char*)buf, "FORM", 4) == 0 &&
954 strncmp((char*)buf+8, "AIFF", 4) == 0) {
955 print("%s\n", mime? "audio/x-aiff": "aiff audio");
958 if (strncmp((char*)buf, "RIFF", 4) == 0) {
959 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
960 print("%s\n", mime? "audio/wave": "wave audio");
961 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
962 print("%s\n", mime? "video/avi": "avi video");
964 print("%s\n", mime? OCTET : "riff file");
970 char* html_string[] = {
972 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
974 "button", "center", "iframe", "object", "option", "script",
976 "blink", "embed", "frame", "input", "label", "param", "small",
977 "style", "table", "tbody", "tfoot", "thead", "title",
978 "?xml", "body", "code", "font", "form", "head", "html",
979 "link", "menu", "meta", "span",
980 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
981 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
982 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
983 "a", "b", "i", "p", "q", "u",
993 if((p = strstr(p, "diff")) != nil)
994 if((p = strchr(p, '\n')) != nil)
995 if(strncmp(++p, "--- ", 4) == 0)
996 if((p = strchr(p, '\n')) != nil)
997 if(strncmp(++p, "+++ ", 4) == 0)
998 if((p = strchr(p, '\n')) != nil)
999 if(strncmp(++p, "@@ ", 3) == 0){
1000 print("%s\n", mime ? "text/plain" : "unified diff output");
1015 while(p < buf+nbuf && *p != '<')
1024 for(i = 0; html_string[i]; i++){
1025 n = strlen(html_string[i]);
1026 if(p + n > buf+nbuf)
1028 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1030 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1032 print("%s\n", mime ? "text/html" : "HTML file");
1043 char* rfc822_string[] =
1065 q = strchr(p, '\n');
1069 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1076 if(*p != '\t' && *p != ' '){
1080 for(i = 0; rfc822_string[i]; i++) {
1081 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1090 print("%s\n", mime ? "message/rfc822" : "email file");
1102 q = strchr(p, '\n');
1106 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1107 print("%s\n", mime ? PLAIN : "mail box");
1121 if(Binit(&b, fd, OREAD) == Beof)
1124 type = objtype(&b, &name);
1128 print("%s\n", OCTET);
1130 print("%s intermediate\n", name);
1143 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1145 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1150 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1155 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1161 print("%s\n", PLAIN);
1164 if(wfreq[Alword] > 0)
1165 print("alef program\n");
1167 print("c program\n");
1177 if(wfreq[Lword] < 4)
1179 print("%s\n", mime ? PLAIN : "limbo program");
1189 if(wfreq[Aword] < 2)
1191 print("%s\n", mime ? PLAIN : "as program");
1203 if((p[12] | p[13]<<8) == 0) /* width */
1205 if((p[14] | p[15]<<8) == 0) /* height */
1207 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1209 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1211 if(p[1] == 0){ /* non color-mapped */
1212 if((p[2]&3) != 2 && (p[2]&3) != 3)
1214 if((p[5] | p[6]<<8) != 0) /* palette length */
1217 if(p[1] == 1){ /* color-mapped */
1218 if((p[2]&3) != 1 || p[7] == 0)
1220 if((p[5] | p[6]<<8) == 0) /* palette length */
1224 print("%s\n", mime ? "image/tga" : "targa image");
1235 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1236 if((p[1] & 0xFE) == 0xFA){
1237 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1246 * high entropy (evenly spread byte values) means compressed or encrypted
1256 memset(bucket, 0, sizeof(bucket));
1257 for(i=nbuf-64; i<nbuf; i++)
1258 bucket[(buf[i]>>5)&07] += 1;
1262 cs += (bucket[i]-8)*(bucket[i]-8);
1265 if(buf[0]==0x1f && buf[1]==0x9d)
1266 print("%s\n", mime ? "application/x-compress" : "compressed");
1268 if(buf[0]==0x1f && buf[1]==0x8b)
1269 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1271 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1272 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1274 if(buf[0]==0x78 && buf[1]==0x9c)
1275 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1277 print("%s\n", mime ? OCTET : "encrypted");
1284 * english by punctuation and frequencies
1289 int vow, comm, rare, badpun, punct;
1292 if(guess != Fascii && guess != Feascii)
1296 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1306 if(p[1] != ' ' && p[1] != '\n')
1309 if(badpun*5 > punct)
1311 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1313 if(2*cfreq[';'] > cfreq['e'])
1317 for(p="AEIOU"; *p; p++) {
1319 vow += cfreq[tolower(*p)];
1322 for(p="ETAION"; *p; p++) {
1324 comm += cfreq[tolower(*p)];
1327 for(p="VJKQXZ"; *p; p++) {
1329 rare += cfreq[tolower(*p)];
1331 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1332 print("%s\n", mime ? PLAIN : "English text");
1339 * pick up a number with
1344 p9bitnum(char *s, int *v)
1348 if(s[P9BITLEN-1] != ' ')
1350 s[P9BITLEN-1] = '\0';
1351 *v = strtol(s, &es, 10);
1352 s[P9BITLEN-1] = ' ';
1353 if(es != &s[P9BITLEN-1])
1359 depthof(char *s, int *newp)
1366 while(s<es && *s==' ')
1370 if('0'<=*s && *s<='9')
1371 return 1<<strtol(s, nil, 0);
1375 while(s<es && *s!=' '){
1376 if(strchr("rgbkamx", *s) == nil)
1379 if('0'<=*s && *s<='9')
1380 d += strtoul(s, &s, 10);
1385 if(d % 8 == 0 || 8 % d == 0)
1394 int dep, lox, loy, hix, hiy, px, new, cmpr;
1401 if(memcmp(cp, "compressed\n", 11) == 0) {
1406 if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1408 newlabel = new ? "" : "old ";
1409 if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1411 if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1413 if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1415 if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1420 if(hix <= 0 || hiy <= 0)
1424 px = 8/dep; /* pixels per byte */
1425 /* set len to number of bytes of data per scan line */
1426 len = (hix+px-1)/px;
1429 len *= hiy; /* col length */
1430 len += 5 * P9BITLEN; /* size of initial ascii */
1433 * for compressed images, don't look any further. otherwise:
1434 * for image file, length is non-zero and must match calculation above.
1435 * for /dev/window and /dev/screen the length is always zero.
1436 * for subfont, the subfont header should follow immediately.
1439 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1440 newlabel, dep, hix, hiy);
1444 * mbuf->length == 0 probably indicates reading a pipe.
1445 * Ghostscript sometimes produces a little extra on the end.
1447 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1448 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1449 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1450 newlabel, dep, hix, hiy);
1453 if (p9subfont(buf+len)) {
1454 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1455 newlabel, dep, hix, hiy);
1466 /* if image too big, assume it's a subfont */
1467 if (p+3*P9BITLEN > buf+sizeof(buf))
1470 if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */
1472 if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */
1474 if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */
1476 if(n > 0 && h > 0 && a >= 0)
1481 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* true for space, tab or newline only; c may be evaluated up to three times */
1488 char pathname[1024];
1491 if (!getfontnum(cp, &cp)) /* height */
1493 if (!getfontnum(cp, &cp)) /* ascent */
1495 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1496 if (!getfontnum(cp, &cp)) /* min */
1498 if (!getfontnum(cp, &cp)) /* max */
1500 getfontnum(cp, &cp); /* optional offset */
1501 while (WHITESPACE(*cp))
1503 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1505 /* construct a path name, if needed */
1507 if (*p != '/' && slash) {
1509 if (n < sizeof(pathname))
1510 memcpy(pathname, fname, n);
1513 if (n+cp-p+4 < sizeof(pathname)) {
1514 memcpy(pathname+n, p, cp-p);
1517 if (access(pathname, AEXIST) < 0) {
1518 strcpy(pathname+n, ".0");
1519 if (access(pathname, AEXIST) < 0)
1525 print("%s\n", mime ? PLAIN : "font file");
1532 getfontnum(uchar *cp, uchar **rp)
1534 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1536 if (*cp < '0' || *cp > '9')
1538 strtoul((char *)cp, (char **)rp, 0);
1539 if (!WHITESPACE(**rp)) {
1549 if(strstr((char *)buf, "\\rtf1")){
1550 print(mime ? "application/rtf\n" : "rich text format\n");
1559 if (buf[0] == 0x4d && buf[1] == 0x5a){
1560 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1569 if(buf[0] || buf[1] || buf[3] || buf[9])
1571 if(buf[4] == 0x00 && buf[5] == 0x00)
1575 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1578 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1587 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1610 static char *type[] = {
1611 [1] "relocatable object",
1613 [3] "shared library",
1617 if (memcmp(buf, "\x7fELF", 4) == 0){
1620 int n = (buf[19] << 8) | buf[18];
1621 char *p = "unknown";
1622 char *t = "unknown";
1624 if (n > 0 && n < nelem(cpu) && cpu[n])
1627 /* try the other byte order */
1629 n = (buf[18] << 8) | buf[19];
1630 if (n > 0 && n < nelem(cpu) && cpu[n])
1634 n = (buf[16]<< 8) | buf[17];
1636 n = (buf[17]<< 8) | buf[16];
1638 if(n>0 && n < nelem(type) && type[n])
1640 print("%s ELF %s\n", p, t);
1643 print("application/x-elf-executable\n");
1653 int i, j, ldepth, l;
1657 for(j = 0; j < 3; j++){
1658 for(p = (char*)buf, i=0; i<3; i++){
1659 if(p[0] != '0' || p[1] != 'x')
1663 else if(buf[2+4] == ',')
1674 while(*p == ' ' || *p == '\t')
1682 print("application/x-face\n");
1684 print("face image depth %d\n", ldepth);