8 * file - determine type of file
/*
 * LENDIAN(p) combines the four bytes p[0]..p[3] into a single integer,
 * taking p[0] as the least significant byte (little-endian order).
 * The argument is expanded four times, so it must have no side effects.
 * NOTE(review): (p)[3]<<24 is evaluated after integer promotion; confirm
 * the result is as intended when the top bit of p[3] is set.
 */
10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
/*
 * Rune range rows; the columns appear to be: mode, running count,
 * lowest rune, highest rune, name.
 * Rows are listed in ascending order of the lowest rune; keep them
 * that way, since bump_utf_count appears to bisect this table using
 * the low/high bounds.
 * The final all-zero row has a null name, which is what the
 * name-driven scans over this table stop on.
 */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(char*, int*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
/*
 * Classifier routines, listed in the order type() walks them
 * (its loop runs for as long as call[i] is non-null).
 * NOTE(review): presumably the first routine that reports a match
 * ends the walk, so earlier entries take precedence -- confirm in type().
 */
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 iscint, /* compiler/assembler intermediate */
190 ishtml, /* html keywords */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
211 char OCTET[] = "application/octet-stream";
212 char PLAIN[] = "text/plain";
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 fprint(2, "cannot open: %r\n");
287 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298 memmove(buf, buf+3, nbuf-3);
302 if(memcmp(buf, "\xFE\xFF", 2) == 0){
308 memmove(rb, buf+2, nbuf);
310 e = p+sizeof(buf)-UTFmax-1;
311 for(i=0; i<nbuf && p < e; i+=2){
312 r = rb[i+1] | rb[i]<<8;
313 p += runetochar(p, &r);
317 nbuf = p - (char*)buf;
319 if(memcmp(buf, "\xFF\xFE", 2) == 0){
325 memmove(rb, buf+2, nbuf);
327 e = p+sizeof(buf)-UTFmax-1;
328 for(i=0; i<nbuf && p < e; i+=2){
329 r = rb[i] | rb[i+1]<<8;
330 p += runetochar(p, &r);
334 nbuf = p - (char*)buf;
348 fprint(2, "cannot stat: %r\n");
351 if(mbuf->mode & DMDIR) {
352 print("%s\n", mime ? OCTET : "directory");
355 if(mbuf->type != 'M' && mbuf->type != '|') {
357 print("%s\n", OCTET);
359 print("special file #%C/%s\n", mbuf->type, mbuf->name);
362 /* may be reading a pipe on standard input */
363 nbuf = readn(fd, buf, sizeof(buf)-1);
365 fprint(2, "cannot read: %r\n");
369 print("%s\n", mime ? PLAIN : "empty file");
377 * build histogram table
379 memset(cfreq, 0, sizeof(cfreq));
380 for (i = 0; language[i].name; i++)
381 language[i].count = 0;
382 eob = (char *)buf+nbuf;
383 for(n = 0, p = (char *)buf; p < eob; n++) {
384 if (!fullrune(p, eob-p) && eob-p < UTFmax)
386 p += chartorune(&r, p);
389 else if (r <= 0x7f) {
390 if (!isprint(r) && !isspace(r))
391 f = Ceascii; /* ASCII control char */
393 } else if (r == 0x80) {
397 f = Cbinary; /* Invalid Runes */
399 f = Clatin; /* Latin 1 */
402 f = Cutf; /* UTF extension */
404 cfreq[f]++; /* ASCII chars peg directly */
411 else if (cfreq[Cutf])
413 else if (cfreq[Clatin])
415 else if (cfreq[Ceascii])
417 else if (cfreq[Cnull])
422 * lookup dictionary words
424 memset(wfreq, 0, sizeof(wfreq));
425 if(guess == Fascii || guess == Flatin || guess == Futf)
428 * call individual classify routines
430 for(i=0; call[i]; i++)
436 * print out gross classification
438 if (nbuf < 100 && !mime)
439 print(mime ? PLAIN : "short ");
441 print("%s\n", mime ? PLAIN : "Ascii");
442 else if (guess == Feascii)
443 print("%s\n", mime ? PLAIN : "extended ascii");
444 else if (guess == Flatin)
445 print("%s\n", mime ? PLAIN : "latin ascii");
446 else if (guess == Futf && utf_count() < 4)
448 else print("%s\n", mime ? OCTET : "binary");
452 bump_utf_count(Rune r)
456 high = sizeof(language)/sizeof(language[0])-1;
457 for (low = 0; low < high;) {
459 if (r >= language[mid].low) {
460 if (r <= language[mid].high) {
461 language[mid].count++;
474 for (i = 0; language[i].name; i++)
475 if (language[i].count > 0)
476 switch (language[i].mode) {
492 for (i = 'a'; i < 'z'; i++)
495 for (i = 'A'; i < 'Z'; i++)
502 find_first(char *name)
506 for (i = 0; language[i].name != 0; i++)
507 if (language[i].mode == First
508 && strcmp(language[i].name, name) == 0)
519 print("%s\n", PLAIN);
527 for (i = 0; language[i].name; i++)
528 if (language[i].count) {
529 switch(language[i].mode) {
531 j = find_first(language[i].name);
534 if (language[j].count > 0)
542 print("%s", language[i].name);
557 int low, high, mid, r;
562 while (p < buf+nbuf && !isalpha(*p))
567 while(p < buf+nbuf && isalpha(*p))
571 high = sizeof(dict)/sizeof(dict[0]);
572 for(low = 0;low < high;) {
574 r = strcmp(dict[mid].word, (char*)p2);
576 wfreq[dict[mid].class]++;
588 typedef struct Filemagic Filemagic;
597 * integers in this table must be as seen on a little-endian machine
598 * when read from a file.
600 Filemagic long0tab[] = {
601 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
603 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
605 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
606 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
607 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
608 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
609 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
610 070707, 0xFFFF, "cpio archive", "application/x-cpio",
611 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
612 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
613 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",
614 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
617 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
619 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
621 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
623 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
625 * venti & fossil magic numbers are stored big-endian on disk,
626 * thus the numbers appear reversed in this table.
628 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
629 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
630 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
631 /* tcpdump pcap file */
632 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
633 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
634 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
635 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
639 filemagic(Filemagic *tab, int ntab, ulong x)
643 for(i=0; i<ntab; i++)
644 if((x&tab[i].mask) == tab[i].x){
645 print("%s\n", mime ? tab[i].mime : tab[i].desc);
654 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
657 typedef struct Fileoffmag Fileoffmag;
664 * integers in this table must be as seen on a little-endian machine
665 * when read from a file.
667 Fileoffmag longofftab[] = {
669 * venti & fossil magic numbers are stored big-endian on disk,
670 * thus the numbers appear reversed in this table.
672 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
673 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
674 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
675 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
676 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
677 0x1fc, 0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
681 fileoffmagic(Fileoffmag *tab, int ntab)
686 uchar buf[sizeof(long)];
688 for(i=0; i<ntab; i++) {
690 seek(fd, tp->off, 0);
691 if (readn(fd, buf, sizeof buf) != sizeof buf)
694 if((x&tp->mask) == tp->x){
695 print("%s\n", mime ? tp->mime : tp->desc);
705 return fileoffmagic(longofftab, nelem(longofftab));
713 seek(fd, 0, 0); /* reposition to start of file */
714 if(crackhdr(fd, &f)) {
715 print("%s\n", mime ? OCTET : f.name);
723 enum { NAMSIZ = 100, TBLOCK = 512 };
738 char linkname[NAMSIZ];
739 /* rest are defined by POSIX's ustar format; see p1003.2b */
740 char magic[6]; /* "ustar" */
746 char prefix[155]; /* if non-null, path = prefix "/" name */
751 checksum(union hblock *hp)
755 struct header *hdr = &hp->dbuf;
757 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
760 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
770 union hblock *hp = (union hblock *)tblock;
771 struct header *hdr = &hp->dbuf;
773 seek(fd, 0, 0); /* reposition to start of file */
774 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
776 chksum = strtol(hdr->chksum, 0, 8);
777 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
778 if (strcmp(hdr->magic, "ustar") == 0)
779 print(mime? "application/x-ustar\n": "posix tar archive\n");
781 print(mime? "application/x-tar\n": "tar archive\n");
788 * initial words to classify file
798 "\x1f\x9d", "compressed", 2, "application/x-compress",
799 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
800 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
801 "!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
802 "!<arch>\n", "archive", 8, OCTET,
803 "070707", "cpio archive - ascii header", 6, OCTET,
804 "#!/bin/rc", "rc executable file", 9, PLAIN,
805 "#!/bin/sh", "sh executable file", 9, PLAIN,
806 "%!", "postscript", 2, "application/postscript",
807 "\004%!", "postscript", 3, "application/postscript",
808 "x T post", "troff output for post", 8, "application/troff",
809 "x T Latin1", "troff output for Latin1", 10, "application/troff",
810 "x T utf", "troff output for UTF", 7, "application/troff",
811 "x T 202", "troff output for 202", 7, "application/troff",
812 "x T aps", "troff output for aps", 7, "application/troff",
813 "x T ", "troff output", 4, "application/troff",
814 "GIF", "GIF image", 3, "image/gif",
815 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
816 "%PDF", "PDF", 4, "application/pdf",
817 "<!DOCTYPE", "HTML file", 9, "text/html",
818 "<!doctype", "HTML file", 9, "text/html",
819 "<!--", "HTML file", 4, "text/html",
820 "<html>", "HTML file", 6, "text/html",
821 "<HTML>", "HTML file", 6, "text/html",
822 "<?xml", "HTML file", 5, "text/html",
823 "\111\111\052\000", "tiff", 4, "image/tiff",
824 "\115\115\000\052", "tiff", 4, "image/tiff",
825 "\377\330\377\340", "jpeg", 4, "image/jpeg",
826 "\377\330\377\341", "jpeg", 4, "image/jpeg",
827 "\377\330\377\333", "jpeg", 4, "image/jpeg",
828 "\xff\xd8", "jpeg", 2, "image/jpeg",
829 "BM", "bmp", 2, "image/bmp",
830 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
831 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
832 "\033E\033", "HP PCL printer data", 3, OCTET,
833 "\033&", "HP PCL printer data", 2, OCTET,
834 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
835 "\033Lua", "Lua bytecode", 4, OCTET,
836 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
837 "OggS", "ogg audio", 4, "audio/ogg",
838 ".snd", "sun audio", 4, "audio/basic",
839 "\211PNG", "PNG image", 4, "image/png",
840 "P1\n", "ppm", 3, "image/ppm",
841 "P2\n", "ppm", 3, "image/ppm",
842 "P3\n", "ppm", 3, "image/ppm",
843 "P4\n", "ppm", 3, "image/ppm",
844 "P5\n", "ppm", 3, "image/ppm",
845 "P6\n", "ppm", 3, "image/ppm",
846 "/* XPM */\n", "xbm", 10, "image/xbm",
847 ".HTML ", "troff -ms input", 6, "text/troff",
848 ".LP", "troff -ms input", 3, "text/troff",
849 ".ND", "troff -ms input", 3, "text/troff",
850 ".PP", "troff -ms input", 3, "text/troff",
851 ".TL", "troff -ms input", 3, "text/troff",
852 ".TR", "troff -ms input", 3, "text/troff",
853 ".TH", "manual page", 3, "text/troff",
854 ".\\\"", "troff input", 3, "text/troff",
855 ".de", "troff input", 3, "text/troff",
856 ".if", "troff input", 3, "text/troff",
857 ".nr", "troff input", 3, "text/troff",
858 ".tr", "troff input", 3, "text/troff",
859 "vac:", "venti score", 4, PLAIN,
860 "-----BEGIN CERTIFICATE-----\n",
861 "pem certificate", -1, PLAIN,
862 "-----BEGIN TRUSTED CERTIFICATE-----\n",
863 "pem trusted certificate", -1, PLAIN,
864 "-----BEGIN X509 CERTIFICATE-----\n",
865 "pem x.509 certificate", -1, PLAIN,
866 "subject=/C=", "pem certificate with header", -1, PLAIN,
867 "process snapshot ", "process snapshot", -1, "application/snapfs",
868 "d8:announce", "torrent file", 11, "application/x-bittorrent",
869 "[playlist]", "playlist", 10, "application/x-scpls",
870 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
871 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
872 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
873 "AT&T", "DjVu document", 4, "image/vnd.djvu",
874 "Extended module: ", "XM audio", 17, "audio/xm",
875 "MThd", "midi audio", 4, "audio/midi",
876 "MUS\x1a", "mus audio", 4, "audio/mus",
884 struct FILE_STRING *p;
886 for(p = file_string; p->key; p++) {
890 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
891 print("%s\n", mime ? p->mime : p->filetype);
895 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
896 for(i = 5; i < nbuf; i++)
900 print("%s\n", OCTET);
902 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
913 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
914 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom",
925 for(p = offstrs; p->key; p++) {
930 if (readn(fd, buf, n) != n)
932 if(memcmp(buf, p->key, n) == 0) {
933 print("%s\n", mime ? p->mime : p->filetype);
943 if (strncmp((char*)buf, "FORM", 4) == 0 &&
944 strncmp((char*)buf+8, "AIFF", 4) == 0) {
945 print("%s\n", mime? "audio/x-aiff": "aiff audio");
948 if (strncmp((char*)buf, "RIFF", 4) == 0) {
949 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
950 print("%s\n", mime? "audio/wave": "wave audio");
951 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
952 print("%s\n", mime? "video/avi": "avi video");
954 print("%s\n", mime? OCTET : "riff file");
960 char* html_string[] = {
962 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
964 "button", "center", "iframe", "object", "option", "script",
966 "blink", "embed", "frame", "input", "label", "param", "small",
967 "style", "table", "tbody", "tfoot", "thead", "title",
968 "?xml", "body", "code", "font", "form", "head", "html",
969 "link", "menu", "meta", "span",
970 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
971 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
972 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
973 "a", "b", "i", "p", "q", "u",
986 while(p < buf+nbuf && *p != '<')
995 for(i = 0; html_string[i]; i++){
996 n = strlen(html_string[i]);
999 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1001 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1003 print("%s\n", mime ? "text/html" : "HTML file");
1014 char* rfc822_string[] =
1036 q = strchr(p, '\n');
1040 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1047 if(*p != '\t' && *p != ' '){
1051 for(i = 0; rfc822_string[i]; i++) {
1052 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1061 print("%s\n", mime ? "message/rfc822" : "email file");
1073 q = strchr(p, '\n');
1077 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1078 print("%s\n", mime ? PLAIN : "mail box");
1092 if(Binit(&b, fd, OREAD) == Beof)
1095 type = objtype(&b, &name);
1099 print("%s\n", OCTET);
1101 print("%s intermediate\n", name);
1114 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1116 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1121 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1126 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1132 print("%s\n", PLAIN);
1135 if(wfreq[Alword] > 0)
1136 print("alef program\n");
1138 print("c program\n");
1148 if(wfreq[Lword] < 4)
1150 print("%s\n", mime ? PLAIN : "limbo program");
1160 if(wfreq[Aword] < 2)
1162 print("%s\n", mime ? PLAIN : "as program");
1174 if((p[12] | p[13]<<8) == 0) /* width */
1176 if((p[14] | p[15]<<8) == 0) /* height */
1178 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1180 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1182 if(p[1] == 0){ /* non color-mapped */
1183 if((p[2]&3) != 2 && (p[2]&3) != 3)
1185 if((p[5] | p[6]<<8) != 0) /* palette length */
1188 if(p[1] == 1){ /* color-mapped */
1189 if((p[2]&3) != 1 || p[7] == 0)
1191 if((p[5] | p[6]<<8) == 0) /* palette length */
1195 print("%s\n", mime ? "image/tga" : "targa image");
1206 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1207 if((p[1] & 0xFE) == 0xFA){
1208 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1217 * a near-uniform byte distribution (high entropy) suggests compressed or encrypted data
1227 memset(bucket, 0, sizeof(bucket));
1228 for(i=nbuf-64; i<nbuf; i++)
1229 bucket[(buf[i]>>5)&07] += 1;
1233 cs += (bucket[i]-8)*(bucket[i]-8);
1236 if(buf[0]==0x1f && buf[1]==0x9d)
1237 print("%s\n", mime ? "application/x-compress" : "compressed");
1239 if(buf[0]==0x1f && buf[1]==0x8b)
1240 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1242 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1243 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1245 if(buf[0]==0x78 && buf[1]==0x9c)
1246 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1248 print("%s\n", mime ? OCTET : "encrypted");
1255 * english by punctuation and frequencies
1260 int vow, comm, rare, badpun, punct;
1263 if(guess != Fascii && guess != Feascii)
1267 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1277 if(p[1] != ' ' && p[1] != '\n')
1280 if(badpun*5 > punct)
1282 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1284 if(2*cfreq[';'] > cfreq['e'])
1288 for(p="AEIOU"; *p; p++) {
1290 vow += cfreq[tolower(*p)];
1293 for(p="ETAION"; *p; p++) {
1295 comm += cfreq[tolower(*p)];
1298 for(p="VJKQXZ"; *p; p++) {
1300 rare += cfreq[tolower(*p)];
1302 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1303 print("%s\n", mime ? PLAIN : "English text");
1310 * pick up a number with
1315 p9bitnum(char *s, int *v)
1319 if(s[P9BITLEN-1] != ' ')
1321 s[P9BITLEN-1] = '\0';
1322 *v = strtol(s, &es, 10);
1323 s[P9BITLEN-1] = ' ';
1324 if(es != &s[P9BITLEN-1])
1330 depthof(char *s, int *newp)
1337 while(s<es && *s==' ')
1341 if('0'<=*s && *s<='9')
1342 return 1<<strtol(s, nil, 0);
1346 while(s<es && *s!=' '){
1347 if(strchr("rgbkamx", *s) == nil)
1350 if('0'<=*s && *s<='9')
1351 d += strtoul(s, &s, 10);
1356 if(d % 8 == 0 || 8 % d == 0)
1365 int dep, lox, loy, hix, hiy, px, new, cmpr;
1372 if(memcmp(cp, "compressed\n", 11) == 0) {
1377 if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1379 newlabel = new ? "" : "old ";
1380 if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1382 if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1384 if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1386 if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1391 if(hix <= 0 || hiy <= 0)
1395 px = 8/dep; /* pixels per byte */
1396 /* set len to number of bytes of data per scan line */
1397 len = (hix+px-1)/px;
1400 len *= hiy; /* col length */
1401 len += 5 * P9BITLEN; /* size of initial ascii */
1404 * for compressed images, don't look any further. otherwise:
1405 * for image file, length is non-zero and must match calculation above.
1406 * for /dev/window and /dev/screen the length is always zero.
1407 * for subfont, the subfont header should follow immediately.
1410 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1411 newlabel, dep, hix, hiy);
1415 * mbuf->length == 0 probably indicates reading a pipe.
1416 * Ghostscript sometimes produces a little extra on the end.
1418 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1419 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1420 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1421 newlabel, dep, hix, hiy);
1424 if (p9subfont(buf+len)) {
1425 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1426 newlabel, dep, hix, hiy);
1437 /* if image too big, assume it's a subfont */
1438 if (p+3*P9BITLEN > buf+sizeof(buf))
1441 if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */
1443 if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */
1445 if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */
1447 if(n > 0 && h > 0 && a >= 0)
1452 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1459 char pathname[1024];
1462 if (!getfontnum(cp, &cp)) /* height */
1464 if (!getfontnum(cp, &cp)) /* ascent */
1466 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1467 if (!getfontnum(cp, &cp)) /* min */
1469 if (!getfontnum(cp, &cp)) /* max */
1471 getfontnum(cp, &cp); /* optional offset */
1472 while (WHITESPACE(*cp))
1474 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1476 /* construct a path name, if needed */
1478 if (*p != '/' && slash) {
1480 if (n < sizeof(pathname))
1481 memcpy(pathname, fname, n);
1484 if (n+cp-p+4 < sizeof(pathname)) {
1485 memcpy(pathname+n, p, cp-p);
1488 if (access(pathname, AEXIST) < 0) {
1489 strcpy(pathname+n, ".0");
1490 if (access(pathname, AEXIST) < 0)
1496 print("%s\n", mime ? PLAIN : "font file");
1503 getfontnum(uchar *cp, uchar **rp)
1505 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1507 if (*cp < '0' || *cp > '9')
1509 strtoul((char *)cp, (char **)rp, 0);
1510 if (!WHITESPACE(**rp)) {
1520 if(strstr((char *)buf, "\\rtf1")){
1521 print(mime ? "application/rtf\n" : "rich text format\n");
1530 if (buf[0] == 0x4d && buf[1] == 0x5a){
1531 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1540 if(buf[0] || buf[1] || buf[3] || buf[9])
1542 if(buf[4] == 0x00 && buf[5] == 0x00)
1546 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1549 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1558 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1581 static char *type[] = {
1582 [1] "relocatable object",
1584 [3] "shared library",
1588 if (memcmp(buf, "\x7fELF", 4) == 0){
1591 int n = (buf[19] << 8) | buf[18];
1592 char *p = "unknown";
1593 char *t = "unknown";
1595 if (n > 0 && n < nelem(cpu) && cpu[n])
1598 /* try the other byte order */
1600 n = (buf[18] << 8) | buf[19];
1601 if (n > 0 && n < nelem(cpu) && cpu[n])
1605 n = (buf[16]<< 8) | buf[17];
1607 n = (buf[17]<< 8) | buf[16];
1609 if(n>0 && n < nelem(type) && type[n])
1611 print("%s ELF %s\n", p, t);
1614 print("application/x-elf-executable\n");
1624 int i, j, ldepth, l;
1628 for(j = 0; j < 3; j++){
1629 for(p = (char*)buf, i=0; i<3; i++){
1630 if(p[0] != '0' || p[1] != 'x')
1634 else if(buf[2+4] == ',')
1645 while(*p == ' ' || *p == '\t')
1653 print("application/x-face\n");
1655 print("face image depth %d\n", ldepth);