8 * file - determine type of file
/*
 * LENDIAN(p): assemble the four bytes at p, least significant byte
 * first, into one unsigned value.
 *
 * Each byte is widened to unsigned long before it is shifted.  A uchar
 * operand promotes to (signed) int, so shifting a byte >= 0x80 left by
 * 24 would shift into the sign bit, which is undefined behaviour, and
 * the negative result would sign-extend wherever long is wider than int.
 *
 * p is evaluated four times; pass only side-effect-free expressions.
 */
#define LENDIAN(p) ((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
	((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(uchar*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
/*
 * table of classifier routines, listed in the order they are tried.
 * the classification loop walks call[] from the top and stops
 * iterating when it reaches a zero entry.
 */
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 iscint, /* compiler/assembler intermediate */
190 ishtml, /* html keywords */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
211 char OCTET[] = "application/octet-stream";
212 char PLAIN[] = "text/plain";
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 print("cannot open: %r\n");
287 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298 memmove(buf, buf+3, nbuf-3);
302 if(memcmp(buf, "\xFE\xFF", 2) == 0){
308 memmove(rb, buf+2, nbuf);
310 e = p+sizeof(buf)-UTFmax-1;
311 for(i=0; i<nbuf && p < e; i+=2){
312 r = rb[i+1] | rb[i]<<8;
313 p += runetochar(p, &r);
317 nbuf = p - (char*)buf;
319 if(memcmp(buf, "\xFF\xFE", 2) == 0){
325 memmove(rb, buf+2, nbuf);
327 e = p+sizeof(buf)-UTFmax-1;
328 for(i=0; i<nbuf && p < e; i+=2){
329 r = rb[i] | rb[i+1]<<8;
330 p += runetochar(p, &r);
334 nbuf = p - (char*)buf;
348 print("cannot stat: %r\n");
351 if(mbuf->mode & DMDIR) {
352 print("%s\n", mime ? OCTET : "directory");
355 if(mbuf->type != 'M' && mbuf->type != '|') {
357 print("%s\n", OCTET);
359 print("special file #%C/%s\n", mbuf->type, mbuf->name);
362 /* may be reading a pipe on standard input */
363 nbuf = readn(fd, buf, sizeof(buf)-1);
365 print("cannot read: %r\n");
369 print("%s\n", mime ? PLAIN : "empty file");
377 * build histogram table
379 memset(cfreq, 0, sizeof(cfreq));
380 for (i = 0; language[i].name; i++)
381 language[i].count = 0;
382 eob = (char *)buf+nbuf;
383 for(n = 0, p = (char *)buf; p < eob; n++) {
384 if (!fullrune(p, eob-p) && eob-p < UTFmax)
386 p += chartorune(&r, p);
389 else if (r <= 0x7f) {
390 if (!isprint(r) && !isspace(r))
391 f = Ceascii; /* ASCII control char */
393 } else if (r == 0x80) {
397 f = Cbinary; /* Invalid Runes */
399 f = Clatin; /* Latin 1 */
402 f = Cutf; /* UTF extension */
404 cfreq[f]++; /* ASCII chars peg directly */
411 else if (cfreq[Cutf])
413 else if (cfreq[Clatin])
415 else if (cfreq[Ceascii])
417 else if (cfreq[Cnull])
422 * lookup dictionary words
424 memset(wfreq, 0, sizeof(wfreq));
425 if(guess == Fascii || guess == Flatin || guess == Futf)
428 * call individual classify routines
430 for(i=0; call[i]; i++)
436 * print out gross classification
438 if (nbuf < 100 && !mime)
439 print(mime ? PLAIN : "short ");
441 print("%s\n", mime ? PLAIN : "Ascii");
442 else if (guess == Feascii)
443 print("%s\n", mime ? PLAIN : "extended ascii");
444 else if (guess == Flatin)
445 print("%s\n", mime ? PLAIN : "latin ascii");
446 else if (guess == Futf && utf_count() < 4)
448 else print("%s\n", mime ? OCTET : "binary");
452 bump_utf_count(Rune r)
456 high = sizeof(language)/sizeof(language[0])-1;
457 for (low = 0; low < high;) {
459 if (r >= language[mid].low) {
460 if (r <= language[mid].high) {
461 language[mid].count++;
474 for (i = 0; language[i].name; i++)
475 if (language[i].count > 0)
476 switch (language[i].mode) {
492 for (i = 'a'; i < 'z'; i++)
495 for (i = 'A'; i < 'Z'; i++)
502 find_first(char *name)
506 for (i = 0; language[i].name != 0; i++)
507 if (language[i].mode == First
508 && strcmp(language[i].name, name) == 0)
519 print("%s\n", PLAIN);
527 for (i = 0; language[i].name; i++)
528 if (language[i].count) {
529 switch(language[i].mode) {
531 j = find_first(language[i].name);
534 if (language[j].count > 0)
542 print("%s", language[i].name);
557 int low, high, mid, r;
562 while (p < buf+nbuf && !isalpha(*p))
567 while(p < buf+nbuf && isalpha(*p))
571 high = sizeof(dict)/sizeof(dict[0]);
572 for(low = 0;low < high;) {
574 r = strcmp(dict[mid].word, (char*)p2);
576 wfreq[dict[mid].class]++;
588 typedef struct Filemagic Filemagic;
597 * integers in this table must be as seen on a little-endian machine
598 * when read from a file.
600 Filemagic long0tab[] = {
601 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
603 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
605 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
606 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
607 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
608 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
609 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
610 070707, 0xFFFF, "cpio archive", "application/x-cpio",
611 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
612 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
613 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
614 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
616 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
618 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
620 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
622 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
624 * venti & fossil magic numbers are stored big-endian on disk,
625 * thus the numbers appear reversed in this table.
627 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
628 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
632 filemagic(Filemagic *tab, int ntab, ulong x)
636 for(i=0; i<ntab; i++)
637 if((x&tab[i].mask) == tab[i].x){
638 print("%s\n", mime ? tab[i].mime : tab[i].desc);
647 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
650 typedef struct Fileoffmag Fileoffmag;
657 * integers in this table must be as seen on a little-endian machine
658 * when read from a file.
660 Fileoffmag longofftab[] = {
662 * venti & fossil magic numbers are stored big-endian on disk,
663 * thus the numbers appear reversed in this table.
665 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
666 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
667 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
668 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
672 fileoffmagic(Fileoffmag *tab, int ntab)
677 uchar buf[sizeof(long)];
679 for(i=0; i<ntab; i++) {
681 seek(fd, tp->off, 0);
682 if (readn(fd, buf, sizeof buf) != sizeof buf)
685 if((x&tp->mask) == tp->x){
686 print("%s\n", mime ? tp->mime : tp->desc);
696 return fileoffmagic(longofftab, nelem(longofftab));
704 seek(fd, 0, 0); /* reposition to start of file */
705 if(crackhdr(fd, &f)) {
706 print("%s\n", mime ? OCTET : f.name);
714 enum { NAMSIZ = 100, TBLOCK = 512 };
729 char linkname[NAMSIZ];
730 /* rest are defined by POSIX's ustar format; see p1003.2b */
731 char magic[6]; /* "ustar" */
737 char prefix[155]; /* if non-null, path = prefix "/" name */
742 checksum(union hblock *hp)
746 struct header *hdr = &hp->dbuf;
748 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
751 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
761 union hblock *hp = (union hblock *)tblock;
762 struct header *hdr = &hp->dbuf;
764 seek(fd, 0, 0); /* reposition to start of file */
765 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
767 chksum = strtol(hdr->chksum, 0, 8);
768 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
769 if (strcmp(hdr->magic, "ustar") == 0)
770 print(mime? "application/x-ustar\n": "posix tar archive\n");
772 print(mime? "application/x-tar\n": "tar archive\n");
779 * initial words to classify file
789 "\x1f\x9d", "compressed", 2, "application/x-compress",
790 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
791 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
792 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
793 "!<arch>\n", "archive", 8, "application/octet-stream",
794 "070707", "cpio archive - ascii header", 6, "application/octet-stream",
795 "#!/bin/rc", "rc executable file", 9, "text/plain",
796 "#!/bin/sh", "sh executable file", 9, "text/plain",
797 "%!", "postscript", 2, "application/postscript",
798 "\004%!", "postscript", 3, "application/postscript",
799 "x T post", "troff output for post", 8, "application/troff",
800 "x T Latin1", "troff output for Latin1", 10, "application/troff",
801 "x T utf", "troff output for UTF", 7, "application/troff",
802 "x T 202", "troff output for 202", 7, "application/troff",
803 "x T aps", "troff output for aps", 7, "application/troff",
804 "x T ", "troff output", 4, "application/troff",
805 "GIF", "GIF image", 3, "image/gif",
806 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
807 "%PDF", "PDF", 4, "application/pdf",
808 "<!DOCTYPE", "HTML file", 9, "text/html",
809 "<!doctype", "HTML file", 9, "text/html",
810 "<!--", "HTML file", 4, "text/html",
811 "<html>", "HTML file", 6, "text/html",
812 "<HTML>", "HTML file", 6, "text/html",
813 "<?xml", "HTML file", 5, "text/html",
814 "\111\111\052\000", "tiff", 4, "image/tiff",
815 "\115\115\000\052", "tiff", 4, "image/tiff",
816 "\377\330\377\340", "jpeg", 4, "image/jpeg",
817 "\377\330\377\341", "jpeg", 4, "image/jpeg",
818 "\377\330\377\333", "jpeg", 4, "image/jpeg",
819 "\xff\xd8", "jpeg", 2, "image/jpeg",
820 "BM", "bmp", 2, "image/bmp",
821 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
822 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
823 "\033E\033", "HP PCL printer data", 3, OCTET,
824 "\033&", "HP PCL printer data", 2, OCTET,
825 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
826 "\033Lua", "Lua bytecode", 4, OCTET,
827 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
828 "OggS", "ogg audio", 4, "audio/ogg",
829 ".snd", "sun audio", 4, "audio/basic",
830 "\211PNG", "PNG image", 4, "image/png",
831 "P1\n", "ppm", 3, "image/ppm",
832 "P2\n", "ppm", 3, "image/ppm",
833 "P3\n", "ppm", 3, "image/ppm",
834 "P4\n", "ppm", 3, "image/ppm",
835 "P5\n", "ppm", 3, "image/ppm",
836 "P6\n", "ppm", 3, "image/ppm",
837 "/* XPM */\n", "xbm", 10, "image/xbm",
838 ".HTML ", "troff -ms input", 6, "text/troff",
839 ".LP", "troff -ms input", 3, "text/troff",
840 ".ND", "troff -ms input", 3, "text/troff",
841 ".PP", "troff -ms input", 3, "text/troff",
842 ".TL", "troff -ms input", 3, "text/troff",
843 ".TR", "troff -ms input", 3, "text/troff",
844 ".TH", "manual page", 3, "text/troff",
845 ".\\\"", "troff input", 3, "text/troff",
846 ".de", "troff input", 3, "text/troff",
847 ".if", "troff input", 3, "text/troff",
848 ".nr", "troff input", 3, "text/troff",
849 ".tr", "troff input", 3, "text/troff",
850 "vac:", "venti score", 4, "text/plain",
851 "-----BEGIN CERTIFICATE-----\n",
852 "pem certificate", -1, "text/plain",
853 "-----BEGIN TRUSTED CERTIFICATE-----\n",
854 "pem trusted certificate", -1, "text/plain",
855 "-----BEGIN X509 CERTIFICATE-----\n",
856 "pem x.509 certificate", -1, "text/plain",
857 "subject=/C=", "pem certificate with header", -1, "text/plain",
858 "process snapshot ", "process snapshot", -1, "application/snapfs",
859 "d8:announce", "torrent file", 11, "application/x-bittorrent",
860 "[playlist]", "playlist", 10, "application/x-scpls",
861 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
862 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
863 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
871 struct FILE_STRING *p;
873 for(p = file_string; p->key; p++) {
877 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
878 print("%s\n", mime ? p->mime : p->filetype);
882 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
883 for(i = 5; i < nbuf; i++)
887 print("%s\n", OCTET);
889 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
900 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
911 for(p = offstrs; p->key; p++) {
916 if (readn(fd, buf, n) != n)
918 if(memcmp(buf, p->key, n) == 0) {
919 print("%s\n", mime ? p->mime : p->filetype);
929 if (strncmp((char*)buf, "FORM", 4) == 0 &&
930 strncmp((char*)buf+8, "AIFF", 4) == 0) {
931 print("%s\n", mime? "audio/x-aiff": "aiff audio");
934 if (strncmp((char*)buf, "RIFF", 4) == 0) {
935 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
936 print("%s\n", mime? "audio/wave": "wave audio");
937 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
938 print("%s\n", mime? "video/avi": "avi video");
940 print("%s\n", mime? "application/octet-stream": "riff file");
946 char* html_string[] = {
948 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
950 "button", "center", "iframe", "object", "option", "script",
952 "blink", "embed", "frame", "input", "label", "param", "small",
953 "style", "table", "tbody", "tfoot", "thead", "title",
954 "?xml", "body", "code", "font", "form", "head", "html",
955 "link", "menu", "meta", "span",
956 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
957 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
958 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
959 "a", "b", "i", "p", "q", "u",
972 while(p < buf+nbuf && *p != '<')
981 for(i = 0; html_string[i]; i++){
982 n = strlen(html_string[i]);
985 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
987 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
989 print("%s\n", mime ? "text/html" : "HTML file");
1000 char* rfc822_string[] =
1022 q = strchr(p, '\n');
1026 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1033 if(*p != '\t' && *p != ' '){
1037 for(i = 0; rfc822_string[i]; i++) {
1038 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1047 print("%s\n", mime ? "message/rfc822" : "email file");
1059 q = strchr(p, '\n');
1063 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1064 print("%s\n", mime ? "text/plain" : "mail box");
1078 if(Binit(&b, fd, OREAD) == Beof)
1081 type = objtype(&b, &name);
1085 print("%s\n", OCTET);
1087 print("%s intermediate\n", name);
1100 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1102 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1107 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1112 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1118 print("%s\n", PLAIN);
1121 if(wfreq[Alword] > 0)
1122 print("alef program\n");
1124 print("c program\n");
1134 if(wfreq[Lword] < 4)
1136 print("%s\n", mime ? PLAIN : "limbo program");
1146 if(wfreq[Aword] < 2)
1148 print("%s\n", mime ? PLAIN : "as program");
1160 if((p[12] | p[13]<<8) == 0) /* width */
1162 if((p[14] | p[15]<<8) == 0) /* height */
1164 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1166 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1168 if(p[1] == 0){ /* non color-mapped */
1169 if((p[2]&3) != 2 && (p[2]&3) != 3)
1171 if((p[5] | p[6]<<8) != 0) /* palette length */
1174 if(p[1] == 1){ /* color-mapped */
1175 if((p[2]&3) != 1 || p[7] == 0)
1177 if((p[5] | p[6]<<8) == 0) /* palette length */
1181 print("%s\n", mime ? "image/tga" : "targa image");
1192 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1193 if((p[1] & 0xFE) == 0xFA){
1194 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1203 * low entropy means encrypted
1213 memset(bucket, 0, sizeof(bucket));
1214 for(i=nbuf-64; i<nbuf; i++)
1215 bucket[(buf[i]>>5)&07] += 1;
1219 cs += (bucket[i]-8)*(bucket[i]-8);
1222 if(buf[0]==0x1f && buf[1]==0x9d)
1223 print("%s\n", mime ? "application/x-compress" : "compressed");
1225 if(buf[0]==0x1f && buf[1]==0x8b)
1226 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1228 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1229 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1231 if(buf[0]==0x78 && buf[1]==0x9c)
1232 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1234 print("%s\n", mime ? OCTET : "encrypted");
1241 * english by punctuation and frequencies
1246 int vow, comm, rare, badpun, punct;
1249 if(guess != Fascii && guess != Feascii)
1253 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1263 if(p[1] != ' ' && p[1] != '\n')
1266 if(badpun*5 > punct)
1268 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1270 if(2*cfreq[';'] > cfreq['e'])
1274 for(p="AEIOU"; *p; p++) {
1276 vow += cfreq[tolower(*p)];
1279 for(p="ETAION"; *p; p++) {
1281 comm += cfreq[tolower(*p)];
1284 for(p="VJKQXZ"; *p; p++) {
1286 rare += cfreq[tolower(*p)];
1288 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1289 print("%s\n", mime ? PLAIN : "English text");
1296 * pick up a number with
1326 depthof(char *s, int *newp)
1333 while(s<es && *s==' ')
1337 if('0'<=*s && *s<='9')
1338 return 1<<strtol(s, 0, 0);
1342 while(s<es && *s!=' '){
1343 s++; /* skip letter */
1344 d += strtoul(s, &s, 10);
1347 if(d % 8 == 0 || 8 % d == 0)
1356 int dep, lox, loy, hix, hiy, px, new, cmpr;
1366 if(memcmp(cp, "compressed\n", 11) == 0) {
1371 dep = depthof((char*)cp + 0*P9BITLEN, &new);
1374 lox = p9bitnum(cp + 1*P9BITLEN);
1375 loy = p9bitnum(cp + 2*P9BITLEN);
1376 hix = p9bitnum(cp + 3*P9BITLEN);
1377 hiy = p9bitnum(cp + 4*P9BITLEN);
1378 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1382 px = 8/dep; /* pixels per byte */
1383 /* set len to number of bytes of data per scan line */
1385 len = (hix+px-1)/px - lox/px;
1386 else{ /* make positive before divide */
1389 len = (t+hix+px-1)/px;
1392 len = (hix-lox)*dep/8;
1393 len *= hiy - loy; /* bytes per scan line times number of rows (hiy-loy) */
1394 len += 5 * P9BITLEN; /* plus the five ascii header fields of P9BITLEN bytes each */
1397 * for compressed images, don't look any further. otherwise:
1398 * for image file, length is non-zero and must match calculation above.
1399 * for /dev/window and /dev/screen the length is always zero.
1400 * for subfont, the subfont header should follow immediately.
1403 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1408 * mbuf->length == 0 probably indicates reading a pipe.
1409 * Ghostscript sometimes produces a little extra on the end.
1411 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1412 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1413 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1416 if (p9subfont(buf+len)) {
1417 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1428 /* if image too big, assume it's a subfont */
1429 if (p+3*P9BITLEN > buf+sizeof(buf))
1432 n = p9bitnum(p + 0*P9BITLEN); /* char count */
1435 h = p9bitnum(p + 1*P9BITLEN); /* height */
1438 a = p9bitnum(p + 2*P9BITLEN); /* ascent */
/*
 * non-zero when c is a space, tab or newline.
 * c may be evaluated up to three times, so pass only
 * expressions without side effects.
 */
1444 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1451 char pathname[1024];
1454 if (!getfontnum(cp, &cp)) /* height */
1456 if (!getfontnum(cp, &cp)) /* ascent */
1458 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1459 if (!getfontnum(cp, &cp)) /* min */
1461 if (!getfontnum(cp, &cp)) /* max */
1463 getfontnum(cp, &cp); /* optional offset */
1464 while (WHITESPACE(*cp))
1466 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1468 /* construct a path name, if needed */
1470 if (*p != '/' && slash) {
1472 if (n < sizeof(pathname))
1473 memcpy(pathname, fname, n);
1476 if (n+cp-p+4 < sizeof(pathname)) {
1477 memcpy(pathname+n, p, cp-p);
1480 if (access(pathname, AEXIST) < 0) {
1481 strcpy(pathname+n, ".0");
1482 if (access(pathname, AEXIST) < 0)
1488 print(mime ? "text/plain\n" : "font file\n");
1495 getfontnum(uchar *cp, uchar **rp)
1497 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1499 if (*cp < '0' || *cp > '9')
1501 strtoul((char *)cp, (char **)rp, 0);
1502 if (!WHITESPACE(**rp)) {
1512 if(strstr((char *)buf, "\\rtf1")){
1513 print(mime ? "application/rtf\n" : "rich text format\n");
1522 if (buf[0] == 0x4d && buf[1] == 0x5a){
1523 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1532 if(buf[0] || buf[1] || buf[3] || buf[9])
1534 if(buf[4] == 0x00 && buf[5] == 0x00)
1538 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1541 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1550 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1573 static char *type[] = {
1574 [1] "relocatable object",
1576 [3] "shared library",
1580 if (memcmp(buf, "\x7fELF", 4) == 0){
1583 int n = (buf[19] << 8) | buf[18];
1584 char *p = "unknown";
1585 char *t = "unknown";
1587 if (n > 0 && n < nelem(cpu) && cpu[n])
1590 /* try the other byte order */
1592 n = (buf[18] << 8) | buf[19];
1593 if (n > 0 && n < nelem(cpu) && cpu[n])
1597 n = (buf[16]<< 8) | buf[17];
1599 n = (buf[17]<< 8) | buf[16];
1601 if(n>0 && n < nelem(type) && type[n])
1603 print("%s ELF %s\n", p, t);
1606 print("application/x-elf-executable\n");
1616 int i, j, ldepth, l;
1620 for(j = 0; j < 3; j++){
1621 for(p = (char*)buf, i=0; i<3; i++){
1622 if(p[0] != '0' || p[1] != 'x')
1626 else if(buf[2+4] == ',')
1637 while(*p == ' ' || *p == '\t')
1645 print("application/x-face\n");
1647 print("face image depth %d\n", ldepth);