8 * file - determine type of file
/*
 * LENDIAN(p): assemble the four bytes p[0]..p[3] into one 32-bit value,
 * with p[0] as the least significant byte.
 *
 * Each byte is widened to unsigned long before it is shifted.  A byte
 * operand otherwise promotes to signed int, so (p)[3]<<24 would shift
 * into the sign bit (undefined behaviour) and produce a negative,
 * sign-extended result whenever p[3] >= 0x80.
 *
 * p is expanded four times, so it must be free of side effects.
 * Assumes p addresses unsigned bytes.
 */
#define LENDIAN(p) ((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
	((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(uchar*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 iscint, /* compiler/assembler intermediate */
190 ishtml, /* html keywords */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
/*
 * MIME type strings shared by the classifiers when -m output is requested:
 * OCTET is printed for directories, special files and unrecognised binary
 * data; PLAIN is printed for empty files and the plain-text guesses.
 */
211 char OCTET[] = "application/octet-stream";
212 char PLAIN[] = "text/plain";
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 print("cannot open: %r\n");
287 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298 memmove(buf, buf+3, nbuf-3);
302 if(memcmp(buf, "\xFE\xFF", 2) == 0){
308 memmove(rb, buf+2, nbuf);
310 e = p+sizeof(buf)-UTFmax-1;
311 for(i=0; i<nbuf && p < e; i+=2){
312 r = rb[i+1] | rb[i]<<8;
313 p += runetochar(p, &r);
317 nbuf = p - (char*)buf;
319 if(memcmp(buf, "\xFF\xFE", 2) == 0){
325 memmove(rb, buf+2, nbuf);
327 e = p+sizeof(buf)-UTFmax-1;
328 for(i=0; i<nbuf && p < e; i+=2){
329 r = rb[i] | rb[i+1]<<8;
330 p += runetochar(p, &r);
334 nbuf = p - (char*)buf;
348 print("cannot stat: %r\n");
351 if(mbuf->mode & DMDIR) {
352 print("%s\n", mime ? OCTET : "directory");
355 if(mbuf->type != 'M' && mbuf->type != '|') {
357 print("%s\n", OCTET);
359 print("special file #%C/%s\n", mbuf->type, mbuf->name);
362 /* may be reading a pipe on standard input */
363 nbuf = readn(fd, buf, sizeof(buf)-1);
365 print("cannot read: %r\n");
369 print("%s\n", mime ? PLAIN : "empty file");
377 * build histogram table
379 memset(cfreq, 0, sizeof(cfreq));
380 for (i = 0; language[i].name; i++)
381 language[i].count = 0;
382 eob = (char *)buf+nbuf;
383 for(n = 0, p = (char *)buf; p < eob; n++) {
384 if (!fullrune(p, eob-p) && eob-p < UTFmax)
386 p += chartorune(&r, p);
389 else if (r <= 0x7f) {
390 if (!isprint(r) && !isspace(r))
391 f = Ceascii; /* ASCII control char */
393 } else if (r == 0x80) {
397 f = Cbinary; /* Invalid Runes */
399 f = Clatin; /* Latin 1 */
402 f = Cutf; /* UTF extension */
404 cfreq[f]++; /* ASCII chars peg directly */
411 else if (cfreq[Cutf])
413 else if (cfreq[Clatin])
415 else if (cfreq[Ceascii])
417 else if (cfreq[Cnull])
422 * lookup dictionary words
424 memset(wfreq, 0, sizeof(wfreq));
425 if(guess == Fascii || guess == Flatin || guess == Futf)
428 * call individual classify routines
430 for(i=0; call[i]; i++)
436 * print out gross classification
438 if (nbuf < 100 && !mime)
439 print(mime ? PLAIN : "short ");
441 print("%s\n", mime ? PLAIN : "Ascii");
442 else if (guess == Feascii)
443 print("%s\n", mime ? PLAIN : "extended ascii");
444 else if (guess == Flatin)
445 print("%s\n", mime ? PLAIN : "latin ascii");
446 else if (guess == Futf && utf_count() < 4)
448 else print("%s\n", mime ? OCTET : "binary");
452 bump_utf_count(Rune r)
456 high = sizeof(language)/sizeof(language[0])-1;
457 for (low = 0; low < high;) {
459 if (r >= language[mid].low) {
460 if (r <= language[mid].high) {
461 language[mid].count++;
474 for (i = 0; language[i].name; i++)
475 if (language[i].count > 0)
476 switch (language[i].mode) {
492 for (i = 'a'; i < 'z'; i++)
495 for (i = 'A'; i < 'Z'; i++)
502 find_first(char *name)
506 for (i = 0; language[i].name != 0; i++)
507 if (language[i].mode == First
508 && strcmp(language[i].name, name) == 0)
519 print("%s\n", PLAIN);
527 for (i = 0; language[i].name; i++)
528 if (language[i].count) {
529 switch(language[i].mode) {
531 j = find_first(language[i].name);
534 if (language[j].count > 0)
542 print("%s", language[i].name);
557 int low, high, mid, r;
562 while (p < buf+nbuf && !isalpha(*p))
567 while(p < buf+nbuf && isalpha(*p))
571 high = sizeof(dict)/sizeof(dict[0]);
572 for(low = 0;low < high;) {
574 r = strcmp(dict[mid].word, (char*)p2);
576 wfreq[dict[mid].class]++;
588 typedef struct Filemagic Filemagic;
597 * integers in this table must be as seen on a little-endian machine
598 * when read from a file.
600 Filemagic long0tab[] = {
601 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
603 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
605 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
606 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
607 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
608 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
609 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
610 070707, 0xFFFF, "cpio archive", "application/x-cpio",
611 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
612 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",	/* no newline in desc: filemagic prints "%s\n" */
614 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
617 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
619 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
621 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
623 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
625 * venti & fossil magic numbers are stored big-endian on disk,
626 * thus the numbers appear reversed in this table.
628 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
629 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
633 filemagic(Filemagic *tab, int ntab, ulong x)
637 for(i=0; i<ntab; i++)
638 if((x&tab[i].mask) == tab[i].x){
639 print("%s\n", mime ? tab[i].mime : tab[i].desc);
648 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
651 typedef struct Fileoffmag Fileoffmag;
658 * integers in this table must be as seen on a little-endian machine
659 * when read from a file.
661 Fileoffmag longofftab[] = {
663 * venti & fossil magic numbers are stored big-endian on disk,
664 * thus the numbers appear reversed in this table.
666 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
667 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
668 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
669 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
673 fileoffmagic(Fileoffmag *tab, int ntab)
678 uchar buf[sizeof(long)];
680 for(i=0; i<ntab; i++) {
682 seek(fd, tp->off, 0);
683 if (readn(fd, buf, sizeof buf) != sizeof buf)
686 if((x&tp->mask) == tp->x){
687 print("%s\n", mime ? tp->mime : tp->desc);
697 return fileoffmagic(longofftab, nelem(longofftab));
705 seek(fd, 0, 0); /* reposition to start of file */
706 if(crackhdr(fd, &f)) {
707 print("%s\n", mime ? OCTET : f.name);
/*
 * tar header geometry: NAMSIZ sizes the linkname field of the header,
 * TBLOCK is the size in bytes of one header block (the span summed by checksum).
 */
715 enum { NAMSIZ = 100, TBLOCK = 512 };
730 char linkname[NAMSIZ];
731 /* rest are defined by POSIX's ustar format; see p1003.2b */
732 char magic[6]; /* "ustar" */
738 char prefix[155]; /* if non-null, path = prefix "/" name */
743 checksum(union hblock *hp)
747 struct header *hdr = &hp->dbuf;
749 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
752 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
762 union hblock *hp = (union hblock *)tblock;
763 struct header *hdr = &hp->dbuf;
765 seek(fd, 0, 0); /* reposition to start of file */
766 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
768 chksum = strtol(hdr->chksum, 0, 8);
769 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
770 if (strcmp(hdr->magic, "ustar") == 0)
771 print(mime? "application/x-ustar\n": "posix tar archive\n");
773 print(mime? "application/x-tar\n": "tar archive\n");
780 * initial words to classify file
790 "\x1f\x9d", "compressed", 2, "application/x-compress",
791 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
792 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
793 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
794 "!<arch>\n", "archive", 8, "application/octet-stream",
795 "070707", "cpio archive - ascii header", 6, "application/octet-stream",
796 "#!/bin/rc", "rc executable file", 9, "text/plain",
797 "#!/bin/sh", "sh executable file", 9, "text/plain",
798 "%!", "postscript", 2, "application/postscript",
799 "\004%!", "postscript", 3, "application/postscript",
800 "x T post", "troff output for post", 8, "application/troff",
801 "x T Latin1", "troff output for Latin1", 10, "application/troff",
802 "x T utf", "troff output for UTF", 7, "application/troff",
803 "x T 202", "troff output for 202", 7, "application/troff",
804 "x T aps", "troff output for aps", 7, "application/troff",
805 "x T ", "troff output", 4, "application/troff",
806 "GIF", "GIF image", 3, "image/gif",
807 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
808 "%PDF", "PDF", 4, "application/pdf",
809 "<!DOCTYPE", "HTML file", 9, "text/html",
810 "<!doctype", "HTML file", 9, "text/html",
811 "<!--", "HTML file", 4, "text/html",
812 "<html>", "HTML file", 6, "text/html",
813 "<HTML>", "HTML file", 6, "text/html",
814 "<?xml", "HTML file", 5, "text/html",
815 "\111\111\052\000", "tiff", 4, "image/tiff",
816 "\115\115\000\052", "tiff", 4, "image/tiff",
817 "\377\330\377\340", "jpeg", 4, "image/jpeg",
818 "\377\330\377\341", "jpeg", 4, "image/jpeg",
819 "\377\330\377\333", "jpeg", 4, "image/jpeg",
820 "\xff\xd8", "jpeg", 2, "image/jpeg",
821 "BM", "bmp", 2, "image/bmp",
822 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
823 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
824 "\033E\033", "HP PCL printer data", 3, OCTET,
825 "\033&", "HP PCL printer data", 2, OCTET,
826 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
827 "\033Lua", "Lua bytecode", 4, OCTET,
828 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
829 "OggS", "ogg audio", 4, "audio/ogg",
830 ".snd", "sun audio", 4, "audio/basic",
831 "\211PNG", "PNG image", 4, "image/png",
832 "P1\n", "ppm", 3, "image/ppm",
833 "P2\n", "ppm", 3, "image/ppm",
834 "P3\n", "ppm", 3, "image/ppm",
835 "P4\n", "ppm", 3, "image/ppm",
836 "P5\n", "ppm", 3, "image/ppm",
837 "P6\n", "ppm", 3, "image/ppm",
838 "/* XPM */\n", "xbm", 10, "image/xbm",
839 ".HTML ", "troff -ms input", 6, "text/troff",
840 ".LP", "troff -ms input", 3, "text/troff",
841 ".ND", "troff -ms input", 3, "text/troff",
842 ".PP", "troff -ms input", 3, "text/troff",
843 ".TL", "troff -ms input", 3, "text/troff",
844 ".TR", "troff -ms input", 3, "text/troff",
845 ".TH", "manual page", 3, "text/troff",
846 ".\\\"", "troff input", 3, "text/troff",
847 ".de", "troff input", 3, "text/troff",
848 ".if", "troff input", 3, "text/troff",
849 ".nr", "troff input", 3, "text/troff",
850 ".tr", "troff input", 3, "text/troff",
851 "vac:", "venti score", 4, "text/plain",
852 "-----BEGIN CERTIFICATE-----\n",
853 "pem certificate", -1, "text/plain",
854 "-----BEGIN TRUSTED CERTIFICATE-----\n",
855 "pem trusted certificate", -1, "text/plain",
856 "-----BEGIN X509 CERTIFICATE-----\n",
857 "pem x.509 certificate", -1, "text/plain",
858 "subject=/C=", "pem certificate with header", -1, "text/plain",
859 "process snapshot ", "process snapshot", -1, "application/snapfs",
860 "d8:announce", "torrent file", 11, "application/x-bittorrent",
861 "[playlist]", "playlist", 10, "application/x-scpls",
862 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
863 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
864 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
872 struct FILE_STRING *p;
874 for(p = file_string; p->key; p++) {
878 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
879 print("%s\n", mime ? p->mime : p->filetype);
883 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
884 for(i = 5; i < nbuf; i++)
888 print("%s\n", OCTET);
890 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
901 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
912 for(p = offstrs; p->key; p++) {
917 if (readn(fd, buf, n) != n)
919 if(memcmp(buf, p->key, n) == 0) {
920 print("%s\n", mime ? p->mime : p->filetype);
930 if (strncmp((char*)buf, "FORM", 4) == 0 &&
931 strncmp((char*)buf+8, "AIFF", 4) == 0) {
932 print("%s\n", mime? "audio/x-aiff": "aiff audio");
935 if (strncmp((char*)buf, "RIFF", 4) == 0) {
936 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
937 print("%s\n", mime? "audio/wave": "wave audio");
938 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
939 print("%s\n", mime? "video/avi": "avi video");
941 print("%s\n", mime? "application/octet-stream": "riff file");
947 char* html_string[] = {
949 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
951 "button", "center", "iframe", "object", "option", "script",
953 "blink", "embed", "frame", "input", "label", "param", "small",
954 "style", "table", "tbody", "tfoot", "thead", "title",
955 "?xml", "body", "code", "font", "form", "head", "html",
956 "link", "menu", "meta", "span",
957 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
958 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
959 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
960 "a", "b", "i", "p", "q", "u",
973 while(p < buf+nbuf && *p != '<')
982 for(i = 0; html_string[i]; i++){
983 n = strlen(html_string[i]);
986 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
988 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
990 print("%s\n", mime ? "text/html" : "HTML file");
1001 char* rfc822_string[] =
1023 q = strchr(p, '\n');
1027 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1034 if(*p != '\t' && *p != ' '){
1038 for(i = 0; rfc822_string[i]; i++) {
1039 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1048 print("%s\n", mime ? "message/rfc822" : "email file");
1060 q = strchr(p, '\n');
1064 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1065 print("%s\n", mime ? "text/plain" : "mail box");
1079 if(Binit(&b, fd, OREAD) == Beof)
1082 type = objtype(&b, &name);
1086 print("%s\n", OCTET);
1088 print("%s intermediate\n", name);
1101 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1103 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1108 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1113 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1119 print("%s\n", PLAIN);
1122 if(wfreq[Alword] > 0)
1123 print("alef program\n");
1125 print("c program\n");
1135 if(wfreq[Lword] < 4)
1137 print("%s\n", mime ? PLAIN : "limbo program");
1147 if(wfreq[Aword] < 2)
1149 print("%s\n", mime ? PLAIN : "as program");
1161 if((p[12] | p[13]<<8) == 0) /* width */
1163 if((p[14] | p[15]<<8) == 0) /* height */
1165 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1167 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1169 if(p[1] == 0){ /* non color-mapped */
1170 if((p[2]&3) != 2 && (p[2]&3) != 3)
1172 if((p[5] | p[6]<<8) != 0) /* palette length */
1175 if(p[1] == 1){ /* color-mapped */
1176 if((p[2]&3) != 1 || p[7] == 0)
1178 if((p[5] | p[6]<<8) == 0) /* palette length */
1182 print("%s\n", mime ? "image/tga" : "targa image");
1193 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1194 if((p[1] & 0xFE) == 0xFA){
1195 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1204 * low entropy means encrypted
1214 memset(bucket, 0, sizeof(bucket));
1215 for(i=nbuf-64; i<nbuf; i++)
1216 bucket[(buf[i]>>5)&07] += 1;
1220 cs += (bucket[i]-8)*(bucket[i]-8);
1223 if(buf[0]==0x1f && buf[1]==0x9d)
1224 print("%s\n", mime ? "application/x-compress" : "compressed");
1226 if(buf[0]==0x1f && buf[1]==0x8b)
1227 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1229 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1230 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1232 if(buf[0]==0x78 && buf[1]==0x9c)
1233 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1235 print("%s\n", mime ? OCTET : "encrypted");
1242 * english by punctuation and frequencies
1247 int vow, comm, rare, badpun, punct;
1250 if(guess != Fascii && guess != Feascii)
1254 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1264 if(p[1] != ' ' && p[1] != '\n')
1267 if(badpun*5 > punct)
1269 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1271 if(2*cfreq[';'] > cfreq['e'])
1275 for(p="AEIOU"; *p; p++) {
1277 vow += cfreq[tolower(*p)];
1280 for(p="ETAION"; *p; p++) {
1282 comm += cfreq[tolower(*p)];
1285 for(p="VJKQXZ"; *p; p++) {
1287 rare += cfreq[tolower(*p)];
1289 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1290 print("%s\n", mime ? PLAIN : "English text");
1297 * pick up a number with
1327 depthof(char *s, int *newp)
1334 while(s<es && *s==' ')
1338 if('0'<=*s && *s<='9')
1339 return 1<<strtol(s, 0, 0);
1343 while(s<es && *s!=' '){
1344 s++; /* skip letter */
1345 d += strtoul(s, &s, 10);
1348 if(d % 8 == 0 || 8 % d == 0)
1357 int dep, lox, loy, hix, hiy, px, new, cmpr;
1367 if(memcmp(cp, "compressed\n", 11) == 0) {
1372 dep = depthof((char*)cp + 0*P9BITLEN, &new);
1375 lox = p9bitnum(cp + 1*P9BITLEN);
1376 loy = p9bitnum(cp + 2*P9BITLEN);
1377 hix = p9bitnum(cp + 3*P9BITLEN);
1378 hiy = p9bitnum(cp + 4*P9BITLEN);
1379 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1383 px = 8/dep; /* pixels per byte */
1384 /* set l to number of bytes of data per scan line */
1386 len = (hix+px-1)/px - lox/px;
1387 else{ /* make positive before divide */
1390 len = (t+hix+px-1)/px;
1393 len = (hix-lox)*dep/8;
1394 len *= hiy - loy; /* col length */
1395 len += 5 * P9BITLEN; /* size of initial ascii */
1398 * for compressed images, don't look any further. otherwise:
1399 * for image file, length is non-zero and must match calculation above.
1400 * for /dev/window and /dev/screen the length is always zero.
1401 * for subfont, the subfont header should follow immediately.
1404 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1409 * mbuf->length == 0 probably indicates reading a pipe.
1410 * Ghostscript sometimes produces a little extra on the end.
1412 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1413 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1414 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1417 if (p9subfont(buf+len)) {
1418 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1429 /* if image too big, assume it's a subfont */
1430 if (p+3*P9BITLEN > buf+sizeof(buf))
1433 n = p9bitnum(p + 0*P9BITLEN); /* char count */
1436 h = p9bitnum(p + 1*P9BITLEN); /* height */
1439 a = p9bitnum(p + 2*P9BITLEN); /* ascent */
/*
 * WHITESPACE(c): true when c is a space, tab or newline.
 * c is expanded up to three times, so the argument must have no side effects.
 */
1445 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1452 char pathname[1024];
1455 if (!getfontnum(cp, &cp)) /* height */
1457 if (!getfontnum(cp, &cp)) /* ascent */
1459 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1460 if (!getfontnum(cp, &cp)) /* min */
1462 if (!getfontnum(cp, &cp)) /* max */
1464 getfontnum(cp, &cp); /* optional offset */
1465 while (WHITESPACE(*cp))
1467 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1469 /* construct a path name, if needed */
1471 if (*p != '/' && slash) {
1473 if (n < sizeof(pathname))
1474 memcpy(pathname, fname, n);
1477 if (n+cp-p+4 < sizeof(pathname)) {
1478 memcpy(pathname+n, p, cp-p);
1481 if (access(pathname, AEXIST) < 0) {
1482 strcpy(pathname+n, ".0");
1483 if (access(pathname, AEXIST) < 0)
1489 print(mime ? "text/plain\n" : "font file\n");
1496 getfontnum(uchar *cp, uchar **rp)
1498 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1500 if (*cp < '0' || *cp > '9')
1502 strtoul((char *)cp, (char **)rp, 0);
1503 if (!WHITESPACE(**rp)) {
1513 if(strstr((char *)buf, "\\rtf1")){
1514 print(mime ? "application/rtf\n" : "rich text format\n");
1523 if (buf[0] == 0x4d && buf[1] == 0x5a){
1524 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1533 if(buf[0] || buf[1] || buf[3] || buf[9])
1535 if(buf[4] == 0x00 && buf[5] == 0x00)
1539 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1542 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1551 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1574 static char *type[] = {
1575 [1] "relocatable object",
1577 [3] "shared library",
1581 if (memcmp(buf, "\x7fELF", 4) == 0){
1584 int n = (buf[19] << 8) | buf[18];
1585 char *p = "unknown";
1586 char *t = "unknown";
1588 if (n > 0 && n < nelem(cpu) && cpu[n])
1591 /* try the other byte order */
1593 n = (buf[18] << 8) | buf[19];
1594 if (n > 0 && n < nelem(cpu) && cpu[n])
1598 n = (buf[16]<< 8) | buf[17];
1600 n = (buf[17]<< 8) | buf[16];
1602 if(n>0 && n < nelem(type) && type[n])
1604 print("%s ELF %s\n", p, t);
1607 print("application/x-elf-executable\n");
1617 int i, j, ldepth, l;
1621 for(j = 0; j < 3; j++){
1622 for(p = (char*)buf, i=0; i<3; i++){
1623 if(p[0] != '0' || p[1] != 'x')
1627 else if(buf[2+4] == ',')
1638 while(*p == ' ' || *p == '\t')
1646 print("application/x-face\n");
1648 print("face image depth %d\n", ldepth);