8 * file - determine type of file
10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
169 int p9bitnum(uchar*);
170 int p9subfont(uchar*);
171 void print_utf(void);
172 void type(char*, int);
176 int (*call[])(void) =
178 long0, /* recognizable by first 4 bytes */
179 istring, /* recognizable by first string */
180 iself, /* ELF (foreign) executable */
181 isexec, /* native executables */
182 iff, /* interchange file format (strings) */
183 longoff, /* recognizable by 4 bytes at some offset */
184 isoffstr, /* recognizable by string at some offset */
185 isrfc822, /* email file */
186 ismbox, /* mail box */
187 istar, /* recognizable by tar checksum */
188 ishtml, /* html keywords */
189 iscint, /* compiler/assembler intermediate */
190 islimbo, /* limbo source */
191 isc, /* c & alef compiler key words */
192 isas, /* assembler key words */
193 isp9font, /* plan 9 font */
194 isp9bit, /* plan 9 image (as from /dev/window) */
195 isrtf, /* rich text format */
196 ismsdos, /* msdos exe (virus file attachment) */
197 isicocur, /* windows icon or cursor file */
198 isface, /* ascii face file */
202 ismung, /* entropy compressed/encrypted */
203 isenglish, /* char frequency English */
209 char OCTET[] = "application/octet-stream";
210 char PLAIN[] = "text/plain";
213 main(int argc, char *argv[])
224 fprint(2, "usage: file [-m] [file...]\n");
229 if(mime == 0 || argc > 1){
230 for(i = 0; i < argc; i++) {
231 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
243 for(i = 0; i < argc; i++)
244 type(argv[i], maxlen);
250 type(char *file, int nlen)
258 for (i = 0, p = file; *p; i++) {
259 if (*p == '/') /* find rightmost slash */
261 p += chartorune(&r, p); /* count runes */
263 print("%s:%*s",file, nlen-i+1, "");
266 if ((fd = open(file, OREAD)) < 0) {
267 print("cannot open: %r\n");
275 * Unicode 4.0 4-byte runes.
284 fullrune1(char *p, int n)
292 if(n >= 2 && c < 0xE0)
294 if(n >= 3 && c < 0xF0)
303 chartorune1(Rune1 *rune, char *str)
305 int c, c1, c2, c3, n;
311 n = chartorune(&r, str);
316 c1 = *(uchar*)(str+1) & ~0x80;
317 c2 = *(uchar*)(str+2) & ~0x80;
318 c3 = *(uchar*)(str+3) & ~0x80;
319 n = (c<<18) | (c1<<12) | (c2<<6) | c3;
320 if(n < 0x10000 || n > 0x10FFFF){
339 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
344 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
349 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
350 memmove(buf, buf+3, nbuf-3);
354 if(memcmp(buf, "\xFE\xFF", 2) == 0){
360 memmove(rb, buf+2, nbuf);
363 for(i=0; i<nbuf && p < e; i+=2){
364 r = rb[i+1] | rb[i]<<8;
365 p += runetochar(p, &r);
369 nbuf = p - (char*)buf;
371 if(memcmp(buf, "\xFF\xFE", 2) == 0){
377 memmove(rb, buf+2, nbuf);
380 for(i=0; i<nbuf && p < e; i+=2){
381 r = rb[i] | rb[i+1]<<8;
382 p += runetochar(p, &r);
386 nbuf = p - (char*)buf;
400 print("cannot stat: %r\n");
403 if(mbuf->mode & DMDIR) {
404 print("%s\n", mime ? OCTET : "directory");
407 if(mbuf->type != 'M' && mbuf->type != '|') {
409 print("%s\n", OCTET);
411 print("special file #%C/%s\n", mbuf->type, mbuf->name);
414 /* may be reading a pipe on standard input */
415 nbuf = readn(fd, buf, sizeof(buf)-1);
417 print("cannot read: %r\n");
421 print("%s\n", mime ? PLAIN : "empty file");
429 * build histogram table
431 memset(cfreq, 0, sizeof(cfreq));
432 for (i = 0; language[i].name; i++)
433 language[i].count = 0;
434 eob = (char *)buf+nbuf;
435 for(n = 0, p = (char *)buf; p < eob; n++) {
436 if (!fullrune1(p, eob-p) && eob-p < UTFmax1)
438 p += chartorune1(&r, p);
441 else if (r <= 0x7f) {
442 if (!isprint(r) && !isspace(r))
443 f = Ceascii; /* ASCII control char */
445 } else if (r == 0x80) {
449 f = Cbinary; /* Invalid Runes */
451 f = Clatin; /* Latin 1 */
454 f = Cutf; /* UTF extension */
456 cfreq[f]++; /* ASCII chars peg directly */
463 else if (cfreq[Cutf])
465 else if (cfreq[Clatin])
467 else if (cfreq[Ceascii])
469 else if (cfreq[Cnull])
474 * lookup dictionary words
476 memset(wfreq, 0, sizeof(wfreq));
477 if(guess == Fascii || guess == Flatin || guess == Futf)
480 * call individual classify routines
482 for(i=0; call[i]; i++)
488 * print out gross classification
490 if (nbuf < 100 && !mime)
491 print(mime ? PLAIN : "short ");
493 print("%s\n", mime ? PLAIN : "Ascii");
494 else if (guess == Feascii)
495 print("%s\n", mime ? PLAIN : "extended ascii");
496 else if (guess == Flatin)
497 print("%s\n", mime ? PLAIN : "latin ascii");
498 else if (guess == Futf && utf_count() < 4)
500 else print("%s\n", mime ? OCTET : "binary");
504 bump_utf_count(Rune r)
508 high = sizeof(language)/sizeof(language[0])-1;
509 for (low = 0; low < high;) {
511 if (r >= language[mid].low) {
512 if (r <= language[mid].high) {
513 language[mid].count++;
526 for (i = 0; language[i].name; i++)
527 if (language[i].count > 0)
528 switch (language[i].mode) {
544 for (i = 'a'; i < 'z'; i++)
547 for (i = 'A'; i < 'Z'; i++)
554 find_first(char *name)
558 for (i = 0; language[i].name != 0; i++)
559 if (language[i].mode == First
560 && strcmp(language[i].name, name) == 0)
571 print("%s\n", PLAIN);
579 for (i = 0; language[i].name; i++)
580 if (language[i].count) {
581 switch(language[i].mode) {
583 j = find_first(language[i].name);
586 if (language[j].count > 0)
594 print("%s", language[i].name);
609 int low, high, mid, r;
614 while (p < buf+nbuf && !isalpha(*p))
619 while(p < buf+nbuf && isalpha(*p))
623 high = sizeof(dict)/sizeof(dict[0]);
624 for(low = 0;low < high;) {
626 r = strcmp(dict[mid].word, (char*)p2);
628 wfreq[dict[mid].class]++;
640 typedef struct Filemagic Filemagic;
649 * integers in this table must be as seen on a little-endian machine
650 * when read from a file.
652 Filemagic long0tab[] = {
653 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
655 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
657 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
658 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
659 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
660 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
661 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
662 070707, 0xFFFF, "cpio archive", "application/x-cpio",
663 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
664 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
665 /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
666 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
668 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
670 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
672 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
674 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
676 * venti & fossil magic numbers are stored big-endian on disk,
677 * thus the numbers appear reversed in this table.
679 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
680 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
684 filemagic(Filemagic *tab, int ntab, ulong x)
688 for(i=0; i<ntab; i++)
689 if((x&tab[i].mask) == tab[i].x){
690 print("%s\n", mime ? tab[i].mime : tab[i].desc);
699 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
702 typedef struct Fileoffmag Fileoffmag;
709 * integers in this table must be as seen on a little-endian machine
710 * when read from a file.
712 Fileoffmag longofftab[] = {
714 * venti & fossil magic numbers are stored big-endian on disk,
715 * thus the numbers appear reversed in this table.
717 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
718 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
719 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
720 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
724 fileoffmagic(Fileoffmag *tab, int ntab)
729 uchar buf[sizeof(long)];
731 for(i=0; i<ntab; i++) {
733 seek(fd, tp->off, 0);
734 if (readn(fd, buf, sizeof buf) != sizeof buf)
737 if((x&tp->mask) == tp->x){
738 print("%s\n", mime ? tp->mime : tp->desc);
748 return fileoffmagic(longofftab, nelem(longofftab));
756 seek(fd, 0, 0); /* reposition to start of file */
757 if(crackhdr(fd, &f)) {
758 print("%s\n", mime ? OCTET : f.name);
766 enum { NAMSIZ = 100, TBLOCK = 512 };
781 char linkname[NAMSIZ];
782 /* rest are defined by POSIX's ustar format; see p1003.2b */
783 char magic[6]; /* "ustar" */
789 char prefix[155]; /* if non-null, path = prefix "/" name */
794 checksum(union hblock *hp)
798 struct header *hdr = &hp->dbuf;
800 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
803 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
813 union hblock *hp = (union hblock *)tblock;
814 struct header *hdr = &hp->dbuf;
816 seek(fd, 0, 0); /* reposition to start of file */
817 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
819 chksum = strtol(hdr->chksum, 0, 8);
820 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
821 if (strcmp(hdr->magic, "ustar") == 0)
822 print(mime? "application/x-ustar\n": "posix tar archive\n");
824 print(mime? "application/x-tar\n": "tar archive\n");
831 * initial words to classify file
841 "\x1f\x9d", "compressed", 2, "application/x-compress",
842 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
843 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
844 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
845 "!<arch>\n", "archive", 8, "application/octet-stream",
846 "070707", "cpio archive - ascii header", 6, "application/octet-stream",
847 "#!/bin/rc", "rc executable file", 9, "text/plain",
848 "#!/bin/sh", "sh executable file", 9, "text/plain",
849 "%!", "postscript", 2, "application/postscript",
850 "\004%!", "postscript", 3, "application/postscript",
851 "x T post", "troff output for post", 8, "application/troff",
852 "x T Latin1", "troff output for Latin1", 10, "application/troff",
853 "x T utf", "troff output for UTF", 7, "application/troff",
854 "x T 202", "troff output for 202", 7, "application/troff",
855 "x T aps", "troff output for aps", 7, "application/troff",
856 "x T ", "troff output", 4, "application/troff",
857 "GIF", "GIF image", 3, "image/gif",
858 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
859 "%PDF", "PDF", 4, "application/pdf",
860 "<!DOCTYPE", "HTML file", 9, "text/html",
861 "<!doctype", "HTML file", 9, "text/html",
862 "<!--", "HTML file", 4, "text/html",
863 "<html>", "HTML file", 6, "text/html",
864 "<HTML>", "HTML file", 6, "text/html",
865 "<?xml", "HTML file", 5, "text/html",
866 "\111\111\052\000", "tiff", 4, "image/tiff",
867 "\115\115\000\052", "tiff", 4, "image/tiff",
868 "\377\330\377\340", "jpeg", 4, "image/jpeg",
869 "\377\330\377\341", "jpeg", 4, "image/jpeg",
870 "\377\330\377\333", "jpeg", 4, "image/jpeg",
871 "\xff\xd8", "jpeg", 2, "image/jpeg",
872 "BM", "bmp", 2, "image/bmp",
873 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
874 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
875 "\033E\033", "HP PCL printer data", 3, OCTET,
876 "\033&", "HP PCL printer data", 2, OCTET,
877 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
878 "\033Lua", "Lua bytecode", 4, OCTET,
879 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
880 "OggS", "ogg audio", 4, "audio/ogg",
881 ".snd", "sun audio", 4, "audio/basic",
882 "\211PNG", "PNG image", 4, "image/png",
883 "P3\n", "ppm", 3, "image/ppm",
884 "P6\n", "ppm", 3, "image/ppm",
885 "/* XPM */\n", "xbm", 10, "image/xbm",
886 ".HTML ", "troff -ms input", 6, "text/troff",
887 ".LP", "troff -ms input", 3, "text/troff",
888 ".ND", "troff -ms input", 3, "text/troff",
889 ".PP", "troff -ms input", 3, "text/troff",
890 ".TL", "troff -ms input", 3, "text/troff",
891 ".TR", "troff -ms input", 3, "text/troff",
892 ".TH", "manual page", 3, "text/troff",
893 ".\\\"", "troff input", 3, "text/troff",
894 ".de", "troff input", 3, "text/troff",
895 ".if", "troff input", 3, "text/troff",
896 ".nr", "troff input", 3, "text/troff",
897 ".tr", "troff input", 3, "text/troff",
898 "vac:", "venti score", 4, "text/plain",
899 "-----BEGIN CERTIFICATE-----\n",
900 "pem certificate", -1, "text/plain",
901 "-----BEGIN TRUSTED CERTIFICATE-----\n",
902 "pem trusted certificate", -1, "text/plain",
903 "-----BEGIN X509 CERTIFICATE-----\n",
904 "pem x.509 certificate", -1, "text/plain",
905 "subject=/C=", "pem certificate with header", -1, "text/plain",
906 "process snapshot ", "process snapshot", -1, "application/snapfs",
907 "d8:announce", "torrent file", 11, "application/x-bittorrent",
908 "[playlist]", "playlist", 10, "application/x-scpls",
909 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
910 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
911 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
919 struct FILE_STRING *p;
921 for(p = file_string; p->key; p++) {
925 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
926 print("%s\n", mime ? p->mime : p->filetype);
930 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
931 for(i = 5; i < nbuf; i++)
935 print("%s\n", OCTET);
937 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
948 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
959 for(p = offstrs; p->key; p++) {
964 if (readn(fd, buf, n) != n)
966 if(memcmp(buf, p->key, n) == 0) {
967 print("%s\n", mime ? p->mime : p->filetype);
977 if (strncmp((char*)buf, "FORM", 4) == 0 &&
978 strncmp((char*)buf+8, "AIFF", 4) == 0) {
979 print("%s\n", mime? "audio/x-aiff": "aiff audio");
982 if (strncmp((char*)buf, "RIFF", 4) == 0) {
983 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
984 print("%s\n", mime? "audio/wave": "wave audio");
985 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
986 print("%s\n", mime? "video/avi": "avi video");
988 print("%s\n", mime? "application/octet-stream": "riff file");
994 char* html_string[] = {
996 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
998 "button", "center", "iframe", "object", "option", "script",
1000 "blink", "embed", "frame", "input", "label", "param", "small",
1001 "style", "table", "tbody", "tfoot", "thead", "title",
1002 "?xml", "body", "code", "font", "form", "head", "html",
1003 "link", "menu", "meta", "span",
1004 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
1005 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
1006 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
1007 "a", "b", "i", "p", "q", "u",
1020 while(p < buf+nbuf && *p != '<')
1029 for(i = 0; html_string[i]; i++){
1030 n = strlen(html_string[i]);
1031 if(p + n > buf+nbuf)
1033 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1035 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1037 print("%s\n", mime ? "text/html" : "HTML file");
1048 char* rfc822_string[] =
1070 q = strchr(p, '\n');
1074 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1081 if(*p != '\t' && *p != ' '){
1085 for(i = 0; rfc822_string[i]; i++) {
1086 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1095 print("%s\n", mime ? "message/rfc822" : "email file");
1107 q = strchr(p, '\n');
1111 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1112 print("%s\n", mime ? "text/plain" : "mail box");
1126 if(Binit(&b, fd, OREAD) == Beof)
1129 type = objtype(&b, &name);
1133 print("%s\n", OCTET);
1135 print("%s intermediate\n", name);
1148 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1150 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1155 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1160 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1166 print("%s\n", PLAIN);
1169 if(wfreq[Alword] > 0)
1170 print("alef program\n");
1172 print("c program\n");
1182 if(wfreq[Lword] < 4)
1184 print("%s\n", mime ? PLAIN : "limbo program");
1194 if(wfreq[Aword] < 2)
1196 print("%s\n", mime ? PLAIN : "as program");
1207 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1208 if((p[1] & 0xFE) == 0xFA){
1209 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1218 * high entropy (evenly spread byte values) means compressed or encrypted
1228 memset(bucket, 0, sizeof(bucket));
1229 for(i=nbuf-64; i<nbuf; i++)
1230 bucket[(buf[i]>>5)&07] += 1;
1234 cs += (bucket[i]-8)*(bucket[i]-8);
1237 if(buf[0]==0x1f && buf[1]==0x9d)
1238 print("%s\n", mime ? "application/x-compress" : "compressed");
1240 if(buf[0]==0x1f && buf[1]==0x8b)
1241 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1243 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1244 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1246 if(buf[0]==0x78 && buf[1]==0x9c)
1247 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1249 print("%s\n", mime ? OCTET : "encrypted");
1256 * english by punctuation and frequencies
1261 int vow, comm, rare, badpun, punct;
1264 if(guess != Fascii && guess != Feascii)
1268 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1278 if(p[1] != ' ' && p[1] != '\n')
1281 if(badpun*5 > punct)
1283 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1285 if(2*cfreq[';'] > cfreq['e'])
1289 for(p="AEIOU"; *p; p++) {
1291 vow += cfreq[tolower(*p)];
1294 for(p="ETAION"; *p; p++) {
1296 comm += cfreq[tolower(*p)];
1299 for(p="VJKQXZ"; *p; p++) {
1301 rare += cfreq[tolower(*p)];
1303 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1304 print("%s\n", mime ? PLAIN : "English text");
1311 * pick up a number with
1341 depthof(char *s, int *newp)
1348 while(s<es && *s==' ')
1352 if('0'<=*s && *s<='9')
1353 return 1<<strtol(s, 0, 0);
1357 while(s<es && *s!=' '){
1358 s++; /* skip letter */
1359 d += strtoul(s, &s, 10);
1362 if(d % 8 == 0 || 8 % d == 0)
1371 int dep, lox, loy, hix, hiy, px, new, cmpr;
1381 if(memcmp(cp, "compressed\n", 11) == 0) {
1386 dep = depthof((char*)cp + 0*P9BITLEN, &new);
1389 lox = p9bitnum(cp + 1*P9BITLEN);
1390 loy = p9bitnum(cp + 2*P9BITLEN);
1391 hix = p9bitnum(cp + 3*P9BITLEN);
1392 hiy = p9bitnum(cp + 4*P9BITLEN);
1393 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1397 px = 8/dep; /* pixels per byte */
1398 /* set len to number of bytes of data per scan line */
1400 len = (hix+px-1)/px - lox/px;
1401 else{ /* make positive before divide */
1404 len = (t+hix+px-1)/px;
1407 len = (hix-lox)*dep/8;
1408 len *= hiy - loy; /* col length */
1409 len += 5 * P9BITLEN; /* size of initial ascii */
1412 * for compressed images, don't look any further. otherwise:
1413 * for image file, length is non-zero and must match calculation above.
1414 * for /dev/window and /dev/screen the length is always zero.
1415 * for subfont, the subfont header should follow immediately.
1418 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1423 * mbuf->length == 0 probably indicates reading a pipe.
1424 * Ghostscript sometimes produces a little extra on the end.
1426 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1427 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1428 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1431 if (p9subfont(buf+len)) {
1432 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1443 /* if image too big, assume it's a subfont */
1444 if (p+3*P9BITLEN > buf+sizeof(buf))
1447 n = p9bitnum(p + 0*P9BITLEN); /* char count */
1450 h = p9bitnum(p + 1*P9BITLEN); /* height */
1453 a = p9bitnum(p + 2*P9BITLEN); /* ascent */
1459 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1466 char pathname[1024];
1469 if (!getfontnum(cp, &cp)) /* height */
1471 if (!getfontnum(cp, &cp)) /* ascent */
1473 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1474 if (!getfontnum(cp, &cp)) /* min */
1476 if (!getfontnum(cp, &cp)) /* max */
1478 getfontnum(cp, &cp); /* optional offset */
1479 while (WHITESPACE(*cp))
1481 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1483 /* construct a path name, if needed */
1485 if (*p != '/' && slash) {
1487 if (n < sizeof(pathname))
1488 memcpy(pathname, fname, n);
1491 if (n+cp-p+4 < sizeof(pathname)) {
1492 memcpy(pathname+n, p, cp-p);
1495 if (access(pathname, AEXIST) < 0) {
1496 strcpy(pathname+n, ".0");
1497 if (access(pathname, AEXIST) < 0)
1503 print(mime ? "text/plain\n" : "font file\n");
1510 getfontnum(uchar *cp, uchar **rp)
1512 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1514 if (*cp < '0' || *cp > '9')
1516 strtoul((char *)cp, (char **)rp, 0);
1517 if (!WHITESPACE(**rp)) {
1527 if(strstr((char *)buf, "\\rtf1")){
1528 print(mime ? "application/rtf\n" : "rich text format\n");
1537 if (buf[0] == 0x4d && buf[1] == 0x5a){
1538 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1547 if(buf[0] || buf[1] || buf[3] || buf[9])
1549 if(buf[4] == 0x00 && buf[5] == 0x00)
1553 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1556 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1565 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1588 static char *type[] = {
1589 [1] "relocatable object",
1591 [3] "shared library",
1595 if (memcmp(buf, "\x7fELF", 4) == 0){
1598 int n = (buf[19] << 8) | buf[18];
1599 char *p = "unknown";
1600 char *t = "unknown";
1602 if (n > 0 && n < nelem(cpu) && cpu[n])
1605 /* try the other byte order */
1607 n = (buf[18] << 8) | buf[19];
1608 if (n > 0 && n < nelem(cpu) && cpu[n])
1612 n = (buf[16]<< 8) | buf[17];
1614 n = (buf[17]<< 8) | buf[16];
1616 if(n>0 && n < nelem(type) && type[n])
1618 print("%s ELF %s\n", p, t);
1621 print("application/x-elf-executable\n");
1631 int i, j, ldepth, l;
1635 for(j = 0; j < 3; j++){
1636 for(p = (char*)buf, i=0; i<3; i++){
1637 if(p[0] != '0' || p[1] != 'x')
1641 else if(buf[2+4] == ',')
1652 while(*p == ' ' || *p == '\t')
1660 print("application/x-face\n");
1662 print("face image depth %d\n", ldepth);