8 * file - determine type of file
/*
 * Assemble the four bytes at p into a 32-bit little-endian value.
 * Each byte is widened to unsigned long before it is shifted: a byte
 * operand otherwise promotes to int, so (p)[3]<<24 would shift into the
 * sign bit whenever that byte is >= 0x80, and the negative result would
 * sign-extend wherever long is wider than int.
 * p is evaluated four times; do not pass an expression with side effects.
 */
#define LENDIAN(p) ((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
	((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
84 /* codes for 'mode' field in language structure */
87 First, /* first entry for language spanning several ranges */
88 Multi, /* later entries " " " ... */
89 Shared, /* codes used in several languages */
94 int mode; /* see enum above */
102 Normal, 0, 0x0100, 0x01FF, "Extended Latin",
103 Normal, 0, 0x0370, 0x03FF, "Greek",
104 Normal, 0, 0x0400, 0x04FF, "Cyrillic",
105 Normal, 0, 0x0530, 0x058F, "Armenian",
106 Normal, 0, 0x0590, 0x05FF, "Hebrew",
107 Normal, 0, 0x0600, 0x06FF, "Arabic",
108 Normal, 0, 0x0900, 0x097F, "Devanagari",
109 Normal, 0, 0x0980, 0x09FF, "Bengali",
110 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
111 Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
112 Normal, 0, 0x0B00, 0x0B7F, "Oriya",
113 Normal, 0, 0x0B80, 0x0BFF, "Tamil",
114 Normal, 0, 0x0C00, 0x0C7F, "Telugu",
115 Normal, 0, 0x0C80, 0x0CFF, "Kannada",
116 Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
117 Normal, 0, 0x0E00, 0x0E7F, "Thai",
118 Normal, 0, 0x0E80, 0x0EFF, "Lao",
119 Normal, 0, 0x1000, 0x105F, "Tibetan",
120 Normal, 0, 0x10A0, 0x10FF, "Georgian",
121 Normal, 0, 0x3040, 0x30FF, "Japanese",
122 Normal, 0, 0x3100, 0x312F, "Chinese",
123 First, 0, 0x3130, 0x318F, "Korean",
124 Multi, 0, 0x3400, 0x3D2F, "Korean",
125 Shared, 0, 0x4e00, 0x9fff, "CJK",
126 Normal, 0, 0, 0, 0, /* terminal entry */
132 Fascii, /* printable ascii */
134 Futf, /* UTF character set */
135 Fbinary, /* binary */
136 Feascii, /* ASCII with control chars */
137 Fnull, /* NULL in file */
140 void bump_utf_count(Rune);
141 int cistrncmp(char*, char*, int);
143 int getfontnum(uchar*, uchar**);
170 int p9bitnum(uchar*);
171 int p9subfont(uchar*);
172 void print_utf(void);
173 void type(char*, int);
177 int (*call[])(void) =
179 long0, /* recognizable by first 4 bytes */
180 istring, /* recognizable by first string */
181 iself, /* ELF (foreign) executable */
182 isexec, /* native executables */
183 iff, /* interchange file format (strings) */
184 longoff, /* recognizable by 4 bytes at some offset */
185 isoffstr, /* recognizable by string at some offset */
186 isrfc822, /* email file */
187 ismbox, /* mail box */
188 istar, /* recognizable by tar checksum */
189 iscint, /* compiler/assembler intermediate */
190 ishtml, /* html keywords */
191 islimbo, /* limbo source */
192 isc, /* c & alef compiler key words */
193 isas, /* assembler key words */
194 isp9font, /* plan 9 font */
195 isp9bit, /* plan 9 image (as from /dev/window) */
196 isrtf, /* rich text format */
197 ismsdos, /* msdos exe (virus file attachment) */
198 isicocur, /* windows icon or cursor file */
199 isface, /* ascii face file */
204 ismung, /* entropy compressed/encrypted */
205 isenglish, /* char frequency English */
211 char OCTET[] = "application/octet-stream"; /* MIME type printed in mime mode for directories, special files and binary data */
212 char PLAIN[] = "text/plain"; /* MIME type printed in mime mode for empty files and text */
215 main(int argc, char *argv[])
226 fprint(2, "usage: file [-m] [file...]\n");
231 if(mime == 0 || argc > 1){
232 for(i = 0; i < argc; i++) {
233 for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
245 for(i = 0; i < argc; i++)
246 type(argv[i], maxlen);
252 type(char *file, int nlen)
260 for (i = 0, p = file; *p; i++) {
261 if (*p == '/') /* find rightmost slash */
263 p += chartorune(&r, p); /* count runes */
265 print("%s:%*s",file, nlen-i+1, "");
268 if ((fd = open(file, OREAD)) < 0) {
269 print("cannot open: %r\n");
287 if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292 if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297 if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298 memmove(buf, buf+3, nbuf-3);
302 if(memcmp(buf, "\xFE\xFF", 2) == 0){
308 memmove(rb, buf+2, nbuf);
310 e = p+sizeof(buf)-UTFmax-1;
311 for(i=0; i<nbuf && p < e; i+=2){
312 r = rb[i+1] | rb[i]<<8;
313 p += runetochar(p, &r);
317 nbuf = p - (char*)buf;
319 if(memcmp(buf, "\xFF\xFE", 2) == 0){
325 memmove(rb, buf+2, nbuf);
327 e = p+sizeof(buf)-UTFmax-1;
328 for(i=0; i<nbuf && p < e; i+=2){
329 r = rb[i] | rb[i+1]<<8;
330 p += runetochar(p, &r);
334 nbuf = p - (char*)buf;
348 print("cannot stat: %r\n");
351 if(mbuf->mode & DMDIR) {
352 print("%s\n", mime ? OCTET : "directory");
355 if(mbuf->type != 'M' && mbuf->type != '|') {
357 print("%s\n", OCTET);
359 print("special file #%C/%s\n", mbuf->type, mbuf->name);
362 /* may be reading a pipe on standard input */
363 nbuf = readn(fd, buf, sizeof(buf)-1);
365 print("cannot read: %r\n");
369 print("%s\n", mime ? PLAIN : "empty file");
377 * build histogram table
379 memset(cfreq, 0, sizeof(cfreq));
380 for (i = 0; language[i].name; i++)
381 language[i].count = 0;
382 eob = (char *)buf+nbuf;
383 for(n = 0, p = (char *)buf; p < eob; n++) {
384 if (!fullrune(p, eob-p) && eob-p < UTFmax)
386 p += chartorune(&r, p);
389 else if (r <= 0x7f) {
390 if (!isprint(r) && !isspace(r))
391 f = Ceascii; /* ASCII control char */
393 } else if (r == 0x80) {
397 f = Cbinary; /* Invalid Runes */
399 f = Clatin; /* Latin 1 */
402 f = Cutf; /* UTF extension */
404 cfreq[f]++; /* ASCII chars peg directly */
411 else if (cfreq[Cutf])
413 else if (cfreq[Clatin])
415 else if (cfreq[Ceascii])
417 else if (cfreq[Cnull])
422 * lookup dictionary words
424 memset(wfreq, 0, sizeof(wfreq));
425 if(guess == Fascii || guess == Flatin || guess == Futf)
428 * call individual classify routines
430 for(i=0; call[i]; i++)
436 * print out gross classification
438 if (nbuf < 100 && !mime)
439 print(mime ? PLAIN : "short ");
441 print("%s\n", mime ? PLAIN : "Ascii");
442 else if (guess == Feascii)
443 print("%s\n", mime ? PLAIN : "extended ascii");
444 else if (guess == Flatin)
445 print("%s\n", mime ? PLAIN : "latin ascii");
446 else if (guess == Futf && utf_count() < 4)
448 else print("%s\n", mime ? OCTET : "binary");
452 bump_utf_count(Rune r)
456 high = sizeof(language)/sizeof(language[0])-1;
457 for (low = 0; low < high;) {
459 if (r >= language[mid].low) {
460 if (r <= language[mid].high) {
461 language[mid].count++;
474 for (i = 0; language[i].name; i++)
475 if (language[i].count > 0)
476 switch (language[i].mode) {
492 for (i = 'a'; i < 'z'; i++)
495 for (i = 'A'; i < 'Z'; i++)
502 find_first(char *name)
506 for (i = 0; language[i].name != 0; i++)
507 if (language[i].mode == First
508 && strcmp(language[i].name, name) == 0)
519 print("%s\n", PLAIN);
527 for (i = 0; language[i].name; i++)
528 if (language[i].count) {
529 switch(language[i].mode) {
531 j = find_first(language[i].name);
534 if (language[j].count > 0)
542 print("%s", language[i].name);
557 int low, high, mid, r;
562 while (p < buf+nbuf && !isalpha(*p))
567 while(p < buf+nbuf && isalpha(*p))
571 high = sizeof(dict)/sizeof(dict[0]);
572 for(low = 0;low < high;) {
574 r = strcmp(dict[mid].word, (char*)p2);
576 wfreq[dict[mid].class]++;
588 typedef struct Filemagic Filemagic;
597 * integers in this table must be as seen on a little-endian machine
598 * when read from a file.
600 Filemagic long0tab[] = {
601 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
603 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
605 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
606 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
607 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
608 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
609 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
610 070707, 0xFFFF, "cpio archive", "application/x-cpio",
611 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
612 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
613 0xf0ff, 0xf6ff, "aac audio", "audio/mpeg",
614 /* 0xfeedface: this could alternatively be a NeXT Plan 9 boot image */
615 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
617 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
619 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
621 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
623 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
625 * venti & fossil magic numbers are stored big-endian on disk,
626 * thus the numbers appear reversed in this table.
628 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
629 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
630 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
634 filemagic(Filemagic *tab, int ntab, ulong x)
638 for(i=0; i<ntab; i++)
639 if((x&tab[i].mask) == tab[i].x){
640 print("%s\n", mime ? tab[i].mime : tab[i].desc);
649 return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
652 typedef struct Fileoffmag Fileoffmag;
659 * integers in this table must be as seen on a little-endian machine
660 * when read from a file.
662 Fileoffmag longofftab[] = {
664 * venti & fossil magic numbers are stored big-endian on disk,
665 * thus the numbers appear reversed in this table.
667 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
668 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
669 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
670 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
671 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
675 fileoffmagic(Fileoffmag *tab, int ntab)
680 uchar buf[sizeof(long)];
682 for(i=0; i<ntab; i++) {
684 seek(fd, tp->off, 0);
685 if (readn(fd, buf, sizeof buf) != sizeof buf)
688 if((x&tp->mask) == tp->x){
689 print("%s\n", mime ? tp->mime : tp->desc);
699 return fileoffmagic(longofftab, nelem(longofftab));
707 seek(fd, 0, 0); /* reposition to start of file */
708 if(crackhdr(fd, &f)) {
709 print("%s\n", mime ? OCTET : f.name);
/*
 * tar header geometry, in bytes: NAMSIZ sizes the header's link-name
 * field and TBLOCK is the size of one header block.
 */
enum {
	NAMSIZ	= 100,
	TBLOCK	= 512
};
732 char linkname[NAMSIZ];
733 /* rest are defined by POSIX's ustar format; see p1003.2b */
734 char magic[6]; /* "ustar" */
740 char prefix[155]; /* if non-null, path = prefix "/" name */
745 checksum(union hblock *hp)
749 struct header *hdr = &hp->dbuf;
751 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
754 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
764 union hblock *hp = (union hblock *)tblock;
765 struct header *hdr = &hp->dbuf;
767 seek(fd, 0, 0); /* reposition to start of file */
768 if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
770 chksum = strtol(hdr->chksum, 0, 8);
771 if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
772 if (strcmp(hdr->magic, "ustar") == 0)
773 print(mime? "application/x-ustar\n": "posix tar archive\n");
775 print(mime? "application/x-tar\n": "tar archive\n");
782 * initial words to classify file
792 "\x1f\x9d", "compressed", 2, "application/x-compress",
793 "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
794 "BZh", "bzip2 compressed", 3, "application/x-bzip2",
795 "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
796 "!<arch>\n", "archive", 8, "application/octet-stream",
797 "070707", "cpio archive - ascii header", 6, "application/octet-stream",
798 "#!/bin/rc", "rc executable file", 9, "text/plain",
799 "#!/bin/sh", "sh executable file", 9, "text/plain",
800 "%!", "postscript", 2, "application/postscript",
801 "\004%!", "postscript", 3, "application/postscript",
802 "x T post", "troff output for post", 8, "application/troff",
803 "x T Latin1", "troff output for Latin1", 10, "application/troff",
804 "x T utf", "troff output for UTF", 7, "application/troff",
805 "x T 202", "troff output for 202", 7, "application/troff",
806 "x T aps", "troff output for aps", 7, "application/troff",
807 "x T ", "troff output", 4, "application/troff",
808 "GIF", "GIF image", 3, "image/gif",
809 "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
810 "%PDF", "PDF", 4, "application/pdf",
811 "<!DOCTYPE", "HTML file", 9, "text/html",
812 "<!doctype", "HTML file", 9, "text/html",
813 "<!--", "HTML file", 4, "text/html",
814 "<html>", "HTML file", 6, "text/html",
815 "<HTML>", "HTML file", 6, "text/html",
816 "<?xml", "HTML file", 5, "text/html",
817 "\111\111\052\000", "tiff", 4, "image/tiff",
818 "\115\115\000\052", "tiff", 4, "image/tiff",
819 "\377\330\377\340", "jpeg", 4, "image/jpeg",
820 "\377\330\377\341", "jpeg", 4, "image/jpeg",
821 "\377\330\377\333", "jpeg", 4, "image/jpeg",
822 "\xff\xd8", "jpeg", 2, "image/jpeg",
823 "BM", "bmp", 2, "image/bmp",
824 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
825 "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
826 "\033E\033", "HP PCL printer data", 3, OCTET,
827 "\033&", "HP PCL printer data", 2, OCTET,
828 "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
829 "\033Lua", "Lua bytecode", 4, OCTET,
830 "ID3", "mp3 audio with id3", 3, "audio/mpeg",
831 "OggS", "ogg audio", 4, "audio/ogg",
832 ".snd", "sun audio", 4, "audio/basic",
833 "\211PNG", "PNG image", 4, "image/png",
834 "P1\n", "ppm", 3, "image/ppm",
835 "P2\n", "ppm", 3, "image/ppm",
836 "P3\n", "ppm", 3, "image/ppm",
837 "P4\n", "ppm", 3, "image/ppm",
838 "P5\n", "ppm", 3, "image/ppm",
839 "P6\n", "ppm", 3, "image/ppm",
840 "/* XPM */\n", "xbm", 10, "image/xbm",
841 ".HTML ", "troff -ms input", 6, "text/troff",
842 ".LP", "troff -ms input", 3, "text/troff",
843 ".ND", "troff -ms input", 3, "text/troff",
844 ".PP", "troff -ms input", 3, "text/troff",
845 ".TL", "troff -ms input", 3, "text/troff",
846 ".TR", "troff -ms input", 3, "text/troff",
847 ".TH", "manual page", 3, "text/troff",
848 ".\\\"", "troff input", 3, "text/troff",
849 ".de", "troff input", 3, "text/troff",
850 ".if", "troff input", 3, "text/troff",
851 ".nr", "troff input", 3, "text/troff",
852 ".tr", "troff input", 3, "text/troff",
853 "vac:", "venti score", 4, "text/plain",
854 "-----BEGIN CERTIFICATE-----\n",
855 "pem certificate", -1, "text/plain",
856 "-----BEGIN TRUSTED CERTIFICATE-----\n",
857 "pem trusted certificate", -1, "text/plain",
858 "-----BEGIN X509 CERTIFICATE-----\n",
859 "pem x.509 certificate", -1, "text/plain",
860 "subject=/C=", "pem certificate with header", -1, "text/plain",
861 "process snapshot ", "process snapshot", -1, "application/snapfs",
862 "d8:announce", "torrent file", 11, "application/x-bittorrent",
863 "[playlist]", "playlist", 10, "application/x-scpls",
864 "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
865 "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
866 "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
867 "AT&T", "DjVu document", 4, "image/vnd.djvu",
875 struct FILE_STRING *p;
877 for(p = file_string; p->key; p++) {
881 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
882 print("%s\n", mime ? p->mime : p->filetype);
886 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
887 for(i = 5; i < nbuf; i++)
891 print("%s\n", OCTET);
893 print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
904 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
915 for(p = offstrs; p->key; p++) {
920 if (readn(fd, buf, n) != n)
922 if(memcmp(buf, p->key, n) == 0) {
923 print("%s\n", mime ? p->mime : p->filetype);
933 if (strncmp((char*)buf, "FORM", 4) == 0 &&
934 strncmp((char*)buf+8, "AIFF", 4) == 0) {
935 print("%s\n", mime? "audio/x-aiff": "aiff audio");
938 if (strncmp((char*)buf, "RIFF", 4) == 0) {
939 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
940 print("%s\n", mime? "audio/wave": "wave audio");
941 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
942 print("%s\n", mime? "video/avi": "avi video");
944 print("%s\n", mime? "application/octet-stream": "riff file");
950 char* html_string[] = {
952 "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
954 "button", "center", "iframe", "object", "option", "script",
956 "blink", "embed", "frame", "input", "label", "param", "small",
957 "style", "table", "tbody", "tfoot", "thead", "title",
958 "?xml", "body", "code", "font", "form", "head", "html",
959 "link", "menu", "meta", "span",
960 "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
961 "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
962 "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
963 "a", "b", "i", "p", "q", "u",
976 while(p < buf+nbuf && *p != '<')
985 for(i = 0; html_string[i]; i++){
986 n = strlen(html_string[i]);
989 if(cistrncmp(html_string[i], (char*)p, n) == 0) {
991 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
993 print("%s\n", mime ? "text/html" : "HTML file");
1004 char* rfc822_string[] =
1026 q = strchr(p, '\n');
1030 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1037 if(*p != '\t' && *p != ' '){
1041 for(i = 0; rfc822_string[i]; i++) {
1042 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1051 print("%s\n", mime ? "message/rfc822" : "email file");
1063 q = strchr(p, '\n');
1067 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1068 print("%s\n", mime ? "text/plain" : "mail box");
1082 if(Binit(&b, fd, OREAD) == Beof)
1085 type = objtype(&b, &name);
1089 print("%s\n", OCTET);
1091 print("%s intermediate\n", name);
1104 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1106 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1111 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1116 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1122 print("%s\n", PLAIN);
1125 if(wfreq[Alword] > 0)
1126 print("alef program\n");
1128 print("c program\n");
1138 if(wfreq[Lword] < 4)
1140 print("%s\n", mime ? PLAIN : "limbo program");
1150 if(wfreq[Aword] < 2)
1152 print("%s\n", mime ? PLAIN : "as program");
1164 if((p[12] | p[13]<<8) == 0) /* width */
1166 if((p[14] | p[15]<<8) == 0) /* height */
1168 if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
1170 if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
1172 if(p[1] == 0){ /* non color-mapped */
1173 if((p[2]&3) != 2 && (p[2]&3) != 3)
1175 if((p[5] | p[6]<<8) != 0) /* palette length */
1178 if(p[1] == 1){ /* color-mapped */
1179 if((p[2]&3) != 1 || p[7] == 0)
1181 if((p[5] | p[6]<<8) == 0) /* palette length */
1185 print("%s\n", mime ? "image/tga" : "targa image");
1196 while((p < e) && (p = memchr(p, 0xFF, e - p))){
1197 if((p[1] & 0xFE) == 0xFA){
1198 print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1207 * a near-uniform byte distribution (high entropy) means compressed or encrypted
1217 memset(bucket, 0, sizeof(bucket));
1218 for(i=nbuf-64; i<nbuf; i++)
1219 bucket[(buf[i]>>5)&07] += 1;
1223 cs += (bucket[i]-8)*(bucket[i]-8);
1226 if(buf[0]==0x1f && buf[1]==0x9d)
1227 print("%s\n", mime ? "application/x-compress" : "compressed");
1229 if(buf[0]==0x1f && buf[1]==0x8b)
1230 print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1232 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1233 print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1235 if(buf[0]==0x78 && buf[1]==0x9c)
1236 print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1238 print("%s\n", mime ? OCTET : "encrypted");
1245 * english by punctuation and frequencies
1250 int vow, comm, rare, badpun, punct;
1253 if(guess != Fascii && guess != Feascii)
1257 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1267 if(p[1] != ' ' && p[1] != '\n')
1270 if(badpun*5 > punct)
1272 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
1274 if(2*cfreq[';'] > cfreq['e'])
1278 for(p="AEIOU"; *p; p++) {
1280 vow += cfreq[tolower(*p)];
1283 for(p="ETAION"; *p; p++) {
1285 comm += cfreq[tolower(*p)];
1288 for(p="VJKQXZ"; *p; p++) {
1290 rare += cfreq[tolower(*p)];
1292 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1293 print("%s\n", mime ? PLAIN : "English text");
1300 * pick up a number with
1330 depthof(char *s, int *newp)
1337 while(s<es && *s==' ')
1341 if('0'<=*s && *s<='9')
1342 return 1<<strtol(s, 0, 0);
1346 while(s<es && *s!=' '){
1347 s++; /* skip letter */
1348 d += strtoul(s, &s, 10);
1351 if(d % 8 == 0 || 8 % d == 0)
1360 int dep, lox, loy, hix, hiy, px, new, cmpr;
1370 if(memcmp(cp, "compressed\n", 11) == 0) {
1375 dep = depthof((char*)cp + 0*P9BITLEN, &new);
1378 lox = p9bitnum(cp + 1*P9BITLEN);
1379 loy = p9bitnum(cp + 2*P9BITLEN);
1380 hix = p9bitnum(cp + 3*P9BITLEN);
1381 hiy = p9bitnum(cp + 4*P9BITLEN);
1382 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1386 px = 8/dep; /* pixels per byte */
1387 /* set len to number of bytes of data per scan line */
1389 len = (hix+px-1)/px - lox/px;
1390 else{ /* make positive before divide */
1393 len = (t+hix+px-1)/px;
1396 len = (hix-lox)*dep/8;
1397 len *= hiy - loy; /* col length */
1398 len += 5 * P9BITLEN; /* size of initial ascii */
1401 * for compressed images, don't look any further. otherwise:
1402 * for image file, length is non-zero and must match calculation above.
1403 * for /dev/window and /dev/screen the length is always zero.
1404 * for subfont, the subfont header should follow immediately.
1407 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1412 * mbuf->length == 0 probably indicates reading a pipe.
1413 * Ghostscript sometimes produces a little extra on the end.
1415 if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1416 mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1417 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1420 if (p9subfont(buf+len)) {
1421 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1432 /* if image too big, assume it's a subfont */
1433 if (p+3*P9BITLEN > buf+sizeof(buf))
1436 n = p9bitnum(p + 0*P9BITLEN); /* char count */
1439 h = p9bitnum(p + 1*P9BITLEN); /* height */
1442 a = p9bitnum(p + 2*P9BITLEN); /* ascent */
1448 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* true for space, tab or newline; c may be evaluated up to three times */
1455 char pathname[1024];
1458 if (!getfontnum(cp, &cp)) /* height */
1460 if (!getfontnum(cp, &cp)) /* ascent */
1462 for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1463 if (!getfontnum(cp, &cp)) /* min */
1465 if (!getfontnum(cp, &cp)) /* max */
1467 getfontnum(cp, &cp); /* optional offset */
1468 while (WHITESPACE(*cp))
1470 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1472 /* construct a path name, if needed */
1474 if (*p != '/' && slash) {
1476 if (n < sizeof(pathname))
1477 memcpy(pathname, fname, n);
1480 if (n+cp-p+4 < sizeof(pathname)) {
1481 memcpy(pathname+n, p, cp-p);
1484 if (access(pathname, AEXIST) < 0) {
1485 strcpy(pathname+n, ".0");
1486 if (access(pathname, AEXIST) < 0)
1492 print(mime ? "text/plain\n" : "font file\n");
1499 getfontnum(uchar *cp, uchar **rp)
1501 while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
1503 if (*cp < '0' || *cp > '9')
1505 strtoul((char *)cp, (char **)rp, 0);
1506 if (!WHITESPACE(**rp)) {
1516 if(strstr((char *)buf, "\\rtf1")){
1517 print(mime ? "application/rtf\n" : "rich text format\n");
1526 if (buf[0] == 0x4d && buf[1] == 0x5a){
1527 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1536 if(buf[0] || buf[1] || buf[3] || buf[9])
1538 if(buf[4] == 0x00 && buf[5] == 0x00)
1542 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1545 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1554 static char *cpu[] = { /* NB: incomplete and arbitrary list */
1577 static char *type[] = {
1578 [1] "relocatable object",
1580 [3] "shared library",
1584 if (memcmp(buf, "\x7fELF", 4) == 0){
1587 int n = (buf[19] << 8) | buf[18];
1588 char *p = "unknown";
1589 char *t = "unknown";
1591 if (n > 0 && n < nelem(cpu) && cpu[n])
1594 /* try the other byte order */
1596 n = (buf[18] << 8) | buf[19];
1597 if (n > 0 && n < nelem(cpu) && cpu[n])
1601 n = (buf[16]<< 8) | buf[17];
1603 n = (buf[17]<< 8) | buf[16];
1605 if(n>0 && n < nelem(type) && type[n])
1607 print("%s ELF %s\n", p, t);
1610 print("application/x-elf-executable\n");
1620 int i, j, ldepth, l;
1624 for(j = 0; j < 3; j++){
1625 for(p = (char*)buf, i=0; i<3; i++){
1626 if(p[0] != '0' || p[1] != 'x')
1630 else if(buf[2+4] == ',')
1641 while(*p == ' ' || *p == '\t')
1649 print("application/x-face\n");
1651 print("face image depth %d\n", ldepth);