]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
485e8eecd04cd7e95bacdc101ccd0e9649fe5d45
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/* assemble a little-endian 32-bit value from the four bytes at p (p is evaluated four times) */
#define LENDIAN(p)      (((p)[3]<<24) | ((p)[2]<<16) | ((p)[1]<<8) | (p)[0])
11
uchar   buf[6001];      /* leading bytes of the file under test; filetype() stores a 0 at buf[nbuf] */
short   cfreq[140];     /* histogram built by filetype(): index is an ASCII code or a C* class */
short   wfreq[50];      /* dictionary hits counted by wordfreq(), indexed by dict[].class */
int     nbuf;           /* number of valid bytes in buf */
Dir*    mbuf;           /* dirfstat() result for the file being examined */
int     fd;             /* descriptor opened by type(); read by the classifiers that seek */
char    *fname;         /* name of the file being examined, set by type() */
char    *slash;         /* rightmost '/' in that name (0 if none), set by type() when it prints the name */
20
/*
 * Character and word classes.
 * Cword..I3 index wfreq[]; Clatin..Cutf index cfreq[] above the
 * 7-bit ASCII codes, which index cfreq[] directly.
 */
enum
{
        Cword,
        Fword,
        Aword,
        Alword,
        Lword,
        I1,
        I2,
        I3,
        Clatin  = 128,
        Cbinary,
        Cnull,
        Ceascii,
        Cutf,
};

/*
 * Keyword dictionary consulted by wordfreq().
 * wordfreq() binary-searches this table with strcmp(), so the
 * entries MUST stay in strict strcmp() order (upper case sorts
 * before lower case).  A misplaced entry silently hides itself
 * and its neighbours from the search.
 */
struct
{
        char*   word;
        int     class;
} dict[] =
{
        "PATH",         Lword,
        "TEXT",         Aword,
        "adt",          Alword,
        "aggr",         Alword,
        "alef",         Alword,
        "array",        Lword,
        "bio",          I2,
        "block",        Fword,
        "char",         Cword,
        "common",       Fword,
        "con",          Lword,
        "data",         Fword,
        "dimension",    Fword,
        "double",       Cword,
        "extern",       Cword,
        "float",        Cword,
        "fn",           Lword,
        "function",     Fword,
        "h",            I3,
        "implement",    Lword,
        "import",       Lword,
        "include",      I1,
        "int",          Cword,
        "integer",      Fword,
        "iota",         Lword,
        "libc",         I2,
        "long",         Cword,
        "module",       Lword,
        "real",         Fword,
        "ref",          Lword,
        "register",     Cword,
        "self",         Lword,
        "short",        Cword,
        "static",       Cword,
        "stdio",        I2,
        "struct",       Cword,
        "subroutine",   Fword,
        "u",            I2,
        "void",         Cword,
};
83
/* codes for 'mode' field in language structure */
enum    {
                Normal  = 0,
                First,          /* first entry for language spanning several ranges */
                Multi,          /* later entries "   "       "  ... */
                Shared,         /* codes used in several languages */
        };

/*
 * Rune ranges used to name the scripts found in UTF text.
 * bump_utf_count() binary-searches this table by [low, high], so the
 * entries must stay sorted by range and must not overlap.  The final
 * all-zero entry ends the loops that walk the table by name; the
 * binary search leaves it out.
 */
struct
{
        int     mode;           /* see enum above */
        int     count;          /* runes seen in this range; zeroed for each file by filetype() */
        int     low;            /* first rune of the range */
        int     high;           /* last rune of the range (inclusive) */
        char    *name;          /* label printed by print_utf() */

} language[] =
{
        Normal, 0,      0x0100, 0x01FF, "Extended Latin",
        Normal, 0,      0x0370, 0x03FF, "Greek",
        Normal, 0,      0x0400, 0x04FF, "Cyrillic",
        Normal, 0,      0x0530, 0x058F, "Armenian",
        Normal, 0,      0x0590, 0x05FF, "Hebrew",
        Normal, 0,      0x0600, 0x06FF, "Arabic",
        Normal, 0,      0x0900, 0x097F, "Devanagari",
        Normal, 0,      0x0980, 0x09FF, "Bengali",
        Normal, 0,      0x0A00, 0x0A7F, "Gurmukhi",
        Normal, 0,      0x0A80, 0x0AFF, "Gujarati",
        Normal, 0,      0x0B00, 0x0B7F, "Oriya",
        Normal, 0,      0x0B80, 0x0BFF, "Tamil",
        Normal, 0,      0x0C00, 0x0C7F, "Telugu",
        Normal, 0,      0x0C80, 0x0CFF, "Kannada",
        Normal, 0,      0x0D00, 0x0D7F, "Malayalam",
        Normal, 0,      0x0E00, 0x0E7F, "Thai",
        Normal, 0,      0x0E80, 0x0EFF, "Lao",
        Normal, 0,      0x1000, 0x105F, "Tibetan",
        Normal, 0,      0x10A0, 0x10FF, "Georgian",
        Normal, 0,      0x3040, 0x30FF, "Japanese",
        Normal, 0,      0x3100, 0x312F, "Chinese",
        First,  0,      0x3130, 0x318F, "Korean",
        Multi,  0,      0x3400, 0x3D2F, "Korean",
        Shared, 0,      0x4e00, 0x9fff, "CJK",
        Normal, 0,      0,      0,      0,              /* terminal entry */
};
128
129
/*
 * Gross classification of the buffer, chosen by filetype() from the
 * character histogram and used when no specific classifier matches.
 */
enum
{
        Fascii,         /* printable ascii */
        Flatin,         /* latin 1*/
        Futf,           /* UTF character set */
        Fbinary,        /* binary */
        Feascii,        /* ASCII with control chars */
        Fnull,          /* NULL in file */
} guess;
139
/*
 * Forward declarations.
 * The int (void) predicates are the classifiers listed in call[];
 * filetype() stops at the first one that returns non-zero.
 */
void    bump_utf_count(Rune);
int     cistrncmp(char*, char*, int);
void    filetype(int);
int     getfontnum(uchar*, uchar**);
int     isas(void);
int     isc(void);
int     iscint(void);
int     isenglish(void);
int     ishp(void);
int     ishtml(void);
int     isrfc822(void);
int     ismbox(void);
int     islimbo(void);
int     istga(void);
int     ismp3(void);
int     ismung(void);
int     isp9bit(void);
int     isp9font(void);
int     isrtf(void);
int     ismsdos(void);
int     isicocur(void);
int     iself(void);
int     istring(void);
int     isoffstr(void);
int     iff(void);
int     long0(void);
int     longoff(void);
int     istar(void);
int     isface(void);
int     isexec(void);
int     p9bitnum(uchar*);
int     p9subfont(uchar*);
void    print_utf(void);
void    type(char*, int);
int     utf_count(void);
void    wordfreq(void);
176
/*
 * Classifiers, tried by filetype() in this order; the first one that
 * returns non-zero ends the search.  The list is terminated by 0.
 */
int     (*call[])(void) =
{
        long0,          /* recognizable by first 4 bytes */
        istring,        /* recognizable by first string */
        iself,          /* ELF (foreign) executable */
        isexec,         /* native executables */
        iff,            /* interchange file format (strings) */
        longoff,        /* recognizable by 4 bytes at some offset */
        isoffstr,       /* recognizable by string at some offset */
        isrfc822,       /* email file */
        ismbox,         /* mail box */
        istar,          /* recognizable by tar checksum */
        iscint,         /* compiler/assembler intermediate */
        ishtml,         /* html keywords */
        islimbo,        /* limbo source */
        isc,            /* c & alef compiler key words */
        isas,           /* assembler key words */
        isp9font,       /* plan 9 font */
        isp9bit,        /* plan 9 image (as from /dev/window) */
        isrtf,          /* rich text format */
        ismsdos,        /* msdos exe (virus file attachment) */
        isicocur,               /* windows icon or cursor file */
        isface,         /* ascii face file */
        istga,
        ismp3,

        /* last resorts */
        ismung,         /* entropy compressed/encrypted */
        isenglish,      /* char frequency English */
        0
};
208
int mime;       /* set by the -m flag: print MIME types instead of descriptions */

/* MIME types shared by several classifiers */
char OCTET[] =  "application/octet-stream";
char PLAIN[] =  "text/plain";
213
214 void
215 main(int argc, char *argv[])
216 {
217         int i, j, maxlen;
218         char *cp;
219         Rune r;
220
221         ARGBEGIN{
222         case 'm':
223                 mime = 1;
224                 break;
225         default:
226                 fprint(2, "usage: file [-m] [file...]\n");
227                 exits("usage");
228         }ARGEND;
229
230         maxlen = 0;
231         if(mime == 0 || argc > 1){
232                 for(i = 0; i < argc; i++) {
233                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
234                                         ;
235                         if(j > maxlen)
236                                 maxlen = j;
237                 }
238         }
239         if (argc <= 0) {
240                 if(!mime)
241                         print ("stdin: ");
242                 filetype(0);
243         }
244         else {
245                 for(i = 0; i < argc; i++)
246                         type(argv[i], maxlen);
247         }
248         exits(0);
249 }
250
251 void
252 type(char *file, int nlen)
253 {
254         Rune r;
255         int i;
256         char *p;
257
258         if(nlen > 0){
259                 slash = 0;
260                 for (i = 0, p = file; *p; i++) {
261                         if (*p == '/')                  /* find rightmost slash */
262                                 slash = p;
263                         p += chartorune(&r, p);         /* count runes */
264                 }
265                 print("%s:%*s",file, nlen-i+1, "");
266         }
267         fname = file;
268         if ((fd = open(file, OREAD)) < 0) {
269                 print("cannot open: %r\n");
270                 return;
271         }
272         filetype(fd);
273         close(fd);
274 }
275
276 void
277 utfconv(void)
278 {
279         Rune r;
280         uchar *rb;
281         char *p, *e;
282         int i;
283
284         if(nbuf < 4)
285                 return;
286
287         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
288                 if(!mime)
289                         print("utf-32be ");
290                 return;
291         } else
292         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
293                 if(!mime)
294                         print("utf-32le ");
295                 return;
296         } else
297         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298                 memmove(buf, buf+3, nbuf-3);
299                 nbuf -= 3;
300                 return;
301         } else
302         if(memcmp(buf, "\xFE\xFF", 2) == 0){
303                 if(!mime)
304                         print("utf-16be ");
305
306                 nbuf -= 2;
307                 rb = malloc(nbuf+1);
308                 memmove(rb, buf+2, nbuf);
309                 p = (char*)buf;
310                 e = p+sizeof(buf)-UTFmax-1;
311                 for(i=0; i<nbuf && p < e; i+=2){
312                         r = rb[i+1] | rb[i]<<8;
313                         p += runetochar(p, &r);
314                 }
315                 *p = 0;
316                 free(rb);
317                 nbuf = p - (char*)buf;
318         } else
319         if(memcmp(buf, "\xFF\xFE", 2) == 0){
320                 if(!mime)
321                         print("utf-16le ");
322
323                 nbuf -= 2;
324                 rb = malloc(nbuf+1);
325                 memmove(rb, buf+2, nbuf);
326                 p = (char*)buf;
327                 e = p+sizeof(buf)-UTFmax-1;
328                 for(i=0; i<nbuf && p < e; i+=2){
329                         r = rb[i] | rb[i+1]<<8;
330                         p += runetochar(p, &r);
331                 }
332                 *p = 0;
333                 free(rb);
334                 nbuf = p - (char*)buf;
335         }
336 }
337
338 void
339 filetype(int fd)
340 {
341         Rune r;
342         int i, f, n;
343         char *p, *eob;
344
345         free(mbuf);
346         mbuf = dirfstat(fd);
347         if(mbuf == nil){
348                 print("cannot stat: %r\n");
349                 return;
350         }
351         if(mbuf->mode & DMDIR) {
352                 print("%s\n", mime ? OCTET : "directory");
353                 return;
354         }
355         if(mbuf->type != 'M' && mbuf->type != '|') {
356                 if(mime)
357                         print("%s\n", OCTET);
358                 else
359                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
360                 return;
361         }
362         /* may be reading a pipe on standard input */
363         nbuf = readn(fd, buf, sizeof(buf)-1);
364         if(nbuf < 0) {
365                 print("cannot read: %r\n");
366                 return;
367         }
368         if(nbuf == 0) {
369                 print("%s\n", mime ? PLAIN : "empty file");
370                 return;
371         }
372         buf[nbuf] = 0;
373
374         utfconv();
375
376         /*
377          * build histogram table
378          */
379         memset(cfreq, 0, sizeof(cfreq));
380         for (i = 0; language[i].name; i++)
381                 language[i].count = 0;
382         eob = (char *)buf+nbuf;
383         for(n = 0, p = (char *)buf; p < eob; n++) {
384                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
385                         break;
386                 p += chartorune(&r, p);
387                 if (r == 0)
388                         f = Cnull;
389                 else if (r <= 0x7f) {
390                         if (!isprint(r) && !isspace(r))
391                                 f = Ceascii;    /* ASCII control char */
392                         else f = r;
393                 } else if (r == 0x80) {
394                         bump_utf_count(r);
395                         f = Cutf;
396                 } else if (r < 0xA0)
397                         f = Cbinary;    /* Invalid Runes */
398                 else if (r <= 0xff)
399                         f = Clatin;     /* Latin 1 */
400                 else {
401                         bump_utf_count(r);
402                         f = Cutf;               /* UTF extension */
403                 }
404                 cfreq[f]++;                     /* ASCII chars peg directly */
405         }
406         /*
407          * gross classify
408          */
409         if (cfreq[Cbinary])
410                 guess = Fbinary;
411         else if (cfreq[Cutf])
412                 guess = Futf;
413         else if (cfreq[Clatin])
414                 guess = Flatin;
415         else if (cfreq[Ceascii])
416                 guess = Feascii;
417         else if (cfreq[Cnull])
418                 guess = Fbinary;
419         else
420                 guess = Fascii;
421         /*
422          * lookup dictionary words
423          */
424         memset(wfreq, 0, sizeof(wfreq));
425         if(guess == Fascii || guess == Flatin || guess == Futf)
426                 wordfreq();
427         /*
428          * call individual classify routines
429          */
430         for(i=0; call[i]; i++)
431                 if((*call[i])())
432                         return;
433
434         /*
435          * if all else fails,
436          * print out gross classification
437          */
438         if (nbuf < 100 && !mime)
439                 print(mime ? PLAIN : "short ");
440         if (guess == Fascii)
441                 print("%s\n", mime ? PLAIN : "Ascii");
442         else if (guess == Feascii)
443                 print("%s\n", mime ? PLAIN : "extended ascii");
444         else if (guess == Flatin)
445                 print("%s\n", mime ? PLAIN : "latin ascii");
446         else if (guess == Futf && utf_count() < 4)
447                 print_utf();
448         else print("%s\n", mime ? OCTET : "binary");
449 }
450
451 void
452 bump_utf_count(Rune r)
453 {
454         int low, high, mid;
455
456         high = sizeof(language)/sizeof(language[0])-1;
457         for (low = 0; low < high;) {
458                 mid = (low+high)/2;
459                 if (r >= language[mid].low) {
460                         if (r <= language[mid].high) {
461                                 language[mid].count++;
462                                 break;
463                         } else low = mid+1;
464                 } else high = mid;
465         }
466 }
467
468 int
469 utf_count(void)
470 {
471         int i, count;
472
473         count = 0;
474         for (i = 0; language[i].name; i++)
475                 if (language[i].count > 0)
476                         switch (language[i].mode) {
477                         case Normal:
478                         case First:
479                                 count++;
480                                 break;
481                         default:
482                                 break;
483                         }
484         return count;
485 }
486
487 int
488 chkascii(void)
489 {
490         int i;
491
492         for (i = 'a'; i < 'z'; i++)
493                 if (cfreq[i])
494                         return 1;
495         for (i = 'A'; i < 'Z'; i++)
496                 if (cfreq[i])
497                         return 1;
498         return 0;
499 }
500
501 int
502 find_first(char *name)
503 {
504         int i;
505
506         for (i = 0; language[i].name != 0; i++)
507                 if (language[i].mode == First
508                         && strcmp(language[i].name, name) == 0)
509                         return i;
510         return -1;
511 }
512
513 void
514 print_utf(void)
515 {
516         int i, printed, j;
517
518         if(mime){
519                 print("%s\n", PLAIN);
520                 return;
521         }
522         if (chkascii()) {
523                 printed = 1;
524                 print("Ascii");
525         } else
526                 printed = 0;
527         for (i = 0; language[i].name; i++)
528                 if (language[i].count) {
529                         switch(language[i].mode) {
530                         case Multi:
531                                 j = find_first(language[i].name);
532                                 if (j < 0)
533                                         break;
534                                 if (language[j].count > 0)
535                                         break;
536                                 /* Fall through */
537                         case Normal:
538                         case First:
539                                 if (printed)
540                                         print(" & ");
541                                 else printed = 1;
542                                 print("%s", language[i].name);
543                                 break;
544                         case Shared:
545                         default:
546                                 break;
547                         }
548                 }
549         if(!printed)
550                 print("UTF");
551         print(" text\n");
552 }
553
554 void
555 wordfreq(void)
556 {
557         int low, high, mid, r;
558         uchar *p, *p2, c;
559
560         p = buf;
561         for(;;) {
562                 while (p < buf+nbuf && !isalpha(*p))
563                         p++;
564                 if (p >= buf+nbuf)
565                         return;
566                 p2 = p;
567                 while(p < buf+nbuf && isalpha(*p))
568                         p++;
569                 c = *p;
570                 *p = 0;
571                 high = sizeof(dict)/sizeof(dict[0]);
572                 for(low = 0;low < high;) {
573                         mid = (low+high)/2;
574                         r = strcmp(dict[mid].word, (char*)p2);
575                         if(r == 0) {
576                                 wfreq[dict[mid].class]++;
577                                 break;
578                         }
579                         if(r < 0)
580                                 low = mid+1;
581                         else
582                                 high = mid;
583                 }
584                 *p++ = c;
585         }
586 }
587
/*
 * One magic-number rule: a 32-bit value v read from the file matches
 * when (v & mask) == x.  filemagic() prints desc, or mime under -m.
 */
typedef struct Filemagic Filemagic;
struct Filemagic {
        ulong x;        /* expected value after masking */
        ulong mask;     /* bits of the value that take part in the comparison */
        char *desc;     /* description printed normally */
        char *mime;     /* MIME type printed with -m */
};
595
596 /*
597  * integers in this table must be as seen on a little-endian machine
598  * when read from a file.
599  */
600 Filemagic long0tab[] = {
601         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
602         /* "pac1" */
603         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
604         /* "pXc2 */
605         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
606         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
607         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
608         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
609         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
610         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
611         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
612         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
613         0xf0ff,         0xf6ff,         "aac audio\n",  "audio/mpeg",
614         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
616         /* 0xfeedfacf */
617         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
618         /* 0xcefaedfe */
619         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
620         /* 0xcffaedfe */
621         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
622         /* 0xcafebabe */
623         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
624         /*
625          * venti & fossil magic numbers are stored big-endian on disk,
626          * thus the numbers appear reversed in this table.
627          */
628         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
629         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
630 };
631
632 int
633 filemagic(Filemagic *tab, int ntab, ulong x)
634 {
635         int i;
636
637         for(i=0; i<ntab; i++)
638                 if((x&tab[i].mask) == tab[i].x){
639                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
640                         return 1;
641                 }
642         return 0;
643 }
644
645 int
646 long0(void)
647 {
648         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
649 }
650
/*
 * A magic-number rule applied at byte offset off in the file.
 * The Filemagic part is an unnamed substructure: its members are
 * used directly (tp->x, tp->mask, tp->desc, tp->mime).
 */
typedef struct Fileoffmag Fileoffmag;
struct Fileoffmag {
        ulong   off;    /* file offset of the 4 bytes to test */
        Filemagic;
};

/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 */
Fileoffmag longofftab[] = {
        /*
         * venti & fossil magic numbers are stored big-endian on disk,
         * thus the numbers appear reversed in this table.
         */
        256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
        256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
        128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
        4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
};
671
672 int
673 fileoffmagic(Fileoffmag *tab, int ntab)
674 {
675         int i;
676         ulong x;
677         Fileoffmag *tp;
678         uchar buf[sizeof(long)];
679
680         for(i=0; i<ntab; i++) {
681                 tp = tab + i;
682                 seek(fd, tp->off, 0);
683                 if (readn(fd, buf, sizeof buf) != sizeof buf)
684                         continue;
685                 x = LENDIAN(buf);
686                 if((x&tp->mask) == tp->x){
687                         print("%s\n", mime ? tp->mime : tp->desc);
688                         return 1;
689                 }
690         }
691         return 0;
692 }
693
694 int
695 longoff(void)
696 {
697         return fileoffmagic(longofftab, nelem(longofftab));
698 }
699
700 int
701 isexec(void)
702 {
703         Fhdr f;
704
705         seek(fd, 0, 0);         /* reposition to start of file */
706         if(crackhdr(fd, &f)) {
707                 print("%s\n", mime ? OCTET : f.name);
708                 return 1;
709         }
710         return 0;
711 }
712
713
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/* one tar header block, viewed either as raw bytes or as its fields */
union   hblock
{
        char    dummy[TBLOCK];
        struct  header
        {
                char    name[NAMSIZ];
                char    mode[8];
                char    uid[8];
                char    gid[8];
                char    size[12];
                char    mtime[12];
                char    chksum[8];
                char    linkflag;
                char    linkname[NAMSIZ];
                /* rest are defined by POSIX's ustar format; see p1003.2b */
                char    magic[6];       /* "ustar" */
                char    version[2];
                char    uname[32];
                char    gname[32];
                char    devmajor[8];
                char    devminor[8];
                char    prefix[155];  /* if non-null, path = prefix "/" name */
        } dbuf;
};

/*
 * Compute the tar checksum of the header block hp: the sum of all
 * TBLOCK bytes, taken as unsigned, with the chksum field counted as
 * blanks.  NOTE: the chksum field of *hp is overwritten with blanks,
 * so callers must read the stored checksum before calling this.
 */
int
checksum(union hblock *hp)
{
        int k, sum;

        memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
        sum = 0;
        for(k = 0; k < TBLOCK; k++)
                sum += hp->dummy[k] & 0xff;
        return sum;
}
756
757 int
758 istar(void)
759 {
760         int chksum;
761         char tblock[TBLOCK];
762         union hblock *hp = (union hblock *)tblock;
763         struct header *hdr = &hp->dbuf;
764
765         seek(fd, 0, 0);         /* reposition to start of file */
766         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
767                 return 0;
768         chksum = strtol(hdr->chksum, 0, 8);
769         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
770                 if (strcmp(hdr->magic, "ustar") == 0)
771                         print(mime? "application/x-ustar\n": "posix tar archive\n");
772                 else
773                         print(mime? "application/x-tar\n": "tar archive\n");
774                 return 1;
775         }
776         return 0;
777 }
778
/*
 * initial words to classify file
 *
 * istring() compares the first `length' bytes of the buffer with
 * key using memcmp, so keys may contain NUL bytes; a length of -1
 * means strlen(key).  Entries are tried in order and the first match
 * wins, so a key must come before any shorter key that is a prefix
 * of it.  The table ends with an all-zero entry.
 */
struct  FILE_STRING
{
        char    *key;           /* bytes expected at the start of the file */
        char    *filetype;      /* description printed normally */
        int     length;         /* number of key bytes to compare, or -1 */
        char    *mime;          /* MIME type printed with -m */
} file_string[] =
{
        "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
        "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
        "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
        "!<arch>\n__.SYMDEF",   "archive random library",       16,     "application/octet-stream",
        "!<arch>\n",            "archive",                      8,      "application/octet-stream",
        "070707",               "cpio archive - ascii header",  6,      "application/octet-stream",
        "#!/bin/rc",            "rc executable file",           9,      "text/plain",
        "#!/bin/sh",            "sh executable file",           9,      "text/plain",
        "%!",                   "postscript",                   2,      "application/postscript",
        "\004%!",               "postscript",                   3,      "application/postscript",
        "x T post",             "troff output for post",        8,      "application/troff",
        "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
        "x T utf",              "troff output for UTF",         7,      "application/troff",
        "x T 202",              "troff output for 202",         7,      "application/troff",
        "x T aps",              "troff output for aps",         7,      "application/troff",
        "x T ",                 "troff output",                 4,      "application/troff",
        "GIF",                  "GIF image",                    3,      "image/gif",
        "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
        "%PDF",                 "PDF",                          4,      "application/pdf",
        "<!DOCTYPE",            "HTML file",                    9,      "text/html",
        "<!doctype",            "HTML file",                    9,      "text/html",
        "<!--",                 "HTML file",                    4,      "text/html",
        "<html>",               "HTML file",                    6,      "text/html",
        "<HTML>",               "HTML file",                    6,      "text/html",
        "<?xml",                "HTML file",                    5,      "text/html",
        "\111\111\052\000",     "tiff",                         4,      "image/tiff",
        "\115\115\000\052",     "tiff",                         4,      "image/tiff",
        "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
        "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
        "BM",                   "bmp",                          2,      "image/bmp",
        "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
        "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
        "\033E\033",    "HP PCL printer data",          3,      OCTET,
        "\033&",        "HP PCL printer data",          2,      OCTET,
        "\033%-12345X", "HPJCL file",           9,      "application/hpjcl",
        "\033Lua",              "Lua bytecode",         4,      OCTET,
        "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
        "OggS",                 "ogg audio",            4,      "audio/ogg",
        ".snd",                 "sun audio",            4,      "audio/basic",
        "\211PNG",              "PNG image",            4,      "image/png",
        "P1\n",                 "ppm",                          3,      "image/ppm",
        "P2\n",                 "ppm",                          3,      "image/ppm",
        "P3\n",                 "ppm",                          3,      "image/ppm",
        "P4\n",                 "ppm",                          3,      "image/ppm",
        "P5\n",                 "ppm",                          3,      "image/ppm",
        "P6\n",                 "ppm",                          3,      "image/ppm",
        "/* XPM */\n",  "xbm",                          10,     "image/xbm",
        ".HTML ",               "troff -ms input",      6,      "text/troff",
        ".LP",                  "troff -ms input",      3,      "text/troff",
        ".ND",                  "troff -ms input",      3,      "text/troff",
        ".PP",                  "troff -ms input",      3,      "text/troff",
        ".TL",                  "troff -ms input",      3,      "text/troff",
        ".TR",                  "troff -ms input",      3,      "text/troff",
        ".TH",                  "manual page",          3,      "text/troff",
        ".\\\"",                "troff input",          3,      "text/troff",
        ".de",                  "troff input",          3,      "text/troff",
        ".if",                  "troff input",          3,      "text/troff",
        ".nr",                  "troff input",          3,      "text/troff",
        ".tr",                  "troff input",          3,      "text/troff",
        "vac:",                 "venti score",          4,      "text/plain",
        "-----BEGIN CERTIFICATE-----\n",
                                "pem certificate",      -1,     "text/plain",
        "-----BEGIN TRUSTED CERTIFICATE-----\n",
                                "pem trusted certificate", -1,  "text/plain",
        "-----BEGIN X509 CERTIFICATE-----\n",
                                "pem x.509 certificate", -1,    "text/plain",
        "subject=/C=",          "pem certificate with header", -1, "text/plain",
        "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
        "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
        "[playlist]",           "playlist",             10,     "application/x-scpls",
        "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
        "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
        "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
        0,0,0,0
};
867
868 int
869 istring(void)
870 {
871         int i, l;
872         struct FILE_STRING *p;
873
874         for(p = file_string; p->key; p++) {
875                 l = p->length;
876                 if(l == -1)
877                         l = strlen(p->key);
878                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
879                         print("%s\n", mime ? p->mime : p->filetype);
880                         return 1;
881                 }
882         }
883         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
884                 for(i = 5; i < nbuf; i++)
885                         if(buf[i] == '\n')
886                                 break;
887                 if(mime)
888                         print("%s\n", OCTET);
889                 else
890                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
891                 return 1;
892         }
893         return 0;
894 }
895
/*
 * magic strings that live at a fixed file offset instead of at the
 * start of the file.  each entry is the offset followed by the same
 * fields as a FILE_STRING (key, filetype, length, mime); the table
 * ends at the entry whose key is 0.
 */
struct offstr
{
        ulong   off;
        struct FILE_STRING;
} offstrs[] = {
        32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
        0, 0, 0, 0, 0
};
904
905 int
906 isoffstr(void)
907 {
908         int n;
909         char buf[256];
910         struct offstr *p;
911
912         for(p = offstrs; p->key; p++) {
913                 seek(fd, p->off, 0);
914                 n = p->length;
915                 if (n > sizeof buf)
916                         n = sizeof buf;
917                 if (readn(fd, buf, n) != n)
918                         continue;
919                 if(memcmp(buf, p->key, n) == 0) {
920                         print("%s\n", mime ? p->mime : p->filetype);
921                         return 1;
922                 }
923         }
924         return 0;
925 }
926
927 int
928 iff(void)
929 {
930         if (strncmp((char*)buf, "FORM", 4) == 0 &&
931             strncmp((char*)buf+8, "AIFF", 4) == 0) {
932                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
933                 return 1;
934         }
935         if (strncmp((char*)buf, "RIFF", 4) == 0) {
936                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
937                         print("%s\n", mime? "audio/wave": "wave audio");
938                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
939                         print("%s\n", mime? "video/avi": "avi video");
940                 else
941                         print("%s\n", mime? "application/octet-stream": "riff file");
942                 return 1;
943         }
944         return 0;
945 }
946
/*
 * tag names looked for by ishtml, terminated by 0.
 * ishtml stops at the first name that matches as a prefix, so the
 * names are kept longest first: a short name such as "b" must not
 * hide a longer one such as "body".
 */
char*	html_string[] = {
	/* 10 characters */
	"blockquote",
	/* 8 */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset",
	"noframes", "textarea",
	/* 7 */
	"caption",
	/* 6 */
	"button", "center", "iframe", "object",
	"option", "script", "select", "strong",
	/* 5 */
	"blink", "embed", "frame", "input", "label",
	"param", "small", "style", "table", "tbody",
	"tfoot", "thead", "title",
	/* 4 */
	"?xml", "body", "code", "font", "form", "head",
	"html", "link", "menu", "meta", "span",
	/* 3 */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 */
	"br", "dd", "dl", "dt", "em",
	"h1", "h2", "h3", "h4", "h5", "h6",
	"hr", "li", "ol", "td", "th", "tr", "tt", "ul",
	/* 1 */
	"a", "b", "i", "p", "q", "u",
	0,
};
963
964 int
965 ishtml(void)
966 {
967         int i, n, count;
968         uchar *p;
969
970         count = 0;
971         p = buf;
972         for(;;) {
973                 while(p < buf+nbuf && *p != '<')
974                         p++;
975                 p++;
976                 if (p >= buf+nbuf)
977                         break;
978                 if(*p == '/')
979                         p++;
980                 if(p >= buf+nbuf)
981                         break;
982                 for(i = 0; html_string[i]; i++){
983                         n = strlen(html_string[i]);
984                         if(p + n > buf+nbuf)
985                                 continue;
986                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
987                                 p += n;
988                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
989                                         if(++count > 2) {
990                                                 print("%s\n", mime ? "text/html" : "HTML file");
991                                                 return 1;
992                                         }
993                                 }
994                                 break;
995                         }
996                 }
997         }
998         return 0;
999 }
1000
/*
 * header names counted by isrfc822, terminated by 0.
 * they are compared, ignoring case, against the start of a line.
 * the header defined by the mail RFCs is spelled "Reply-To:";
 * "reply to:" is kept only so nothing that matched before stops
 * matching.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"reply to:",
	"sender:",
	0,
};
1012
1013 int
1014 isrfc822(void)
1015 {
1016
1017         char *p, *q, *r;
1018         int i, count;
1019
1020         count = 0;
1021         p = (char*)buf;
1022         for(;;) {
1023                 q = strchr(p, '\n');
1024                 if(q == nil)
1025                         break;
1026                 *q = 0;
1027                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1028                         count++;
1029                         *q = '\n';
1030                         p = q+1;
1031                         continue;
1032                 }
1033                 *q = '\n';
1034                 if(*p != '\t' && *p != ' '){
1035                         r = strchr(p, ':');
1036                         if(r == 0 || r > q)
1037                                 break;
1038                         for(i = 0; rfc822_string[i]; i++) {
1039                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1040                                         count++;
1041                                         break;
1042                                 }
1043                         }
1044                 }
1045                 p = q+1;
1046         }
1047         if(count >= 3){
1048                 print("%s\n", mime ? "message/rfc822" : "email file");
1049                 return 1;
1050         }
1051         return 0;
1052 }
1053
1054 int
1055 ismbox(void)
1056 {
1057         char *p, *q;
1058
1059         p = (char*)buf;
1060         q = strchr(p, '\n');
1061         if(q == nil)
1062                 return 0;
1063         *q = 0;
1064         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1065                 print("%s\n", mime ? "text/plain" : "mail box");
1066                 return 1;
1067         }
1068         *q = '\n';
1069         return 0;
1070 }
1071
1072 int
1073 iscint(void)
1074 {
1075         int type;
1076         char *name;
1077         Biobuf b;
1078
1079         if(Binit(&b, fd, OREAD) == Beof)
1080                 return 0;
1081         seek(fd, 0, 0);
1082         type = objtype(&b, &name);
1083         if(type < 0)
1084                 return 0;
1085         if(mime)
1086                 print("%s\n", OCTET);
1087         else
1088                 print("%s intermediate\n", name);
1089         return 1;
1090 }
1091
1092 int
1093 isc(void)
1094 {
1095         int n;
1096
1097         n = wfreq[I1];
1098         /*
1099          * includes
1100          */
1101         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1102                 goto yes;
1103         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1104                 goto yes;
1105         /*
1106          * declarations
1107          */
1108         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1109                 goto yes;
1110         /*
1111          * assignments
1112          */
1113         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1114                 goto yes;
1115         return 0;
1116
1117 yes:
1118         if(mime){
1119                 print("%s\n", PLAIN);
1120                 return 1;
1121         }
1122         if(wfreq[Alword] > 0)
1123                 print("alef program\n");
1124         else
1125                 print("c program\n");
1126         return 1;
1127 }
1128
1129 int
1130 islimbo(void)
1131 {
1132         /*
1133          * includes
1134          */
1135         if(wfreq[Lword] < 4)
1136                 return 0;
1137         print("%s\n", mime ? PLAIN : "limbo program");
1138         return 1;
1139 }
1140
1141 int
1142 isas(void)
1143 {
1144         /*
1145          * includes
1146          */
1147         if(wfreq[Aword] < 2)
1148                 return 0;
1149         print("%s\n", mime ? PLAIN : "as program");
1150         return 1;
1151 }
1152
1153 int
1154 istga(void)
1155 {
1156         uchar *p;
1157
1158         p = buf;
1159         if(nbuf < 18)
1160                 return 0;
1161         if((p[12] | p[13]<<8) == 0)     /* width */
1162                 return 0;
1163         if((p[14] | p[15]<<8) == 0)     /* height */
1164                 return 0;
1165         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1166                 return 0;
1167         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1168                 return 0;
1169         if(p[1] == 0){  /* non color-mapped */
1170                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1171                         return 0;
1172                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1173                         return 0;
1174         } else
1175         if(p[1] == 1){  /* color-mapped */
1176                 if((p[2]&3) != 1 || p[7] == 0)  
1177                         return 0;
1178                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1179                         return 0;
1180         } else
1181                 return 0;
1182         print("%s\n", mime ? "image/tga" : "targa image");
1183         return 1;
1184 }
1185
1186 int
1187 ismp3(void)
1188 {
1189         uchar *p, *e;
1190
1191         p = buf;
1192         e = p + nbuf-1;
1193         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1194                 if((p[1] & 0xFE) == 0xFA){
1195                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1196                         return 1;
1197                 }
1198                 p++;
1199         }
1200         return 0;
1201 }
1202
1203 /*
1204  * low entropy means encrypted
1205  */
1206 int
1207 ismung(void)
1208 {
1209         int i, bucket[8];
1210         float cs;
1211
1212         if(nbuf < 64)
1213                 return 0;
1214         memset(bucket, 0, sizeof(bucket));
1215         for(i=nbuf-64; i<nbuf; i++)
1216                 bucket[(buf[i]>>5)&07] += 1;
1217
1218         cs = 0.;
1219         for(i=0; i<8; i++)
1220                 cs += (bucket[i]-8)*(bucket[i]-8);
1221         cs /= 8.;
1222         if(cs <= 24.322) {
1223                 if(buf[0]==0x1f && buf[1]==0x9d)
1224                         print("%s\n", mime ? "application/x-compress" : "compressed");
1225                 else
1226                 if(buf[0]==0x1f && buf[1]==0x8b)
1227                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1228                 else
1229                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1230                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1231                 else
1232                 if(buf[0]==0x78 && buf[1]==0x9c)
1233                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1234                 else
1235                         print("%s\n", mime ? OCTET : "encrypted");
1236                 return 1;
1237         }
1238         return 0;
1239 }
1240
1241 /*
1242  * english by punctuation and frequencies
1243  */
1244 int
1245 isenglish(void)
1246 {
1247         int vow, comm, rare, badpun, punct;
1248         char *p;
1249
1250         if(guess != Fascii && guess != Feascii)
1251                 return 0;
1252         badpun = 0;
1253         punct = 0;
1254         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1255                 switch(*p) {
1256                 case '.':
1257                 case ',':
1258                 case ')':
1259                 case '%':
1260                 case ';':
1261                 case ':':
1262                 case '?':
1263                         punct++;
1264                         if(p[1] != ' ' && p[1] != '\n')
1265                                 badpun++;
1266                 }
1267         if(badpun*5 > punct)
1268                 return 0;
1269         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1270                 return 0;
1271         if(2*cfreq[';'] > cfreq['e'])
1272                 return 0;
1273
1274         vow = 0;
1275         for(p="AEIOU"; *p; p++) {
1276                 vow += cfreq[*p];
1277                 vow += cfreq[tolower(*p)];
1278         }
1279         comm = 0;
1280         for(p="ETAION"; *p; p++) {
1281                 comm += cfreq[*p];
1282                 comm += cfreq[tolower(*p)];
1283         }
1284         rare = 0;
1285         for(p="VJKQXZ"; *p; p++) {
1286                 rare += cfreq[*p];
1287                 rare += cfreq[tolower(*p)];
1288         }
1289         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1290                 print("%s\n", mime ? PLAIN : "English text");
1291                 return 1;
1292         }
1293         return 0;
1294 }
1295
1296 /*
1297  * pick up a number with
1298  * syntax _*[0-9]+_
1299  */
1300 #define P9BITLEN        12
1301 int
1302 p9bitnum(uchar *bp)
1303 {
1304         int n, c, len;
1305
1306         len = P9BITLEN;
1307         while(*bp == ' ') {
1308                 bp++;
1309                 len--;
1310                 if(len <= 0)
1311                         return -1;
1312         }
1313         n = 0;
1314         while(len > 1) {
1315                 c = *bp++;
1316                 if(!isdigit(c))
1317                         return -1;
1318                 n = n*10 + c-'0';
1319                 len--;
1320         }
1321         if(*bp != ' ')
1322                 return -1;
1323         return n;
1324 }
1325
/*
 * work out the pixel depth from the first 12 byte field of an
 * image header.
 *	old form: a number n; the depth is 1<<n.
 *	new form: letter/number pairs such as r8g8b8; the depth is
 *		the sum of the numbers.
 * *newp is set to 1 when the field starts out in the new form, and
 * to 0 otherwise.
 * returns the depth, or -1 if the field is blank or malformed, the
 * shift or the sum would overflow, or a new-form depth is neither a
 * multiple nor a divisor of 8.  0 is never returned: callers divide
 * by the result.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	unsigned long v;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		if(ld < 0 || ld > 30)		/* 1<<ld must fit in an int */
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		/*
		 * insist on a digit here: strtoul skips blanks and
		 * would otherwise read the number of the next field.
		 */
		if(s >= es || *s < '0' || *s > '9')
			return -1;
		v = strtoul(s, &s, 10);
		if(v > 0x7fffffffUL - d)
			return -1;
		d += v;
	}

	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1353
1354 int
1355 isp9bit(void)
1356 {
1357         int dep, lox, loy, hix, hiy, px, new, cmpr;
1358         ulong t;
1359         long len;
1360         char *newlabel;
1361         uchar *cp;
1362
1363         cp = buf;
1364         cmpr = 0;
1365         newlabel = "old ";
1366
1367         if(memcmp(cp, "compressed\n", 11) == 0) {
1368                 cmpr = 1;
1369                 cp = buf + 11;
1370         }
1371
1372         dep = depthof((char*)cp + 0*P9BITLEN, &new);
1373         if(new)
1374                 newlabel = "";
1375         lox = p9bitnum(cp + 1*P9BITLEN);
1376         loy = p9bitnum(cp + 2*P9BITLEN);
1377         hix = p9bitnum(cp + 3*P9BITLEN);
1378         hiy = p9bitnum(cp + 4*P9BITLEN);
1379         if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1380                 return 0;
1381
1382         if(dep < 8){
1383                 px = 8/dep;             /* pixels per byte */
1384                 /* set l to number of bytes of data per scan line */
1385                 if(lox >= 0)
1386                         len = (hix+px-1)/px - lox/px;
1387                 else{                   /* make positive before divide */
1388                         t = (-lox)+px-1;
1389                         t = (t/px)*px;
1390                         len = (t+hix+px-1)/px;
1391                 }
1392         }else
1393                 len = (hix-lox)*dep/8;
1394         len *= hiy - loy;               /* col length */
1395         len += 5 * P9BITLEN;            /* size of initial ascii */
1396
1397         /*
1398          * for compressed images, don't look any further. otherwise:
1399          * for image file, length is non-zero and must match calculation above.
1400          * for /dev/window and /dev/screen the length is always zero.
1401          * for subfont, the subfont header should follow immediately.
1402          */
1403         if (cmpr) {
1404                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1405                         newlabel, dep);
1406                 return 1;
1407         }
1408         /*
1409          * mbuf->length == 0 probably indicates reading a pipe.
1410          * Ghostscript sometimes produces a little extra on the end.
1411          */
1412         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1413             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1414                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1415                 return 1;
1416         }
1417         if (p9subfont(buf+len)) {
1418                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1419                 return 1;
1420         }
1421         return 0;
1422 }
1423
1424 int
1425 p9subfont(uchar *p)
1426 {
1427         int n, h, a;
1428
1429         /* if image too big, assume it's a subfont */
1430         if (p+3*P9BITLEN > buf+sizeof(buf))
1431                 return 1;
1432
1433         n = p9bitnum(p + 0*P9BITLEN);   /* char count */
1434         if (n < 0)
1435                 return 0;
1436         h = p9bitnum(p + 1*P9BITLEN);   /* height */
1437         if (h < 0)
1438                 return 0;
1439         a = p9bitnum(p + 2*P9BITLEN);   /* ascent */
1440         if (a < 0)
1441                 return 0;
1442         return 1;
1443 }
1444
/* blank, tab or newline; also used by getfontnum */
#define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')

/*
 * font file: two numbers (height and ascent), then lines of
 *	min max [offset] filename
 * every file named must exist, either as written or with ".0"
 * appended.
 * prints the verdict and returns 1 if at least one such line was
 * accepted, otherwise returns 0.
 */
int
isp9font(void)
{
        uchar *cp, *p;
        int i, n;
        char pathname[1024];

        cp = buf;
        if (!getfontnum(cp, &cp))       /* height */
                return 0;
        if (!getfontnum(cp, &cp))       /* ascent */
                return 0;
        /* i counts the lines accepted; each pass starts at the next newline */
        for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
                /* a line that does not start with a number ends the list */
                if (!getfontnum(cp, &cp))       /* min */
                        break;
                if (!getfontnum(cp, &cp))       /* max */
                        return 0;
                getfontnum(cp, &cp);    /* optional offset */
                while (WHITESPACE(*cp))
                        cp++;
                /* the file name runs from p up to cp */
                for (p = cp; *cp && !WHITESPACE(*cp); cp++)
                                ;
                        /* construct a path name, if needed */
                n = 0;
                /*
                 * a name not starting with '/' gets the part of fname
                 * up to and including slash in front of it
                 * (presumably slash is the last '/' in fname; it is
                 * set outside this function).
                 */
                if (*p != '/' && slash) {
                        n = slash-fname+1;
                        if (n < sizeof(pathname))
                                memcpy(pathname, fname, n);
                        else n = 0;
                }
                /* names too long for pathname are not checked at all */
                if (n+cp-p+4 < sizeof(pathname)) {
                        memcpy(pathname+n, p, cp-p);
                        n += cp-p;
                        pathname[n] = 0;
                        if (access(pathname, AEXIST) < 0) {
                                strcpy(pathname+n, ".0");
                                if (access(pathname, AEXIST) < 0)
                                        return 0;
                        }
                }
        }
        if (i) {
                print(mime ? "text/plain\n" : "font file\n");
                return 1;
        }
        return 0;
}
1494
1495 int
1496 getfontnum(uchar *cp, uchar **rp)
1497 {
1498         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1499                 cp++;
1500         if (*cp < '0' || *cp > '9')
1501                 return 0;
1502         strtoul((char *)cp, (char **)rp, 0);
1503         if (!WHITESPACE(**rp)) {
1504                 *rp = cp;
1505                 return 0;
1506         }
1507         return 1;
1508 }
1509
1510 int
1511 isrtf(void)
1512 {
1513         if(strstr((char *)buf, "\\rtf1")){
1514                 print(mime ? "application/rtf\n" : "rich text format\n");
1515                 return 1;
1516         }
1517         return 0;
1518 }
1519
1520 int
1521 ismsdos(void)
1522 {
1523         if (buf[0] == 0x4d && buf[1] == 0x5a){
1524                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1525                 return 1;
1526         }
1527         return 0;
1528 }
1529
1530 int
1531 isicocur(void)
1532 {
1533         if(buf[0] || buf[1] || buf[3] || buf[9])
1534                 return 0;
1535         if(buf[4] == 0x00 && buf[5] == 0x00)
1536                 return 0;
1537         switch(buf[2]){
1538         case 1:
1539                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1540                 return 1;
1541         case 2:
1542                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1543                 return 1;
1544         }
1545         return 0;
1546 }
1547
1548 int
1549 iself(void)
1550 {
1551         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1552         [1]     "WE32100",
1553         [2]     "SPARC",
1554         [3]     "i386",
1555         [4]     "M68000",
1556         [5]     "M88000",
1557         [6]     "i486",
1558         [7]     "i860",
1559         [8]     "R3000",
1560         [9]     "S370",
1561         [10]    "R4000",
1562         [15]    "HP-PA",
1563         [18]    "sparc v8+",
1564         [19]    "i960",
1565         [20]    "PPC-32",
1566         [21]    "PPC-64",
1567         [40]    "ARM",
1568         [41]    "Alpha",
1569         [43]    "sparc v9",
1570         [50]    "IA-64",
1571         [62]    "AMD64",
1572         [75]    "VAX",
1573         };
1574         static char *type[] = {
1575         [1]     "relocatable object",
1576         [2]     "executable",
1577         [3]     "shared library",
1578         [4]     "core dump",
1579         };
1580
1581         if (memcmp(buf, "\x7fELF", 4) == 0){
1582                 if (!mime){
1583                         int isdifend = 0;
1584                         int n = (buf[19] << 8) | buf[18];
1585                         char *p = "unknown";
1586                         char *t = "unknown";
1587
1588                         if (n > 0 && n < nelem(cpu) && cpu[n])
1589                                 p = cpu[n];
1590                         else {
1591                                 /* try the other byte order */
1592                                 isdifend = 1;
1593                                 n = (buf[18] << 8) | buf[19];
1594                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1595                                         p = cpu[n];
1596                         }
1597                         if(isdifend)
1598                                 n = (buf[16]<< 8) | buf[17];
1599                         else
1600                                 n = (buf[17]<< 8) | buf[16];
1601
1602                         if(n>0 && n < nelem(type) && type[n])
1603                                 t = type[n];
1604                         print("%s ELF %s\n", p, t);
1605                 }
1606                 else
1607                         print("application/x-elf-executable\n");
1608                 return 1;
1609         }
1610
1611         return 0;
1612 }
1613
1614 int
1615 isface(void)
1616 {
1617         int i, j, ldepth, l;
1618         char *p;
1619
1620         ldepth = -1;
1621         for(j = 0; j < 3; j++){
1622                 for(p = (char*)buf, i=0; i<3; i++){
1623                         if(p[0] != '0' || p[1] != 'x')
1624                                 return 0;
1625                         if(buf[2+8] == ',')
1626                                 l = 2;
1627                         else if(buf[2+4] == ',')
1628                                 l = 1;
1629                         else
1630                                 return 0;
1631                         if(ldepth == -1)
1632                                 ldepth = l;
1633                         if(l != ldepth)
1634                                 return 0;
1635                         strtoul(p, &p, 16);
1636                         if(*p++ != ',')
1637                                 return 0;
1638                         while(*p == ' ' || *p == '\t')
1639                                 p++;
1640                 }
1641                 if (*p++ != '\n')
1642                         return 0;
1643         }
1644
1645         if(mime)
1646                 print("application/x-face\n");
1647         else
1648                 print("face image depth %d\n", ldepth);
1649         return 1;
1650 }
1651