/*
 * Source: git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
 * Commit note: fixed the bug that arisawa posted about on 9fans where file(1) would mistake 386...
 * Path: [plan9front.git] / sys / src / cmd / file.c
 */
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * LENDIAN(p): combine the four bytes p[0..3] into one 32-bit value,
 * p[0] being the least significant byte.  Every byte is widened to
 * ulong before it is shifted, so a top byte >= 0x80 is never shifted
 * into the sign bit of an int (and never sign-extends when the result
 * is stored in a wider unsigned type).
 */
#define LENDIAN(p)	((ulong)(p)[0] | ((ulong)(p)[1]<<8) | ((ulong)(p)[2]<<16) | ((ulong)(p)[3]<<24))
11
12 uchar   buf[6001];
13 short   cfreq[140];
14 short   wfreq[50];
15 int     nbuf;
16 Dir*    mbuf;
17 int     fd;
18 char    *fname;
19 char    *slash;
20
/*
 * Word classes (indices into wfreq[]) followed by the pseudo-character
 * classes that filetype() counts in cfreq[] above the ASCII range.
 * Judging by the words in dict[], Cword/Fword/Aword/Alword/Lword look
 * like C, Fortran, assembler, Alef and Limbo keywords, and I1..I3 the
 * pieces of an #include line -- NOTE(review): confirm against the
 * classifiers that read wfreq[].
 */
enum
{
	Cword,
	Fword,
	Aword,
	Alword,
	Lword,
	I1,
	I2,
	I3,
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Keyword dictionary searched by wordfreq().
 *
 * wordfreq() does a binary search with strcmp(), so the entries MUST
 * stay in strict strcmp() order (upper case sorts before lower case).
 * "bio" used to sit between "extern" and "float"; that single
 * misplaced entry made "bio", "data", "dimension", "double" and
 * "extern" unreachable.  It now lives between "array" and "block".
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
83
/* values of the 'mode' field in the language table */
enum
{
	Normal	= 0,
	First,		/* first range of a language that spans several ranges */
	Multi,		/* any later range of such a language */
	Shared,		/* code points used by several languages */
};

/*
 * Rune ranges recognised in UTF text.  bump_utf_count() binary
 * searches this table on low/high, so rows are kept in ascending
 * code point order; the all-zero row terminates the linear scans.
 */
struct
{
	int	mode;		/* Normal, First, Multi or Shared */
	int	count;		/* runes seen in [low, high] for the current file */
	int	low;
	int	high;
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },		/* terminal entry */
};
128
129
/*
 * Gross classification of the sample, chosen by filetype() from the
 * cfreq[] histogram and consulted by the classifiers.
 */
enum
{
	Fascii	= 0,	/* printable ascii */
	Flatin	= 1,	/* latin 1 */
	Futf	= 2,	/* UTF character set */
	Fbinary	= 3,	/* binary */
	Feascii	= 4,	/* ASCII with control chars */
	Fnull	= 5,	/* NULL in file */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     istga(void);
154 int     ismp3(void);
155 int     ismung(void);
156 int     isp9bit(void);
157 int     isp9font(void);
158 int     isrtf(void);
159 int     ismsdos(void);
160 int     isicocur(void);
161 int     iself(void);
162 int     istring(void);
163 int     isoffstr(void);
164 int     iff(void);
165 int     long0(void);
166 int     longoff(void);
167 int     istar(void);
168 int     isface(void);
169 int     isexec(void);
170 int     p9bitnum(uchar*);
171 int     p9subfont(uchar*);
172 void    print_utf(void);
173 void    type(char*, int);
174 int     utf_count(void);
175 void    wordfreq(void);
176
177 int     (*call[])(void) =
178 {
179         long0,          /* recognizable by first 4 bytes */
180         istring,        /* recognizable by first string */
181         iself,          /* ELF (foreign) executable */
182         isexec,         /* native executables */
183         iff,            /* interchange file format (strings) */
184         longoff,        /* recognizable by 4 bytes at some offset */
185         isoffstr,       /* recognizable by string at some offset */
186         isrfc822,       /* email file */
187         ismbox,         /* mail box */
188         istar,          /* recognizable by tar checksum */
189         iscint,         /* compiler/assembler intermediate */
190         ishtml,         /* html keywords */
191         islimbo,        /* limbo source */
192         isc,            /* c & alef compiler key words */
193         isas,           /* assembler key words */
194         isp9font,       /* plan 9 font */
195         isp9bit,        /* plan 9 image (as from /dev/window) */
196         isrtf,          /* rich text format */
197         ismsdos,        /* msdos exe (virus file attachement) */
198         isicocur,               /* windows icon or cursor file */
199         isface,         /* ascii face file */
200         istga,
201         ismp3,
202
203         /* last resorts */
204         ismung,         /* entropy compressed/encrypted */
205         isenglish,      /* char frequency English */
206         0
207 };
208
/* set by the -m flag: answer with a MIME type instead of a description */
int	mime;

/* generic MIME answers for opaque data and for plain text */
char	OCTET[] = "application/octet-stream";
char	PLAIN[] = "text/plain";
213
214 void
215 main(int argc, char *argv[])
216 {
217         int i, j, maxlen;
218         char *cp;
219         Rune r;
220
221         ARGBEGIN{
222         case 'm':
223                 mime = 1;
224                 break;
225         default:
226                 fprint(2, "usage: file [-m] [file...]\n");
227                 exits("usage");
228         }ARGEND;
229
230         maxlen = 0;
231         if(mime == 0 || argc > 1){
232                 for(i = 0; i < argc; i++) {
233                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
234                                         ;
235                         if(j > maxlen)
236                                 maxlen = j;
237                 }
238         }
239         if (argc <= 0) {
240                 if(!mime)
241                         print ("stdin: ");
242                 filetype(0);
243         }
244         else {
245                 for(i = 0; i < argc; i++)
246                         type(argv[i], maxlen);
247         }
248         exits(0);
249 }
250
251 void
252 type(char *file, int nlen)
253 {
254         Rune r;
255         int i;
256         char *p;
257
258         if(nlen > 0){
259                 slash = 0;
260                 for (i = 0, p = file; *p; i++) {
261                         if (*p == '/')                  /* find rightmost slash */
262                                 slash = p;
263                         p += chartorune(&r, p);         /* count runes */
264                 }
265                 print("%s:%*s",file, nlen-i+1, "");
266         }
267         fname = file;
268         if ((fd = open(file, OREAD)) < 0) {
269                 print("cannot open: %r\n");
270                 return;
271         }
272         filetype(fd);
273         close(fd);
274 }
275
276 void
277 utfconv(void)
278 {
279         Rune r;
280         uchar *rb;
281         char *p, *e;
282         int i;
283
284         if(nbuf < 4)
285                 return;
286
287         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
288                 if(!mime)
289                         print("utf-32be ");
290                 return;
291         } else
292         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
293                 if(!mime)
294                         print("utf-32le ");
295                 return;
296         } else
297         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298                 memmove(buf, buf+3, nbuf-3);
299                 nbuf -= 3;
300                 return;
301         } else
302         if(memcmp(buf, "\xFE\xFF", 2) == 0){
303                 if(!mime)
304                         print("utf-16be ");
305
306                 nbuf -= 2;
307                 rb = malloc(nbuf+1);
308                 memmove(rb, buf+2, nbuf);
309                 p = (char*)buf;
310                 e = p+sizeof(buf)-UTFmax-1;
311                 for(i=0; i<nbuf && p < e; i+=2){
312                         r = rb[i+1] | rb[i]<<8;
313                         p += runetochar(p, &r);
314                 }
315                 *p = 0;
316                 free(rb);
317                 nbuf = p - (char*)buf;
318         } else
319         if(memcmp(buf, "\xFF\xFE", 2) == 0){
320                 if(!mime)
321                         print("utf-16le ");
322
323                 nbuf -= 2;
324                 rb = malloc(nbuf+1);
325                 memmove(rb, buf+2, nbuf);
326                 p = (char*)buf;
327                 e = p+sizeof(buf)-UTFmax-1;
328                 for(i=0; i<nbuf && p < e; i+=2){
329                         r = rb[i] | rb[i+1]<<8;
330                         p += runetochar(p, &r);
331                 }
332                 *p = 0;
333                 free(rb);
334                 nbuf = p - (char*)buf;
335         }
336 }
337
338 void
339 filetype(int fd)
340 {
341         Rune r;
342         int i, f, n;
343         char *p, *eob;
344
345         free(mbuf);
346         mbuf = dirfstat(fd);
347         if(mbuf == nil){
348                 print("cannot stat: %r\n");
349                 return;
350         }
351         if(mbuf->mode & DMDIR) {
352                 print("%s\n", mime ? OCTET : "directory");
353                 return;
354         }
355         if(mbuf->type != 'M' && mbuf->type != '|') {
356                 if(mime)
357                         print("%s\n", OCTET);
358                 else
359                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
360                 return;
361         }
362         /* may be reading a pipe on standard input */
363         nbuf = readn(fd, buf, sizeof(buf)-1);
364         if(nbuf < 0) {
365                 print("cannot read: %r\n");
366                 return;
367         }
368         if(nbuf == 0) {
369                 print("%s\n", mime ? PLAIN : "empty file");
370                 return;
371         }
372         buf[nbuf] = 0;
373
374         utfconv();
375
376         /*
377          * build histogram table
378          */
379         memset(cfreq, 0, sizeof(cfreq));
380         for (i = 0; language[i].name; i++)
381                 language[i].count = 0;
382         eob = (char *)buf+nbuf;
383         for(n = 0, p = (char *)buf; p < eob; n++) {
384                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
385                         break;
386                 p += chartorune(&r, p);
387                 if (r == 0)
388                         f = Cnull;
389                 else if (r <= 0x7f) {
390                         if (!isprint(r) && !isspace(r))
391                                 f = Ceascii;    /* ASCII control char */
392                         else f = r;
393                 } else if (r == 0x80) {
394                         bump_utf_count(r);
395                         f = Cutf;
396                 } else if (r < 0xA0)
397                         f = Cbinary;    /* Invalid Runes */
398                 else if (r <= 0xff)
399                         f = Clatin;     /* Latin 1 */
400                 else {
401                         bump_utf_count(r);
402                         f = Cutf;               /* UTF extension */
403                 }
404                 cfreq[f]++;                     /* ASCII chars peg directly */
405         }
406         /*
407          * gross classify
408          */
409         if (cfreq[Cbinary])
410                 guess = Fbinary;
411         else if (cfreq[Cutf])
412                 guess = Futf;
413         else if (cfreq[Clatin])
414                 guess = Flatin;
415         else if (cfreq[Ceascii])
416                 guess = Feascii;
417         else if (cfreq[Cnull])
418                 guess = Fbinary;
419         else
420                 guess = Fascii;
421         /*
422          * lookup dictionary words
423          */
424         memset(wfreq, 0, sizeof(wfreq));
425         if(guess == Fascii || guess == Flatin || guess == Futf)
426                 wordfreq();
427         /*
428          * call individual classify routines
429          */
430         for(i=0; call[i]; i++)
431                 if((*call[i])())
432                         return;
433
434         /*
435          * if all else fails,
436          * print out gross classification
437          */
438         if (nbuf < 100 && !mime)
439                 print(mime ? PLAIN : "short ");
440         if (guess == Fascii)
441                 print("%s\n", mime ? PLAIN : "Ascii");
442         else if (guess == Feascii)
443                 print("%s\n", mime ? PLAIN : "extended ascii");
444         else if (guess == Flatin)
445                 print("%s\n", mime ? PLAIN : "latin ascii");
446         else if (guess == Futf && utf_count() < 4)
447                 print_utf();
448         else print("%s\n", mime ? OCTET : "binary");
449 }
450
451 void
452 bump_utf_count(Rune r)
453 {
454         int low, high, mid;
455
456         high = sizeof(language)/sizeof(language[0])-1;
457         for (low = 0; low < high;) {
458                 mid = (low+high)/2;
459                 if (r >= language[mid].low) {
460                         if (r <= language[mid].high) {
461                                 language[mid].count++;
462                                 break;
463                         } else low = mid+1;
464                 } else high = mid;
465         }
466 }
467
468 int
469 utf_count(void)
470 {
471         int i, count;
472
473         count = 0;
474         for (i = 0; language[i].name; i++)
475                 if (language[i].count > 0)
476                         switch (language[i].mode) {
477                         case Normal:
478                         case First:
479                                 count++;
480                                 break;
481                         default:
482                                 break;
483                         }
484         return count;
485 }
486
487 int
488 chkascii(void)
489 {
490         int i;
491
492         for (i = 'a'; i < 'z'; i++)
493                 if (cfreq[i])
494                         return 1;
495         for (i = 'A'; i < 'Z'; i++)
496                 if (cfreq[i])
497                         return 1;
498         return 0;
499 }
500
501 int
502 find_first(char *name)
503 {
504         int i;
505
506         for (i = 0; language[i].name != 0; i++)
507                 if (language[i].mode == First
508                         && strcmp(language[i].name, name) == 0)
509                         return i;
510         return -1;
511 }
512
513 void
514 print_utf(void)
515 {
516         int i, printed, j;
517
518         if(mime){
519                 print("%s\n", PLAIN);
520                 return;
521         }
522         if (chkascii()) {
523                 printed = 1;
524                 print("Ascii");
525         } else
526                 printed = 0;
527         for (i = 0; language[i].name; i++)
528                 if (language[i].count) {
529                         switch(language[i].mode) {
530                         case Multi:
531                                 j = find_first(language[i].name);
532                                 if (j < 0)
533                                         break;
534                                 if (language[j].count > 0)
535                                         break;
536                                 /* Fall through */
537                         case Normal:
538                         case First:
539                                 if (printed)
540                                         print(" & ");
541                                 else printed = 1;
542                                 print("%s", language[i].name);
543                                 break;
544                         case Shared:
545                         default:
546                                 break;
547                         }
548                 }
549         if(!printed)
550                 print("UTF");
551         print(" text\n");
552 }
553
554 void
555 wordfreq(void)
556 {
557         int low, high, mid, r;
558         uchar *p, *p2, c;
559
560         p = buf;
561         for(;;) {
562                 while (p < buf+nbuf && !isalpha(*p))
563                         p++;
564                 if (p >= buf+nbuf)
565                         return;
566                 p2 = p;
567                 while(p < buf+nbuf && isalpha(*p))
568                         p++;
569                 c = *p;
570                 *p = 0;
571                 high = sizeof(dict)/sizeof(dict[0]);
572                 for(low = 0;low < high;) {
573                         mid = (low+high)/2;
574                         r = strcmp(dict[mid].word, (char*)p2);
575                         if(r == 0) {
576                                 wfreq[dict[mid].class]++;
577                                 break;
578                         }
579                         if(r < 0)
580                                 low = mid+1;
581                         else
582                                 high = mid;
583                 }
584                 *p++ = c;
585         }
586 }
587
588 typedef struct Filemagic Filemagic;
589 struct Filemagic {
590         ulong x;
591         ulong mask;
592         char *desc;
593         char *mime;
594 };
595
596 /*
597  * integers in this table must be as seen on a little-endian machine
598  * when read from a file.
599  */
600 Filemagic long0tab[] = {
601         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
602         /* "pac1" */
603         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
604         /* "pXc2 */
605         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
606         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
607         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
608         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
609         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
610         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
611         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
612         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
613         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
614         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
615         /* 0xfeedfacf */
616         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
617         /* 0xcefaedfe */
618         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
619         /* 0xcffaedfe */
620         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
621         /* 0xcafebabe */
622         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
623         /*
624          * venti & fossil magic numbers are stored big-endian on disk,
625          * thus the numbers appear reversed in this table.
626          */
627         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
628         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
629 };
630
631 int
632 filemagic(Filemagic *tab, int ntab, ulong x)
633 {
634         int i;
635
636         for(i=0; i<ntab; i++)
637                 if((x&tab[i].mask) == tab[i].x){
638                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
639                         return 1;
640                 }
641         return 0;
642 }
643
644 int
645 long0(void)
646 {
647         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
648 }
649
650 typedef struct Fileoffmag Fileoffmag;
651 struct Fileoffmag {
652         ulong   off;
653         Filemagic;
654 };
655
656 /*
657  * integers in this table must be as seen on a little-endian machine
658  * when read from a file.
659  */
660 Fileoffmag longofftab[] = {
661         /*
662          * venti & fossil magic numbers are stored big-endian on disk,
663          * thus the numbers appear reversed in this table.
664          */
665         256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
666         256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
667         128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
668         4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
669 };
670
671 int
672 fileoffmagic(Fileoffmag *tab, int ntab)
673 {
674         int i;
675         ulong x;
676         Fileoffmag *tp;
677         uchar buf[sizeof(long)];
678
679         for(i=0; i<ntab; i++) {
680                 tp = tab + i;
681                 seek(fd, tp->off, 0);
682                 if (readn(fd, buf, sizeof buf) != sizeof buf)
683                         continue;
684                 x = LENDIAN(buf);
685                 if((x&tp->mask) == tp->x){
686                         print("%s\n", mime ? tp->mime : tp->desc);
687                         return 1;
688                 }
689         }
690         return 0;
691 }
692
693 int
694 longoff(void)
695 {
696         return fileoffmagic(longofftab, nelem(longofftab));
697 }
698
699 int
700 isexec(void)
701 {
702         Fhdr f;
703
704         seek(fd, 0, 0);         /* reposition to start of file */
705         if(crackhdr(fd, &f)) {
706                 print("%s\n", mime ? OCTET : f.name);
707                 return 1;
708         }
709         return 0;
710 }
711
712
713 /* from tar.c */
enum
{
	NAMSIZ	= 100,		/* bytes in the name and linkname fields */
	TBLOCK	= 512,		/* bytes in a tar block */
};

/* layout of the fields at the start of a tar header block */
struct header
{
	char	name[NAMSIZ];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	chksum[8];
	char	linkflag;
	char	linkname[NAMSIZ];
	/* rest are defined by POSIX's ustar format; see p1003.2b */
	char	magic[6];	/* "ustar" */
	char	version[2];
	char	uname[32];
	char	gname[32];
	char	devmajor[8];
	char	devminor[8];
	char	prefix[155];	/* if non-null, path = prefix "/" name */
};

/* a whole tar block, viewed either as raw bytes or as a header */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header dbuf;
};
740
741 int
742 checksum(union hblock *hp)
743 {
744         int i;
745         char *cp;
746         struct header *hdr = &hp->dbuf;
747
748         for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
749                 *cp = ' ';
750         i = 0;
751         for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
752                 i += *cp & 0xff;
753         return i;
754 }
755
756 int
757 istar(void)
758 {
759         int chksum;
760         char tblock[TBLOCK];
761         union hblock *hp = (union hblock *)tblock;
762         struct header *hdr = &hp->dbuf;
763
764         seek(fd, 0, 0);         /* reposition to start of file */
765         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
766                 return 0;
767         chksum = strtol(hdr->chksum, 0, 8);
768         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
769                 if (strcmp(hdr->magic, "ustar") == 0)
770                         print(mime? "application/x-ustar\n": "posix tar archive\n");
771                 else
772                         print(mime? "application/x-tar\n": "tar archive\n");
773                 return 1;
774         }
775         return 0;
776 }
777
778 /*
779  * initial words to classify file
780  */
781 struct  FILE_STRING
782 {
783         char    *key;
784         char    *filetype;
785         int     length;
786         char    *mime;
787 } file_string[] =
788 {
789         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
790         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
791         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
792         "!<arch>\n__.SYMDEF",   "archive random library",       16,     "application/octet-stream",
793         "!<arch>\n",            "archive",                      8,      "application/octet-stream",
794         "070707",               "cpio archive - ascii header",  6,      "application/octet-stream",
795         "#!/bin/rc",            "rc executable file",           9,      "text/plain",
796         "#!/bin/sh",            "sh executable file",           9,      "text/plain",
797         "%!",                   "postscript",                   2,      "application/postscript",
798         "\004%!",               "postscript",                   3,      "application/postscript",
799         "x T post",             "troff output for post",        8,      "application/troff",
800         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
801         "x T utf",              "troff output for UTF",         7,      "application/troff",
802         "x T 202",              "troff output for 202",         7,      "application/troff",
803         "x T aps",              "troff output for aps",         7,      "application/troff",
804         "x T ",                 "troff output",                 4,      "application/troff",
805         "GIF",                  "GIF image",                    3,      "image/gif",
806         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
807         "%PDF",                 "PDF",                          4,      "application/pdf",
808         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
809         "<!doctype",            "HTML file",                    9,      "text/html",
810         "<!--",                 "HTML file",                    4,      "text/html",
811         "<html>",               "HTML file",                    6,      "text/html",
812         "<HTML>",               "HTML file",                    6,      "text/html",
813         "<?xml",                "HTML file",                    5,      "text/html",
814         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
815         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
816         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
817         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
818         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
819         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
820         "BM",                   "bmp",                          2,      "image/bmp", 
821         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
822         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
823         "\033E\033",    "HP PCL printer data",          3,      OCTET,
824         "\033&",        "HP PCL printer data",          2,      OCTET,
825         "\033%-12345X", "HPJCL file",           9,      "application/hpjcl",
826         "\033Lua",              "Lua bytecode",         4,      OCTET,
827         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
828         "OggS",                 "ogg audio",            4,      "audio/ogg",
829         ".snd",                 "sun audio",            4,      "audio/basic",
830         "\211PNG",              "PNG image",            4,      "image/png",
831         "P1\n",                 "ppm",                          3,      "image/ppm",
832         "P2\n",                 "ppm",                          3,      "image/ppm",
833         "P3\n",                 "ppm",                          3,      "image/ppm",
834         "P4\n",                 "ppm",                          3,      "image/ppm",
835         "P5\n",                 "ppm",                          3,      "image/ppm",
836         "P6\n",                 "ppm",                          3,      "image/ppm",
837         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
838         ".HTML ",               "troff -ms input",      6,      "text/troff",
839         ".LP",                  "troff -ms input",      3,      "text/troff",
840         ".ND",                  "troff -ms input",      3,      "text/troff",
841         ".PP",                  "troff -ms input",      3,      "text/troff",
842         ".TL",                  "troff -ms input",      3,      "text/troff",
843         ".TR",                  "troff -ms input",      3,      "text/troff",
844         ".TH",                  "manual page",          3,      "text/troff",
845         ".\\\"",                "troff input",          3,      "text/troff",
846         ".de",                  "troff input",          3,      "text/troff",
847         ".if",                  "troff input",          3,      "text/troff",
848         ".nr",                  "troff input",          3,      "text/troff",
849         ".tr",                  "troff input",          3,      "text/troff",
850         "vac:",                 "venti score",          4,      "text/plain",
851         "-----BEGIN CERTIFICATE-----\n",
852                                 "pem certificate",      -1,     "text/plain",
853         "-----BEGIN TRUSTED CERTIFICATE-----\n",
854                                 "pem trusted certificate", -1,  "text/plain",
855         "-----BEGIN X509 CERTIFICATE-----\n",
856                                 "pem x.509 certificate", -1,    "text/plain",
857         "subject=/C=",          "pem certificate with header", -1, "text/plain",
858         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
859         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
860         "[playlist]",           "playlist",             10,     "application/x-scpls",
861         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
862         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
863         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
864         0,0,0,0
865 };
866
867 int
868 istring(void)
869 {
870         int i, l;
871         struct FILE_STRING *p;
872
873         for(p = file_string; p->key; p++) {
874                 l = p->length;
875                 if(l == -1)
876                         l = strlen(p->key);
877                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
878                         print("%s\n", mime ? p->mime : p->filetype);
879                         return 1;
880                 }
881         }
882         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
883                 for(i = 5; i < nbuf; i++)
884                         if(buf[i] == '\n')
885                                 break;
886                 if(mime)
887                         print("%s\n", OCTET);
888                 else
889                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
890                 return 1;
891         }
892         return 0;
893 }
894
895 struct offstr
896 {
897         ulong   off;
898         struct FILE_STRING;
899 } offstrs[] = {
900         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
901         0, 0, 0, 0, 0
902 };
903
904 int
905 isoffstr(void)
906 {
907         int n;
908         char buf[256];
909         struct offstr *p;
910
911         for(p = offstrs; p->key; p++) {
912                 seek(fd, p->off, 0);
913                 n = p->length;
914                 if (n > sizeof buf)
915                         n = sizeof buf;
916                 if (readn(fd, buf, n) != n)
917                         continue;
918                 if(memcmp(buf, p->key, n) == 0) {
919                         print("%s\n", mime ? p->mime : p->filetype);
920                         return 1;
921                 }
922         }
923         return 0;
924 }
925
926 int
927 iff(void)
928 {
929         if (strncmp((char*)buf, "FORM", 4) == 0 &&
930             strncmp((char*)buf+8, "AIFF", 4) == 0) {
931                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
932                 return 1;
933         }
934         if (strncmp((char*)buf, "RIFF", 4) == 0) {
935                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
936                         print("%s\n", mime? "audio/wave": "wave audio");
937                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
938                         print("%s\n", mime? "video/avi": "avi video");
939                 else
940                         print("%s\n", mime? "application/octet-stream": "riff file");
941                 return 1;
942         }
943         return 0;
944 }
945
/*
 * Tag names looked for by ishtml(), without the leading '<'.
 * The list is terminated by 0.  Entries are grouped by length, longest
 * first; ishtml() stops at the first entry that matches as a prefix, so
 * keep longer names ahead of shorter ones they begin with.
 */
char*	html_string[] = {
	/* 10 characters */
	"blockquote",
	/* 8 characters */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
	/* 7 characters */
	"caption",
	/* 6 characters */
	"button", "center", "iframe", "object", "option", "script", "select",
	"strong",
	/* 5 characters */
	"blink", "embed", "frame", "input", "label", "param", "small", "style",
	"table", "tbody", "tfoot", "thead", "title",
	/* 4 characters */
	"?xml", "body", "code", "font", "form", "head", "html", "link", "menu",
	"meta", "span",
	/* 3 characters */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 characters */
	"br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
	"li", "ol", "td", "th", "tr", "tt", "ul",
	/* 1 character */
	"a", "b", "i", "p", "q", "u",
	0,
};
962
963 int
964 ishtml(void)
965 {
966         int i, n, count;
967         uchar *p;
968
969         count = 0;
970         p = buf;
971         for(;;) {
972                 while(p < buf+nbuf && *p != '<')
973                         p++;
974                 p++;
975                 if (p >= buf+nbuf)
976                         break;
977                 if(*p == '/')
978                         p++;
979                 if(p >= buf+nbuf)
980                         break;
981                 for(i = 0; html_string[i]; i++){
982                         n = strlen(html_string[i]);
983                         if(p + n > buf+nbuf)
984                                 continue;
985                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
986                                 p += n;
987                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
988                                         if(++count > 2) {
989                                                 print("%s\n", mime ? "text/html" : "HTML file");
990                                                 return 1;
991                                         }
992                                 }
993                                 break;
994                         }
995                 }
996         }
997         return 0;
998 }
999
/*
 * Header field names counted by isrfc822(), lower case and including
 * the colon; compared case-insensitively against the start of each
 * header line.  The list is terminated by 0.
 *
 * The reply header is spelled "reply-to:"; a field name cannot contain
 * a space, so the former "reply to:" entry could never match.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"sender:",
	0,
};
1011
1012 int
1013 isrfc822(void)
1014 {
1015
1016         char *p, *q, *r;
1017         int i, count;
1018
1019         count = 0;
1020         p = (char*)buf;
1021         for(;;) {
1022                 q = strchr(p, '\n');
1023                 if(q == nil)
1024                         break;
1025                 *q = 0;
1026                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1027                         count++;
1028                         *q = '\n';
1029                         p = q+1;
1030                         continue;
1031                 }
1032                 *q = '\n';
1033                 if(*p != '\t' && *p != ' '){
1034                         r = strchr(p, ':');
1035                         if(r == 0 || r > q)
1036                                 break;
1037                         for(i = 0; rfc822_string[i]; i++) {
1038                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1039                                         count++;
1040                                         break;
1041                                 }
1042                         }
1043                 }
1044                 p = q+1;
1045         }
1046         if(count >= 3){
1047                 print("%s\n", mime ? "message/rfc822" : "email file");
1048                 return 1;
1049         }
1050         return 0;
1051 }
1052
1053 int
1054 ismbox(void)
1055 {
1056         char *p, *q;
1057
1058         p = (char*)buf;
1059         q = strchr(p, '\n');
1060         if(q == nil)
1061                 return 0;
1062         *q = 0;
1063         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1064                 print("%s\n", mime ? "text/plain" : "mail box");
1065                 return 1;
1066         }
1067         *q = '\n';
1068         return 0;
1069 }
1070
1071 int
1072 iscint(void)
1073 {
1074         int type;
1075         char *name;
1076         Biobuf b;
1077
1078         if(Binit(&b, fd, OREAD) == Beof)
1079                 return 0;
1080         seek(fd, 0, 0);
1081         type = objtype(&b, &name);
1082         if(type < 0)
1083                 return 0;
1084         if(mime)
1085                 print("%s\n", OCTET);
1086         else
1087                 print("%s intermediate\n", name);
1088         return 1;
1089 }
1090
1091 int
1092 isc(void)
1093 {
1094         int n;
1095
1096         n = wfreq[I1];
1097         /*
1098          * includes
1099          */
1100         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1101                 goto yes;
1102         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1103                 goto yes;
1104         /*
1105          * declarations
1106          */
1107         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1108                 goto yes;
1109         /*
1110          * assignments
1111          */
1112         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1113                 goto yes;
1114         return 0;
1115
1116 yes:
1117         if(mime){
1118                 print("%s\n", PLAIN);
1119                 return 1;
1120         }
1121         if(wfreq[Alword] > 0)
1122                 print("alef program\n");
1123         else
1124                 print("c program\n");
1125         return 1;
1126 }
1127
1128 int
1129 islimbo(void)
1130 {
1131         /*
1132          * includes
1133          */
1134         if(wfreq[Lword] < 4)
1135                 return 0;
1136         print("%s\n", mime ? PLAIN : "limbo program");
1137         return 1;
1138 }
1139
1140 int
1141 isas(void)
1142 {
1143         /*
1144          * includes
1145          */
1146         if(wfreq[Aword] < 2)
1147                 return 0;
1148         print("%s\n", mime ? PLAIN : "as program");
1149         return 1;
1150 }
1151
1152 int
1153 istga(void)
1154 {
1155         uchar *p;
1156
1157         p = buf;
1158         if(nbuf < 18)
1159                 return 0;
1160         if((p[12] | p[13]<<8) == 0)     /* width */
1161                 return 0;
1162         if((p[14] | p[15]<<8) == 0)     /* height */
1163                 return 0;
1164         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1165                 return 0;
1166         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1167                 return 0;
1168         if(p[1] == 0){  /* non color-mapped */
1169                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1170                         return 0;
1171                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1172                         return 0;
1173         } else
1174         if(p[1] == 1){  /* color-mapped */
1175                 if((p[2]&3) != 1 || p[7] == 0)  
1176                         return 0;
1177                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1178                         return 0;
1179         } else
1180                 return 0;
1181         print("%s\n", mime ? "image/tga" : "targa image");
1182         return 1;
1183 }
1184
1185 int
1186 ismp3(void)
1187 {
1188         uchar *p, *e;
1189
1190         p = buf;
1191         e = p + nbuf-1;
1192         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1193                 if((p[1] & 0xFE) == 0xFA){
1194                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1195                         return 1;
1196                 }
1197                 p++;
1198         }
1199         return 0;
1200 }
1201
1202 /*
1203  * low entropy means encrypted
1204  */
1205 int
1206 ismung(void)
1207 {
1208         int i, bucket[8];
1209         float cs;
1210
1211         if(nbuf < 64)
1212                 return 0;
1213         memset(bucket, 0, sizeof(bucket));
1214         for(i=nbuf-64; i<nbuf; i++)
1215                 bucket[(buf[i]>>5)&07] += 1;
1216
1217         cs = 0.;
1218         for(i=0; i<8; i++)
1219                 cs += (bucket[i]-8)*(bucket[i]-8);
1220         cs /= 8.;
1221         if(cs <= 24.322) {
1222                 if(buf[0]==0x1f && buf[1]==0x9d)
1223                         print("%s\n", mime ? "application/x-compress" : "compressed");
1224                 else
1225                 if(buf[0]==0x1f && buf[1]==0x8b)
1226                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1227                 else
1228                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1229                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1230                 else
1231                 if(buf[0]==0x78 && buf[1]==0x9c)
1232                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1233                 else
1234                         print("%s\n", mime ? OCTET : "encrypted");
1235                 return 1;
1236         }
1237         return 0;
1238 }
1239
1240 /*
1241  * english by punctuation and frequencies
1242  */
1243 int
1244 isenglish(void)
1245 {
1246         int vow, comm, rare, badpun, punct;
1247         char *p;
1248
1249         if(guess != Fascii && guess != Feascii)
1250                 return 0;
1251         badpun = 0;
1252         punct = 0;
1253         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1254                 switch(*p) {
1255                 case '.':
1256                 case ',':
1257                 case ')':
1258                 case '%':
1259                 case ';':
1260                 case ':':
1261                 case '?':
1262                         punct++;
1263                         if(p[1] != ' ' && p[1] != '\n')
1264                                 badpun++;
1265                 }
1266         if(badpun*5 > punct)
1267                 return 0;
1268         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1269                 return 0;
1270         if(2*cfreq[';'] > cfreq['e'])
1271                 return 0;
1272
1273         vow = 0;
1274         for(p="AEIOU"; *p; p++) {
1275                 vow += cfreq[*p];
1276                 vow += cfreq[tolower(*p)];
1277         }
1278         comm = 0;
1279         for(p="ETAION"; *p; p++) {
1280                 comm += cfreq[*p];
1281                 comm += cfreq[tolower(*p)];
1282         }
1283         rare = 0;
1284         for(p="VJKQXZ"; *p; p++) {
1285                 rare += cfreq[*p];
1286                 rare += cfreq[tolower(*p)];
1287         }
1288         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1289                 print("%s\n", mime ? PLAIN : "English text");
1290                 return 1;
1291         }
1292         return 0;
1293 }
1294
1295 /*
1296  * pick up a number with
1297  * syntax _*[0-9]+_
1298  */
1299 #define P9BITLEN        12
1300 int
1301 p9bitnum(uchar *bp)
1302 {
1303         int n, c, len;
1304
1305         len = P9BITLEN;
1306         while(*bp == ' ') {
1307                 bp++;
1308                 len--;
1309                 if(len <= 0)
1310                         return -1;
1311         }
1312         n = 0;
1313         while(len > 1) {
1314                 c = *bp++;
1315                 if(!isdigit(c))
1316                         return -1;
1317                 n = n*10 + c-'0';
1318                 len--;
1319         }
1320         if(*bp != ' ')
1321                 return -1;
1322         return n;
1323 }
1324
/*
 * depthof - decode the first 12-byte field of an image header.
 *
 * s points at the field.  After leading blanks it holds either
 *   - a number (old format): the result is 1<<number and *newp is 0;
 *   - a channel descriptor (new format): letter/number pairs such as
 *     r8g8b8; the result is the sum of the numbers and *newp is 1.
 *
 * Returns the depth in bits, always greater than zero, or -1 when the
 * field is blank, the old-format exponent is outside 0..30 (the shift
 * would not fit an int), or the new-format sum is not positive or is
 * neither a multiple nor a divisor of 8.  Never returning 0 matters to
 * callers that divide by the depth.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		d += strtoul(s, &s, 10);
	}

	/* a descriptor with no digits adds up to 0: not a depth */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1352
1353 int
1354 isp9bit(void)
1355 {
1356         int dep, lox, loy, hix, hiy, px, new, cmpr;
1357         ulong t;
1358         long len;
1359         char *newlabel;
1360         uchar *cp;
1361
1362         cp = buf;
1363         cmpr = 0;
1364         newlabel = "old ";
1365
1366         if(memcmp(cp, "compressed\n", 11) == 0) {
1367                 cmpr = 1;
1368                 cp = buf + 11;
1369         }
1370
1371         dep = depthof((char*)cp + 0*P9BITLEN, &new);
1372         if(new)
1373                 newlabel = "";
1374         lox = p9bitnum(cp + 1*P9BITLEN);
1375         loy = p9bitnum(cp + 2*P9BITLEN);
1376         hix = p9bitnum(cp + 3*P9BITLEN);
1377         hiy = p9bitnum(cp + 4*P9BITLEN);
1378         if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1379                 return 0;
1380
1381         if(dep < 8){
1382                 px = 8/dep;             /* pixels per byte */
1383                 /* set l to number of bytes of data per scan line */
1384                 if(lox >= 0)
1385                         len = (hix+px-1)/px - lox/px;
1386                 else{                   /* make positive before divide */
1387                         t = (-lox)+px-1;
1388                         t = (t/px)*px;
1389                         len = (t+hix+px-1)/px;
1390                 }
1391         }else
1392                 len = (hix-lox)*dep/8;
1393         len *= hiy - loy;               /* col length */
1394         len += 5 * P9BITLEN;            /* size of initial ascii */
1395
1396         /*
1397          * for compressed images, don't look any further. otherwise:
1398          * for image file, length is non-zero and must match calculation above.
1399          * for /dev/window and /dev/screen the length is always zero.
1400          * for subfont, the subfont header should follow immediately.
1401          */
1402         if (cmpr) {
1403                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1404                         newlabel, dep);
1405                 return 1;
1406         }
1407         /*
1408          * mbuf->length == 0 probably indicates reading a pipe.
1409          * Ghostscript sometimes produces a little extra on the end.
1410          */
1411         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1412             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1413                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1414                 return 1;
1415         }
1416         if (p9subfont(buf+len)) {
1417                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1418                 return 1;
1419         }
1420         return 0;
1421 }
1422
1423 int
1424 p9subfont(uchar *p)
1425 {
1426         int n, h, a;
1427
1428         /* if image too big, assume it's a subfont */
1429         if (p+3*P9BITLEN > buf+sizeof(buf))
1430                 return 1;
1431
1432         n = p9bitnum(p + 0*P9BITLEN);   /* char count */
1433         if (n < 0)
1434                 return 0;
1435         h = p9bitnum(p + 1*P9BITLEN);   /* height */
1436         if (h < 0)
1437                 return 0;
1438         a = p9bitnum(p + 2*P9BITLEN);   /* ascent */
1439         if (a < 0)
1440                 return 0;
1441         return 1;
1442 }
1443
1444 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1445
1446 int
1447 isp9font(void)
1448 {
1449         uchar *cp, *p;
1450         int i, n;
1451         char pathname[1024];
1452
1453         cp = buf;
1454         if (!getfontnum(cp, &cp))       /* height */
1455                 return 0;
1456         if (!getfontnum(cp, &cp))       /* ascent */
1457                 return 0;
1458         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1459                 if (!getfontnum(cp, &cp))       /* min */
1460                         break;
1461                 if (!getfontnum(cp, &cp))       /* max */
1462                         return 0;
1463                 getfontnum(cp, &cp);    /* optional offset */
1464                 while (WHITESPACE(*cp))
1465                         cp++;
1466                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1467                                 ;
1468                         /* construct a path name, if needed */
1469                 n = 0;
1470                 if (*p != '/' && slash) {
1471                         n = slash-fname+1;
1472                         if (n < sizeof(pathname))
1473                                 memcpy(pathname, fname, n);
1474                         else n = 0;
1475                 }
1476                 if (n+cp-p+4 < sizeof(pathname)) {
1477                         memcpy(pathname+n, p, cp-p);
1478                         n += cp-p;
1479                         pathname[n] = 0;
1480                         if (access(pathname, AEXIST) < 0) {
1481                                 strcpy(pathname+n, ".0");
1482                                 if (access(pathname, AEXIST) < 0)
1483                                         return 0;
1484                         }
1485                 }
1486         }
1487         if (i) {
1488                 print(mime ? "text/plain\n" : "font file\n");
1489                 return 1;
1490         }
1491         return 0;
1492 }
1493
1494 int
1495 getfontnum(uchar *cp, uchar **rp)
1496 {
1497         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1498                 cp++;
1499         if (*cp < '0' || *cp > '9')
1500                 return 0;
1501         strtoul((char *)cp, (char **)rp, 0);
1502         if (!WHITESPACE(**rp)) {
1503                 *rp = cp;
1504                 return 0;
1505         }
1506         return 1;
1507 }
1508
1509 int
1510 isrtf(void)
1511 {
1512         if(strstr((char *)buf, "\\rtf1")){
1513                 print(mime ? "application/rtf\n" : "rich text format\n");
1514                 return 1;
1515         }
1516         return 0;
1517 }
1518
1519 int
1520 ismsdos(void)
1521 {
1522         if (buf[0] == 0x4d && buf[1] == 0x5a){
1523                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1524                 return 1;
1525         }
1526         return 0;
1527 }
1528
1529 int
1530 isicocur(void)
1531 {
1532         if(buf[0] || buf[1] || buf[3] || buf[9])
1533                 return 0;
1534         if(buf[4] == 0x00 && buf[5] == 0x00)
1535                 return 0;
1536         switch(buf[2]){
1537         case 1:
1538                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1539                 return 1;
1540         case 2:
1541                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1542                 return 1;
1543         }
1544         return 0;
1545 }
1546
1547 int
1548 iself(void)
1549 {
1550         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1551         [1]     "WE32100",
1552         [2]     "SPARC",
1553         [3]     "i386",
1554         [4]     "M68000",
1555         [5]     "M88000",
1556         [6]     "i486",
1557         [7]     "i860",
1558         [8]     "R3000",
1559         [9]     "S370",
1560         [10]    "R4000",
1561         [15]    "HP-PA",
1562         [18]    "sparc v8+",
1563         [19]    "i960",
1564         [20]    "PPC-32",
1565         [21]    "PPC-64",
1566         [40]    "ARM",
1567         [41]    "Alpha",
1568         [43]    "sparc v9",
1569         [50]    "IA-64",
1570         [62]    "AMD64",
1571         [75]    "VAX",
1572         };
1573         static char *type[] = {
1574         [1]     "relocatable object",
1575         [2]     "executable",
1576         [3]     "shared library",
1577         [4]     "core dump",
1578         };
1579
1580         if (memcmp(buf, "\x7fELF", 4) == 0){
1581                 if (!mime){
1582                         int isdifend = 0;
1583                         int n = (buf[19] << 8) | buf[18];
1584                         char *p = "unknown";
1585                         char *t = "unknown";
1586
1587                         if (n > 0 && n < nelem(cpu) && cpu[n])
1588                                 p = cpu[n];
1589                         else {
1590                                 /* try the other byte order */
1591                                 isdifend = 1;
1592                                 n = (buf[18] << 8) | buf[19];
1593                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1594                                         p = cpu[n];
1595                         }
1596                         if(isdifend)
1597                                 n = (buf[16]<< 8) | buf[17];
1598                         else
1599                                 n = (buf[17]<< 8) | buf[16];
1600
1601                         if(n>0 && n < nelem(type) && type[n])
1602                                 t = type[n];
1603                         print("%s ELF %s\n", p, t);
1604                 }
1605                 else
1606                         print("application/x-elf-executable\n");
1607                 return 1;
1608         }
1609
1610         return 0;
1611 }
1612
1613 int
1614 isface(void)
1615 {
1616         int i, j, ldepth, l;
1617         char *p;
1618
1619         ldepth = -1;
1620         for(j = 0; j < 3; j++){
1621                 for(p = (char*)buf, i=0; i<3; i++){
1622                         if(p[0] != '0' || p[1] != 'x')
1623                                 return 0;
1624                         if(buf[2+8] == ',')
1625                                 l = 2;
1626                         else if(buf[2+4] == ',')
1627                                 l = 1;
1628                         else
1629                                 return 0;
1630                         if(ldepth == -1)
1631                                 ldepth = l;
1632                         if(l != ldepth)
1633                                 return 0;
1634                         strtoul(p, &p, 16);
1635                         if(*p++ != ',')
1636                                 return 0;
1637                         while(*p == ' ' || *p == '\t')
1638                                 p++;
1639                 }
1640                 if (*p++ != '\n')
1641                         return 0;
1642         }
1643
1644         if(mime)
1645                 print("application/x-face\n");
1646         else
1647                 print("face image depth %d\n", ldepth);
1648         return 1;
1649 }
1650