]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
merge
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * LENDIAN(p) assembles the four bytes p[0..3] into one value,
 * least significant byte first.  Each byte is widened to unsigned
 * long before it is shifted so that a top byte >= 0x80 is never
 * shifted into the sign bit of an int.
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
			((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
11
12 uchar   buf[6001];
13 short   cfreq[140];
14 short   wfreq[50];
15 int     nbuf;
16 Dir*    mbuf;
17 int     fd;
18 char    *fname;
19 char    *slash;
20
/*
 * Word classes (indices into wfreq) followed by the character
 * classes (indices into cfreq above the ASCII range).
 */
enum
{
	Cword,		/* words such as "char", "struct", "void" */
	Fword,		/* words such as "subroutine", "dimension" */
	Aword,		/* "TEXT" */
	Alword,		/* "adt", "aggr", "alef" */
	Lword,		/* words such as "implement", "module" */
	I1,		/* "include" */
	I2,		/* header base names: "u", "libc", "bio", "stdio" */
	I3,		/* "h" */
	Clatin	= 128,	/* rune in 0xA0..0xFF */
	Cbinary,	/* rune in 0x81..0x9F */
	Cnull,		/* NUL */
	Ceascii,	/* non-printing, non-space ASCII */
	Cutf,		/* 0x80 or any rune above 0xFF */
};

/*
 * Dictionary consulted by wordfreq().  It is searched with a binary
 * search using strcmp(), so the entries MUST stay in strcmp order
 * (upper case sorts before lower case).
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
83
/* values of the 'mode' field in the language table */
enum
{
	Normal	= 0,
	First,		/* first entry of a language that spans several ranges */
	Multi,		/* subsequent entries of such a language */
	Shared,		/* code points used by several languages */
};

/*
 * Rune ranges and the name reported for them.  bump_utf_count()
 * binary-searches this table, so entries are kept in ascending,
 * non-overlapping order; the all-zero entry marks the end.
 */
struct
{
	int	mode;		/* see enum above */
	int	count;		/* runes of this range seen in the current buffer */
	int	low;		/* first rune of the range */
	int	high;		/* last rune of the range */
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },	/* terminal entry */
};
128
129
/* gross classification of the buffer, chosen in filetype() */
enum
{
	Fascii,		/* printable ASCII only */
	Flatin,		/* contains Latin-1 characters */
	Futf,		/* contains UTF runes */
	Fbinary,	/* binary data */
	Feascii,	/* ASCII plus control characters */
	Fnull,		/* NUL bytes present */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     istga(void);
154 int     ismp3(void);
155 int     ismung(void);
156 int     isp9bit(void);
157 int     isp9font(void);
158 int     isrtf(void);
159 int     ismsdos(void);
160 int     isicocur(void);
161 int     iself(void);
162 int     istring(void);
163 int     isoffstr(void);
164 int     iff(void);
165 int     long0(void);
166 int     longoff(void);
167 int     istar(void);
168 int     isface(void);
169 int     isexec(void);
170 int     p9bitnum(char*, int*);
171 int     p9subfont(uchar*);
172 void    print_utf(void);
173 void    type(char*, int);
174 int     utf_count(void);
175 void    wordfreq(void);
176
177 int     (*call[])(void) =
178 {
179         long0,          /* recognizable by first 4 bytes */
180         istring,        /* recognizable by first string */
181         iself,          /* ELF (foreign) executable */
182         isexec,         /* native executables */
183         iff,            /* interchange file format (strings) */
184         longoff,        /* recognizable by 4 bytes at some offset */
185         isoffstr,       /* recognizable by string at some offset */
186         isrfc822,       /* email file */
187         ismbox,         /* mail box */
188         istar,          /* recognizable by tar checksum */
189         iscint,         /* compiler/assembler intermediate */
190         ishtml,         /* html keywords */
191         islimbo,        /* limbo source */
192         isc,            /* c & alef compiler key words */
193         isas,           /* assembler key words */
194         isp9font,       /* plan 9 font */
195         isp9bit,        /* plan 9 image (as from /dev/window) */
196         isrtf,          /* rich text format */
197         ismsdos,        /* msdos exe (virus file attachement) */
198         isicocur,               /* windows icon or cursor file */
199         isface,         /* ascii face file */
200         istga,
201         ismp3,
202
203         /* last resorts */
204         ismung,         /* entropy compressed/encrypted */
205         isenglish,      /* char frequency English */
206         0
207 };
208
int	mime;		/* set by -m: print MIME types instead of descriptions */

/* MIME types shared by many table entries */
char	OCTET[] = "application/octet-stream";
char	PLAIN[] = "text/plain";
213
214 void
215 main(int argc, char *argv[])
216 {
217         int i, j, maxlen;
218         char *cp;
219         Rune r;
220
221         ARGBEGIN{
222         case 'm':
223                 mime = 1;
224                 break;
225         default:
226                 fprint(2, "usage: file [-m] [file...]\n");
227                 exits("usage");
228         }ARGEND;
229
230         maxlen = 0;
231         if(mime == 0 || argc > 1){
232                 for(i = 0; i < argc; i++) {
233                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
234                                         ;
235                         if(j > maxlen)
236                                 maxlen = j;
237                 }
238         }
239         if (argc <= 0) {
240                 if(!mime)
241                         print ("stdin: ");
242                 filetype(0);
243         }
244         else {
245                 for(i = 0; i < argc; i++)
246                         type(argv[i], maxlen);
247         }
248         exits(0);
249 }
250
251 void
252 type(char *file, int nlen)
253 {
254         Rune r;
255         int i;
256         char *p;
257
258         if(nlen > 0){
259                 slash = 0;
260                 for (i = 0, p = file; *p; i++) {
261                         if (*p == '/')                  /* find rightmost slash */
262                                 slash = p;
263                         p += chartorune(&r, p);         /* count runes */
264                 }
265                 print("%s:%*s",file, nlen-i+1, "");
266         }
267         fname = file;
268         if ((fd = open(file, OREAD)) < 0) {
269                 fprint(2, "cannot open: %r\n");
270                 return;
271         }
272         filetype(fd);
273         close(fd);
274 }
275
276 void
277 utfconv(void)
278 {
279         Rune r;
280         uchar *rb;
281         char *p, *e;
282         int i;
283
284         if(nbuf < 4)
285                 return;
286
287         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
288                 if(!mime)
289                         print("utf-32be ");
290                 return;
291         } else
292         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
293                 if(!mime)
294                         print("utf-32le ");
295                 return;
296         } else
297         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298                 memmove(buf, buf+3, nbuf-3);
299                 nbuf -= 3;
300                 return;
301         } else
302         if(memcmp(buf, "\xFE\xFF", 2) == 0){
303                 if(!mime)
304                         print("utf-16be ");
305
306                 nbuf -= 2;
307                 rb = malloc(nbuf+1);
308                 memmove(rb, buf+2, nbuf);
309                 p = (char*)buf;
310                 e = p+sizeof(buf)-UTFmax-1;
311                 for(i=0; i<nbuf && p < e; i+=2){
312                         r = rb[i+1] | rb[i]<<8;
313                         p += runetochar(p, &r);
314                 }
315                 *p = 0;
316                 free(rb);
317                 nbuf = p - (char*)buf;
318         } else
319         if(memcmp(buf, "\xFF\xFE", 2) == 0){
320                 if(!mime)
321                         print("utf-16le ");
322
323                 nbuf -= 2;
324                 rb = malloc(nbuf+1);
325                 memmove(rb, buf+2, nbuf);
326                 p = (char*)buf;
327                 e = p+sizeof(buf)-UTFmax-1;
328                 for(i=0; i<nbuf && p < e; i+=2){
329                         r = rb[i] | rb[i+1]<<8;
330                         p += runetochar(p, &r);
331                 }
332                 *p = 0;
333                 free(rb);
334                 nbuf = p - (char*)buf;
335         }
336 }
337
338 void
339 filetype(int fd)
340 {
341         Rune r;
342         int i, f, n;
343         char *p, *eob;
344
345         free(mbuf);
346         mbuf = dirfstat(fd);
347         if(mbuf == nil){
348                 fprint(2, "cannot stat: %r\n");
349                 return;
350         }
351         if(mbuf->mode & DMDIR) {
352                 print("%s\n", mime ? OCTET : "directory");
353                 return;
354         }
355         if(mbuf->type != 'M' && mbuf->type != '|') {
356                 if(mime)
357                         print("%s\n", OCTET);
358                 else
359                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
360                 return;
361         }
362         /* may be reading a pipe on standard input */
363         nbuf = readn(fd, buf, sizeof(buf)-1);
364         if(nbuf < 0) {
365                 fprint(2, "cannot read: %r\n");
366                 return;
367         }
368         if(nbuf == 0) {
369                 print("%s\n", mime ? PLAIN : "empty file");
370                 return;
371         }
372         buf[nbuf] = 0;
373
374         utfconv();
375
376         /*
377          * build histogram table
378          */
379         memset(cfreq, 0, sizeof(cfreq));
380         for (i = 0; language[i].name; i++)
381                 language[i].count = 0;
382         eob = (char *)buf+nbuf;
383         for(n = 0, p = (char *)buf; p < eob; n++) {
384                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
385                         break;
386                 p += chartorune(&r, p);
387                 if (r == 0)
388                         f = Cnull;
389                 else if (r <= 0x7f) {
390                         if (!isprint(r) && !isspace(r))
391                                 f = Ceascii;    /* ASCII control char */
392                         else f = r;
393                 } else if (r == 0x80) {
394                         bump_utf_count(r);
395                         f = Cutf;
396                 } else if (r < 0xA0)
397                         f = Cbinary;    /* Invalid Runes */
398                 else if (r <= 0xff)
399                         f = Clatin;     /* Latin 1 */
400                 else {
401                         bump_utf_count(r);
402                         f = Cutf;               /* UTF extension */
403                 }
404                 cfreq[f]++;                     /* ASCII chars peg directly */
405         }
406         /*
407          * gross classify
408          */
409         if (cfreq[Cbinary])
410                 guess = Fbinary;
411         else if (cfreq[Cutf])
412                 guess = Futf;
413         else if (cfreq[Clatin])
414                 guess = Flatin;
415         else if (cfreq[Ceascii])
416                 guess = Feascii;
417         else if (cfreq[Cnull])
418                 guess = Fbinary;
419         else
420                 guess = Fascii;
421         /*
422          * lookup dictionary words
423          */
424         memset(wfreq, 0, sizeof(wfreq));
425         if(guess == Fascii || guess == Flatin || guess == Futf)
426                 wordfreq();
427         /*
428          * call individual classify routines
429          */
430         for(i=0; call[i]; i++)
431                 if((*call[i])())
432                         return;
433
434         /*
435          * if all else fails,
436          * print out gross classification
437          */
438         if (nbuf < 100 && !mime)
439                 print(mime ? PLAIN : "short ");
440         if (guess == Fascii)
441                 print("%s\n", mime ? PLAIN : "Ascii");
442         else if (guess == Feascii)
443                 print("%s\n", mime ? PLAIN : "extended ascii");
444         else if (guess == Flatin)
445                 print("%s\n", mime ? PLAIN : "latin ascii");
446         else if (guess == Futf && utf_count() < 4)
447                 print_utf();
448         else print("%s\n", mime ? OCTET : "binary");
449 }
450
451 void
452 bump_utf_count(Rune r)
453 {
454         int low, high, mid;
455
456         high = sizeof(language)/sizeof(language[0])-1;
457         for (low = 0; low < high;) {
458                 mid = (low+high)/2;
459                 if (r >= language[mid].low) {
460                         if (r <= language[mid].high) {
461                                 language[mid].count++;
462                                 break;
463                         } else low = mid+1;
464                 } else high = mid;
465         }
466 }
467
468 int
469 utf_count(void)
470 {
471         int i, count;
472
473         count = 0;
474         for (i = 0; language[i].name; i++)
475                 if (language[i].count > 0)
476                         switch (language[i].mode) {
477                         case Normal:
478                         case First:
479                                 count++;
480                                 break;
481                         default:
482                                 break;
483                         }
484         return count;
485 }
486
487 int
488 chkascii(void)
489 {
490         int i;
491
492         for (i = 'a'; i < 'z'; i++)
493                 if (cfreq[i])
494                         return 1;
495         for (i = 'A'; i < 'Z'; i++)
496                 if (cfreq[i])
497                         return 1;
498         return 0;
499 }
500
501 int
502 find_first(char *name)
503 {
504         int i;
505
506         for (i = 0; language[i].name != 0; i++)
507                 if (language[i].mode == First
508                         && strcmp(language[i].name, name) == 0)
509                         return i;
510         return -1;
511 }
512
513 void
514 print_utf(void)
515 {
516         int i, printed, j;
517
518         if(mime){
519                 print("%s\n", PLAIN);
520                 return;
521         }
522         if (chkascii()) {
523                 printed = 1;
524                 print("Ascii");
525         } else
526                 printed = 0;
527         for (i = 0; language[i].name; i++)
528                 if (language[i].count) {
529                         switch(language[i].mode) {
530                         case Multi:
531                                 j = find_first(language[i].name);
532                                 if (j < 0)
533                                         break;
534                                 if (language[j].count > 0)
535                                         break;
536                                 /* Fall through */
537                         case Normal:
538                         case First:
539                                 if (printed)
540                                         print(" & ");
541                                 else printed = 1;
542                                 print("%s", language[i].name);
543                                 break;
544                         case Shared:
545                         default:
546                                 break;
547                         }
548                 }
549         if(!printed)
550                 print("UTF");
551         print(" text\n");
552 }
553
554 void
555 wordfreq(void)
556 {
557         int low, high, mid, r;
558         uchar *p, *p2, c;
559
560         p = buf;
561         for(;;) {
562                 while (p < buf+nbuf && !isalpha(*p))
563                         p++;
564                 if (p >= buf+nbuf)
565                         return;
566                 p2 = p;
567                 while(p < buf+nbuf && isalpha(*p))
568                         p++;
569                 c = *p;
570                 *p = 0;
571                 high = sizeof(dict)/sizeof(dict[0]);
572                 for(low = 0;low < high;) {
573                         mid = (low+high)/2;
574                         r = strcmp(dict[mid].word, (char*)p2);
575                         if(r == 0) {
576                                 wfreq[dict[mid].class]++;
577                                 break;
578                         }
579                         if(r < 0)
580                                 low = mid+1;
581                         else
582                                 high = mid;
583                 }
584                 *p++ = c;
585         }
586 }
587
588 typedef struct Filemagic Filemagic;
589 struct Filemagic {
590         ulong x;
591         ulong mask;
592         char *desc;
593         char *mime;
594 };
595
596 /*
597  * integers in this table must be as seen on a little-endian machine
598  * when read from a file.
599  */
600 Filemagic long0tab[] = {
601         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
602         /* "pac1" */
603         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
604         /* "pXc2 */
605         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
606         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
607         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
608         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
609         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
610         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
611         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
612         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
613         0xf0ff,         0xf6ff,         "aac audio",    "audio/mpeg",
614         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
616         /* 0xfeedfacf */
617         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
618         /* 0xcefaedfe */
619         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
620         /* 0xcffaedfe */
621         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
622         /* 0xcafebabe */
623         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
624         /*
625          * venti & fossil magic numbers are stored big-endian on disk,
626          * thus the numbers appear reversed in this table.
627          */
628         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
629         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
630         0x1a53454e,     0xFFFFFFFF,     "NES ROM", OCTET,
631         /* tcpdump pcap file */
632         0xa1b2c3d4,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
633         0xd4c3b2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
634         0xa1b23c4d,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
635         0x4d3cb2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
636 };
637
638 int
639 filemagic(Filemagic *tab, int ntab, ulong x)
640 {
641         int i;
642
643         for(i=0; i<ntab; i++)
644                 if((x&tab[i].mask) == tab[i].x){
645                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
646                         return 1;
647                 }
648         return 0;
649 }
650
651 int
652 long0(void)
653 {
654         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
655 }
656
657 typedef struct Fileoffmag Fileoffmag;
658 struct Fileoffmag {
659         ulong   off;
660         Filemagic;
661 };
662
663 /*
664  * integers in this table must be as seen on a little-endian machine
665  * when read from a file.
666  */
667 Fileoffmag longofftab[] = {
668         /*
669          * venti & fossil magic numbers are stored big-endian on disk,
670          * thus the numbers appear reversed in this table.
671          */
672         256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
673         256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
674         128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
675         4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
676         0x100,    0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
677         0x1fc,    0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
678 };
679
680 int
681 fileoffmagic(Fileoffmag *tab, int ntab)
682 {
683         int i;
684         ulong x;
685         Fileoffmag *tp;
686         uchar buf[sizeof(long)];
687
688         for(i=0; i<ntab; i++) {
689                 tp = tab + i;
690                 seek(fd, tp->off, 0);
691                 if (readn(fd, buf, sizeof buf) != sizeof buf)
692                         continue;
693                 x = LENDIAN(buf);
694                 if((x&tp->mask) == tp->x){
695                         print("%s\n", mime ? tp->mime : tp->desc);
696                         return 1;
697                 }
698         }
699         return 0;
700 }
701
702 int
703 longoff(void)
704 {
705         return fileoffmagic(longofftab, nelem(longofftab));
706 }
707
708 int
709 isexec(void)
710 {
711         Fhdr f;
712
713         seek(fd, 0, 0);         /* reposition to start of file */
714         if(crackhdr(fd, &f)) {
715                 print("%s\n", mime ? OCTET : f.name);
716                 return 1;
717         }
718         return 0;
719 }
720
721
/* from tar.c */
enum
{
	NAMSIZ	= 100,
	TBLOCK	= 512,
};

/* one tar header block, viewed either as raw bytes or as fields */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];
		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];	/* if non-null, path = prefix "/" name */
	} dbuf;
};

/*
 * checksum - sum of all TBLOCK bytes of the header block, each
 * taken as an unsigned value, with the chksum field counted as
 * blanks.  The chksum field of *hp is overwritten with blanks.
 */
int
checksum(union hblock *hp)
{
	int i, sum;

	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
	sum = 0;
	for(i = 0; i < TBLOCK; i++)
		sum += hp->dummy[i] & 0xff;
	return sum;
}
764
765 int
766 istar(void)
767 {
768         int chksum;
769         char tblock[TBLOCK];
770         union hblock *hp = (union hblock *)tblock;
771         struct header *hdr = &hp->dbuf;
772
773         seek(fd, 0, 0);         /* reposition to start of file */
774         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
775                 return 0;
776         chksum = strtol(hdr->chksum, 0, 8);
777         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
778                 if (strcmp(hdr->magic, "ustar") == 0)
779                         print(mime? "application/x-ustar\n": "posix tar archive\n");
780                 else
781                         print(mime? "application/x-tar\n": "tar archive\n");
782                 return 1;
783         }
784         return 0;
785 }
786
787 /*
788  * initial words to classify file
789  */
790 struct  FILE_STRING
791 {
792         char    *key;
793         char    *filetype;
794         int     length;
795         char    *mime;
796 } file_string[] =
797 {
798         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
799         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
800         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
801         "!<arch>\n__.SYMDEF",   "archive random library",       16,     OCTET,
802         "!<arch>\n",            "archive",                      8,      OCTET,
803         "070707",               "cpio archive - ascii header",  6,      OCTET,
804         "#!/bin/rc",            "rc executable file",           9,      PLAIN,
805         "#!/bin/sh",            "sh executable file",           9,      PLAIN,
806         "%!",                   "postscript",                   2,      "application/postscript",
807         "\004%!",               "postscript",                   3,      "application/postscript",
808         "x T post",             "troff output for post",        8,      "application/troff",
809         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
810         "x T utf",              "troff output for UTF",         7,      "application/troff",
811         "x T 202",              "troff output for 202",         7,      "application/troff",
812         "x T aps",              "troff output for aps",         7,      "application/troff",
813         "x T ",                 "troff output",                 4,      "application/troff",
814         "GIF",                  "GIF image",                    3,      "image/gif",
815         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
816         "%PDF",                 "PDF",                          4,      "application/pdf",
817         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
818         "<!doctype",            "HTML file",                    9,      "text/html",
819         "<!--",                 "HTML file",                    4,      "text/html",
820         "<html>",               "HTML file",                    6,      "text/html",
821         "<HTML>",               "HTML file",                    6,      "text/html",
822         "<?xml",                "HTML file",                    5,      "text/html",
823         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
824         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
825         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
826         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
827         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
828         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
829         "BM",                   "bmp",                          2,      "image/bmp", 
830         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
831         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
832         "\033E\033",            "HP PCL printer data",          3,      OCTET,
833         "\033&",                "HP PCL printer data",          2,      OCTET,
834         "\033%-12345X",         "HPJCL file",           9,      "application/hpjcl",
835         "\033Lua",              "Lua bytecode",         4,      OCTET,
836         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
837         "OggS",                 "ogg audio",            4,      "audio/ogg",
838         ".snd",                 "sun audio",            4,      "audio/basic",
839         "\211PNG",              "PNG image",            4,      "image/png",
840         "P1\n",                 "ppm",                  3,      "image/ppm",
841         "P2\n",                 "ppm",                  3,      "image/ppm",
842         "P3\n",                 "ppm",                  3,      "image/ppm",
843         "P4\n",                 "ppm",                  3,      "image/ppm",
844         "P5\n",                 "ppm",                  3,      "image/ppm",
845         "P6\n",                 "ppm",                  3,      "image/ppm",
846         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
847         ".HTML ",               "troff -ms input",      6,      "text/troff",
848         ".LP",                  "troff -ms input",      3,      "text/troff",
849         ".ND",                  "troff -ms input",      3,      "text/troff",
850         ".PP",                  "troff -ms input",      3,      "text/troff",
851         ".TL",                  "troff -ms input",      3,      "text/troff",
852         ".TR",                  "troff -ms input",      3,      "text/troff",
853         ".TH",                  "manual page",          3,      "text/troff",
854         ".\\\"",                "troff input",          3,      "text/troff",
855         ".de",                  "troff input",          3,      "text/troff",
856         ".if",                  "troff input",          3,      "text/troff",
857         ".nr",                  "troff input",          3,      "text/troff",
858         ".tr",                  "troff input",          3,      "text/troff",
859         "vac:",                 "venti score",          4,      PLAIN,
860         "-----BEGIN CERTIFICATE-----\n",
861                                 "pem certificate",      -1,     PLAIN,
862         "-----BEGIN TRUSTED CERTIFICATE-----\n",
863                                 "pem trusted certificate", -1,  PLAIN,
864         "-----BEGIN X509 CERTIFICATE-----\n",
865                                 "pem x.509 certificate", -1,    PLAIN,
866         "subject=/C=",          "pem certificate with header", -1, PLAIN,
867         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
868         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
869         "[playlist]",           "playlist",             10,     "application/x-scpls",
870         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
871         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
872         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
873         "AT&T",                 "DjVu document",        4,      "image/vnd.djvu",
874         "Extended module: ",    "XM audio",             17,     "audio/xm",
875         "MThd",                 "midi audio",           4,      "audio/midi",
876         "MUS\x1a",              "mus audio",            4,      "audio/mus",
877         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
878         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
879         "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
880         "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
881         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
882         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
883         "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
884         "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
885         0,0,0,0
886 };
887
888 int
889 istring(void)
890 {
891         int i, l;
892         struct FILE_STRING *p;
893
894         for(p = file_string; p->key; p++) {
895                 l = p->length;
896                 if(l == -1)
897                         l = strlen(p->key);
898                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
899                         print("%s\n", mime ? p->mime : p->filetype);
900                         return 1;
901                 }
902         }
903         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
904                 for(i = 5; i < nbuf; i++)
905                         if(buf[i] == '\n')
906                                 break;
907                 if(mime)
908                         print("%s\n", OCTET);
909                 else
910                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
911                 return 1;
912         }
913         return 0;
914 }
915
916 struct offstr
917 {
918         ulong   off;
919         struct FILE_STRING;
920 } offstrs[] = {
921         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
922         32*4, "DICM",   "DICOM medical imaging data",   4,      "application/dicom",
923         0, 0, 0, 0, 0
924 };
925
926 int
927 isoffstr(void)
928 {
929         int n;
930         char buf[256];
931         struct offstr *p;
932
933         for(p = offstrs; p->key; p++) {
934                 seek(fd, p->off, 0);
935                 n = p->length;
936                 if (n > sizeof buf)
937                         n = sizeof buf;
938                 if (readn(fd, buf, n) != n)
939                         continue;
940                 if(memcmp(buf, p->key, n) == 0) {
941                         print("%s\n", mime ? p->mime : p->filetype);
942                         return 1;
943                 }
944         }
945         return 0;
946 }
947
948 int
949 iff(void)
950 {
951         if (strncmp((char*)buf, "FORM", 4) == 0 &&
952             strncmp((char*)buf+8, "AIFF", 4) == 0) {
953                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
954                 return 1;
955         }
956         if (strncmp((char*)buf, "RIFF", 4) == 0) {
957                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
958                         print("%s\n", mime? "audio/wave": "wave audio");
959                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
960                         print("%s\n", mime? "video/avi": "avi video");
961                 else
962                         print("%s\n", mime? OCTET : "riff file");
963                 return 1;
964         }
965         return 0;
966 }
967
/*
 * Tag names looked for by ishtml.  The table is ordered longest
 * first because ishtml stops at the first entry that matches as a
 * prefix; a short name placed early would hide a longer one.
 */
char*	html_string[] = {
	/* 10 */
	"blockquote",
	/* 8 */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset",
	"noframes", "textarea",
	/* 7 */
	"caption",
	/* 6 */
	"button", "center", "iframe", "object",
	"option", "script", "select", "strong",
	/* 5 */
	"blink", "embed", "frame", "input", "label",
	"param", "small", "style", "table", "tbody",
	"tfoot", "thead", "title",
	/* 4 */
	"?xml", "body", "code", "font", "form", "head",
	"html", "link", "menu", "meta", "span",
	/* 3 */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 */
	"br", "dd", "dl", "dt", "em", "h1", "h2", "h3",
	"h4", "h5", "h6", "hr", "li", "ol", "td", "th",
	"tr", "tt", "ul",
	/* 1 */
	"a", "b", "i", "p", "q", "u",
	0,
};
984
985 int
986 ishtml(void)
987 {
988         int i, n, count;
989         uchar *p;
990
991         count = 0;
992         p = buf;
993         for(;;) {
994                 while(p < buf+nbuf && *p != '<')
995                         p++;
996                 p++;
997                 if (p >= buf+nbuf)
998                         break;
999                 if(*p == '/')
1000                         p++;
1001                 if(p >= buf+nbuf)
1002                         break;
1003                 for(i = 0; html_string[i]; i++){
1004                         n = strlen(html_string[i]);
1005                         if(p + n > buf+nbuf)
1006                                 continue;
1007                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1008                                 p += n;
1009                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1010                                         if(++count > 2) {
1011                                                 print("%s\n", mime ? "text/html" : "HTML file");
1012                                                 return 1;
1013                                         }
1014                                 }
1015                                 break;
1016                         }
1017                 }
1018         }
1019         return 0;
1020 }
1021
/*
 * Header field names (lower case, including the colon) that
 * isrfc822 counts.  Compared case-insensitively against the start
 * of each header line; a nil entry ends the table.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",	/* field names cannot contain a space; the header is Reply-To */
	"sender:",
	0,
};
1033
1034 int
1035 isrfc822(void)
1036 {
1037
1038         char *p, *q, *r;
1039         int i, count;
1040
1041         count = 0;
1042         p = (char*)buf;
1043         for(;;) {
1044                 q = strchr(p, '\n');
1045                 if(q == nil)
1046                         break;
1047                 *q = 0;
1048                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1049                         count++;
1050                         *q = '\n';
1051                         p = q+1;
1052                         continue;
1053                 }
1054                 *q = '\n';
1055                 if(*p != '\t' && *p != ' '){
1056                         r = strchr(p, ':');
1057                         if(r == 0 || r > q)
1058                                 break;
1059                         for(i = 0; rfc822_string[i]; i++) {
1060                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1061                                         count++;
1062                                         break;
1063                                 }
1064                         }
1065                 }
1066                 p = q+1;
1067         }
1068         if(count >= 3){
1069                 print("%s\n", mime ? "message/rfc822" : "email file");
1070                 return 1;
1071         }
1072         return 0;
1073 }
1074
1075 int
1076 ismbox(void)
1077 {
1078         char *p, *q;
1079
1080         p = (char*)buf;
1081         q = strchr(p, '\n');
1082         if(q == nil)
1083                 return 0;
1084         *q = 0;
1085         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1086                 print("%s\n", mime ? PLAIN : "mail box");
1087                 return 1;
1088         }
1089         *q = '\n';
1090         return 0;
1091 }
1092
1093 int
1094 iscint(void)
1095 {
1096         int type;
1097         char *name;
1098         Biobuf b;
1099
1100         if(Binit(&b, fd, OREAD) == Beof)
1101                 return 0;
1102         seek(fd, 0, 0);
1103         type = objtype(&b, &name);
1104         if(type < 0)
1105                 return 0;
1106         if(mime)
1107                 print("%s\n", OCTET);
1108         else
1109                 print("%s intermediate\n", name);
1110         return 1;
1111 }
1112
1113 int
1114 isc(void)
1115 {
1116         int n;
1117
1118         n = wfreq[I1];
1119         /*
1120          * includes
1121          */
1122         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1123                 goto yes;
1124         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1125                 goto yes;
1126         /*
1127          * declarations
1128          */
1129         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1130                 goto yes;
1131         /*
1132          * assignments
1133          */
1134         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1135                 goto yes;
1136         return 0;
1137
1138 yes:
1139         if(mime){
1140                 print("%s\n", PLAIN);
1141                 return 1;
1142         }
1143         if(wfreq[Alword] > 0)
1144                 print("alef program\n");
1145         else
1146                 print("c program\n");
1147         return 1;
1148 }
1149
1150 int
1151 islimbo(void)
1152 {
1153         /*
1154          * includes
1155          */
1156         if(wfreq[Lword] < 4)
1157                 return 0;
1158         print("%s\n", mime ? PLAIN : "limbo program");
1159         return 1;
1160 }
1161
1162 int
1163 isas(void)
1164 {
1165         /*
1166          * includes
1167          */
1168         if(wfreq[Aword] < 2)
1169                 return 0;
1170         print("%s\n", mime ? PLAIN : "as program");
1171         return 1;
1172 }
1173
1174 int
1175 istga(void)
1176 {
1177         uchar *p;
1178
1179         p = buf;
1180         if(nbuf < 18)
1181                 return 0;
1182         if((p[12] | p[13]<<8) == 0)     /* width */
1183                 return 0;
1184         if((p[14] | p[15]<<8) == 0)     /* height */
1185                 return 0;
1186         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1187                 return 0;
1188         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1189                 return 0;
1190         if(p[1] == 0){  /* non color-mapped */
1191                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1192                         return 0;
1193                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1194                         return 0;
1195         } else
1196         if(p[1] == 1){  /* color-mapped */
1197                 if((p[2]&3) != 1 || p[7] == 0)  
1198                         return 0;
1199                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1200                         return 0;
1201         } else
1202                 return 0;
1203         print("%s\n", mime ? "image/tga" : "targa image");
1204         return 1;
1205 }
1206
1207 int
1208 ismp3(void)
1209 {
1210         uchar *p, *e;
1211
1212         p = buf;
1213         e = p + nbuf-1;
1214         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1215                 if((p[1] & 0xFE) == 0xFA){
1216                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1217                         return 1;
1218                 }
1219                 p++;
1220         }
1221         return 0;
1222 }
1223
1224 /*
1225  * low entropy means encrypted
1226  */
1227 int
1228 ismung(void)
1229 {
1230         int i, bucket[8];
1231         float cs;
1232
1233         if(nbuf < 64)
1234                 return 0;
1235         memset(bucket, 0, sizeof(bucket));
1236         for(i=nbuf-64; i<nbuf; i++)
1237                 bucket[(buf[i]>>5)&07] += 1;
1238
1239         cs = 0.;
1240         for(i=0; i<8; i++)
1241                 cs += (bucket[i]-8)*(bucket[i]-8);
1242         cs /= 8.;
1243         if(cs <= 24.322) {
1244                 if(buf[0]==0x1f && buf[1]==0x9d)
1245                         print("%s\n", mime ? "application/x-compress" : "compressed");
1246                 else
1247                 if(buf[0]==0x1f && buf[1]==0x8b)
1248                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1249                 else
1250                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1251                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1252                 else
1253                 if(buf[0]==0x78 && buf[1]==0x9c)
1254                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1255                 else
1256                         print("%s\n", mime ? OCTET : "encrypted");
1257                 return 1;
1258         }
1259         return 0;
1260 }
1261
1262 /*
1263  * english by punctuation and frequencies
1264  */
1265 int
1266 isenglish(void)
1267 {
1268         int vow, comm, rare, badpun, punct;
1269         char *p;
1270
1271         if(guess != Fascii && guess != Feascii)
1272                 return 0;
1273         badpun = 0;
1274         punct = 0;
1275         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1276                 switch(*p) {
1277                 case '.':
1278                 case ',':
1279                 case ')':
1280                 case '%':
1281                 case ';':
1282                 case ':':
1283                 case '?':
1284                         punct++;
1285                         if(p[1] != ' ' && p[1] != '\n')
1286                                 badpun++;
1287                 }
1288         if(badpun*5 > punct)
1289                 return 0;
1290         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1291                 return 0;
1292         if(2*cfreq[';'] > cfreq['e'])
1293                 return 0;
1294
1295         vow = 0;
1296         for(p="AEIOU"; *p; p++) {
1297                 vow += cfreq[*p];
1298                 vow += cfreq[tolower(*p)];
1299         }
1300         comm = 0;
1301         for(p="ETAION"; *p; p++) {
1302                 comm += cfreq[*p];
1303                 comm += cfreq[tolower(*p)];
1304         }
1305         rare = 0;
1306         for(p="VJKQXZ"; *p; p++) {
1307                 rare += cfreq[*p];
1308                 rare += cfreq[tolower(*p)];
1309         }
1310         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1311                 print("%s\n", mime ? PLAIN : "English text");
1312                 return 1;
1313         }
1314         return 0;
1315 }
1316
/*
 * Parse one P9BITLEN-byte header field: a decimal number that
 * ends exactly at the last byte, which must be a blank.
 * Stores the number in *v; returns 0 if the field is well formed
 * and -1 if not.  The field is modified only temporarily.
 */
#define	P9BITLEN	12
int
p9bitnum(char *s, int *v)
{
	char *last, *stop;

	last = s + P9BITLEN-1;
	if(*last != ' ')
		return -1;
	/* cut the field off at its trailing blank while converting */
	*last = '\0';
	*v = strtol(s, &stop, 10);
	*last = ' ';
	return stop == last ? 0 : -1;
}
1336
/*
 * Work out the pixel depth from the first 12-byte field of an
 * image header at s.
 *
 * A field starting with a digit is an old-style log2 depth; the
 * result is 1<<value and *newp is set to 0.  Otherwise the field
 * is a channel descriptor made of letters from "rgbkamx", each
 * followed by a decimal bit count; the result is the sum of the
 * counts and *newp is set to 1.
 *
 * Returns the depth (always greater than zero) or -1 if the field
 * is malformed.
 */
int
depthof(char *s, int *newp)
{
	char *es, *e;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, &e, 0);
		/* keep the shift within the range of an int */
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		/* strchr would match a NUL against the terminator, so test for it first */
		if(*s == '\0' || strchr("rgbkamx", *s) == 0)
			return -1;
		s++;
		if(*s < '0' || *s > '9')
			return -1;
		d += strtoul(s, &s, 10);
	}

	/* a depth of zero (e.g. "k0") is not an image and would make callers divide by zero */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1369
1370 int
1371 isp9bit(void)
1372 {
1373         int dep, lox, loy, hix, hiy, px, new, cmpr;
1374         long len;
1375         char *newlabel;
1376         uchar *cp;
1377
1378         cp = buf;
1379         cmpr = 0;
1380         if(memcmp(cp, "compressed\n", 11) == 0) {
1381                 cmpr = 1;
1382                 cp = buf + 11;
1383         }
1384
1385         if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1386                 return 0;
1387         newlabel = new ? "" : "old ";
1388         if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1389                 return 0;
1390         if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1391                 return 0;
1392         if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1393                 return 0;
1394         if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1395                 return 0;
1396
1397         hix -= lox;
1398         hiy -= loy;
1399         if(hix <= 0 || hiy <= 0)
1400                 return 0;
1401
1402         if(dep < 8){
1403                 px = 8/dep;             /* pixels per byte */
1404                 /* set l to number of bytes of data per scan line */
1405                 len = (hix+px-1)/px;
1406         }else
1407                 len = hix*dep/8;
1408         len *= hiy;                     /* col length */
1409         len += 5 * P9BITLEN;            /* size of initial ascii */
1410
1411         /*
1412          * for compressed images, don't look any further. otherwise:
1413          * for image file, length is non-zero and must match calculation above.
1414          * for /dev/window and /dev/screen the length is always zero.
1415          * for subfont, the subfont header should follow immediately.
1416          */
1417         if (cmpr) {
1418                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1419                         newlabel, dep, hix, hiy);
1420                 return 1;
1421         }
1422         /*
1423          * mbuf->length == 0 probably indicates reading a pipe.
1424          * Ghostscript sometimes produces a little extra on the end.
1425          */
1426         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1427             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1428                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1429                         newlabel, dep, hix, hiy);
1430                 return 1;
1431         }
1432         if (p9subfont(buf+len)) {
1433                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1434                         newlabel, dep, hix, hiy);
1435                 return 1;
1436         }
1437         return 0;
1438 }
1439
1440 int
1441 p9subfont(uchar *p)
1442 {
1443         int n, h, a;
1444
1445         /* if image too big, assume it's a subfont */
1446         if (p+3*P9BITLEN > buf+sizeof(buf))
1447                 return 1;
1448
1449         if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0)    /* char count */
1450                 return 0;
1451         if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0)    /* height */
1452                 return 0;
1453         if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0)    /* ascent */
1454                 return 0;
1455         if(n > 0 && h > 0 && a >= 0)
1456                 return 1;
1457         return 0;
1458 }
1459
1460 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1461
1462 int
1463 isp9font(void)
1464 {
1465         uchar *cp, *p;
1466         int i, n;
1467         char pathname[1024];
1468
1469         cp = buf;
1470         if (!getfontnum(cp, &cp))       /* height */
1471                 return 0;
1472         if (!getfontnum(cp, &cp))       /* ascent */
1473                 return 0;
1474         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1475                 if (!getfontnum(cp, &cp))       /* min */
1476                         break;
1477                 if (!getfontnum(cp, &cp))       /* max */
1478                         return 0;
1479                 getfontnum(cp, &cp);    /* optional offset */
1480                 while (WHITESPACE(*cp))
1481                         cp++;
1482                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1483                                 ;
1484                         /* construct a path name, if needed */
1485                 n = 0;
1486                 if (*p != '/' && slash) {
1487                         n = slash-fname+1;
1488                         if (n < sizeof(pathname))
1489                                 memcpy(pathname, fname, n);
1490                         else n = 0;
1491                 }
1492                 if (n+cp-p+4 < sizeof(pathname)) {
1493                         memcpy(pathname+n, p, cp-p);
1494                         n += cp-p;
1495                         pathname[n] = 0;
1496                         if (access(pathname, AEXIST) < 0) {
1497                                 strcpy(pathname+n, ".0");
1498                                 if (access(pathname, AEXIST) < 0)
1499                                         return 0;
1500                         }
1501                 }
1502         }
1503         if (i) {
1504                 print("%s\n", mime ? PLAIN : "font file");
1505                 return 1;
1506         }
1507         return 0;
1508 }
1509
1510 int
1511 getfontnum(uchar *cp, uchar **rp)
1512 {
1513         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1514                 cp++;
1515         if (*cp < '0' || *cp > '9')
1516                 return 0;
1517         strtoul((char *)cp, (char **)rp, 0);
1518         if (!WHITESPACE(**rp)) {
1519                 *rp = cp;
1520                 return 0;
1521         }
1522         return 1;
1523 }
1524
1525 int
1526 isrtf(void)
1527 {
1528         if(strstr((char *)buf, "\\rtf1")){
1529                 print(mime ? "application/rtf\n" : "rich text format\n");
1530                 return 1;
1531         }
1532         return 0;
1533 }
1534
1535 int
1536 ismsdos(void)
1537 {
1538         if (buf[0] == 0x4d && buf[1] == 0x5a){
1539                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1540                 return 1;
1541         }
1542         return 0;
1543 }
1544
1545 int
1546 isicocur(void)
1547 {
1548         if(buf[0] || buf[1] || buf[3] || buf[9])
1549                 return 0;
1550         if(buf[4] == 0x00 && buf[5] == 0x00)
1551                 return 0;
1552         switch(buf[2]){
1553         case 1:
1554                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1555                 return 1;
1556         case 2:
1557                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1558                 return 1;
1559         }
1560         return 0;
1561 }
1562
1563 int
1564 iself(void)
1565 {
1566         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1567         [1]     "WE32100",
1568         [2]     "SPARC",
1569         [3]     "i386",
1570         [4]     "M68000",
1571         [5]     "M88000",
1572         [6]     "i486",
1573         [7]     "i860",
1574         [8]     "R3000",
1575         [9]     "S370",
1576         [10]    "R4000",
1577         [15]    "HP-PA",
1578         [18]    "sparc v8+",
1579         [19]    "i960",
1580         [20]    "PPC-32",
1581         [21]    "PPC-64",
1582         [40]    "ARM",
1583         [41]    "Alpha",
1584         [43]    "sparc v9",
1585         [50]    "IA-64",
1586         [62]    "AMD64",
1587         [75]    "VAX",
1588         };
1589         static char *type[] = {
1590         [1]     "relocatable object",
1591         [2]     "executable",
1592         [3]     "shared library",
1593         [4]     "core dump",
1594         };
1595
1596         if (memcmp(buf, "\x7fELF", 4) == 0){
1597                 if (!mime){
1598                         int isdifend = 0;
1599                         int n = (buf[19] << 8) | buf[18];
1600                         char *p = "unknown";
1601                         char *t = "unknown";
1602
1603                         if (n > 0 && n < nelem(cpu) && cpu[n])
1604                                 p = cpu[n];
1605                         else {
1606                                 /* try the other byte order */
1607                                 isdifend = 1;
1608                                 n = (buf[18] << 8) | buf[19];
1609                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1610                                         p = cpu[n];
1611                         }
1612                         if(isdifend)
1613                                 n = (buf[16]<< 8) | buf[17];
1614                         else
1615                                 n = (buf[17]<< 8) | buf[16];
1616
1617                         if(n>0 && n < nelem(type) && type[n])
1618                                 t = type[n];
1619                         print("%s ELF %s\n", p, t);
1620                 }
1621                 else
1622                         print("application/x-elf-executable\n");
1623                 return 1;
1624         }
1625
1626         return 0;
1627 }
1628
1629 int
1630 isface(void)
1631 {
1632         int i, j, ldepth, l;
1633         char *p;
1634
1635         ldepth = -1;
1636         for(j = 0; j < 3; j++){
1637                 for(p = (char*)buf, i=0; i<3; i++){
1638                         if(p[0] != '0' || p[1] != 'x')
1639                                 return 0;
1640                         if(buf[2+8] == ',')
1641                                 l = 2;
1642                         else if(buf[2+4] == ',')
1643                                 l = 1;
1644                         else
1645                                 return 0;
1646                         if(ldepth == -1)
1647                                 ldepth = l;
1648                         if(l != ldepth)
1649                                 return 0;
1650                         strtoul(p, &p, 16);
1651                         if(*p++ != ',')
1652                                 return 0;
1653                         while(*p == ' ' || *p == '\t')
1654                                 p++;
1655                 }
1656                 if (*p++ != '\n')
1657                         return 0;
1658         }
1659
1660         if(mime)
1661                 print("application/x-face\n");
1662         else
1663                 print("face image depth %d\n", ldepth);
1664         return 1;
1665 }