]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
merge
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * Combine the four bytes at p into one integer, with p[0] as the
 * least significant byte.  The argument is expanded four times, so it
 * must be free of side effects.
 */
#define LENDIAN(p)	(((p)[3]<<24) | ((p)[2]<<16) | ((p)[1]<<8) | (p)[0])
11
12 uchar   buf[6001];
13 short   cfreq[140];
14 short   wfreq[50];
15 int     nbuf;
16 Dir*    mbuf;
17 int     fd;
18 char    *fname;
19 char    *slash;
20
/*
 * Indices into wfreq (word classes) and cfreq (character classes).
 * The character classes start at 128 so that they do not collide with
 * the ASCII characters, which index cfreq directly.
 */
enum
{
	/* word classes -- presumably C, Fortran, assembler, alef and limbo words */
	Cword,
	Fword,
	Aword,
	Alword,
	Lword,
	/* pieces of an #include line: "include", a header name, "h" */
	I1,
	I2,
	I3,
	/* character classes */
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Words looked for in text files.  wordfreq() binary-searches this
 * table with strcmp, so the entries MUST stay in strcmp order
 * (upper case sorts before lower case); a misplaced entry hides both
 * itself and its neighbours from the search.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{ "PATH",	Lword },
	{ "TEXT",	Aword },
	{ "adt",	Alword },
	{ "aggr",	Alword },
	{ "alef",	Alword },
	{ "array",	Lword },
	{ "bio",	I2 },
	{ "block",	Fword },
	{ "char",	Cword },
	{ "common",	Fword },
	{ "con",	Lword },
	{ "data",	Fword },
	{ "dimension",	Fword },
	{ "double",	Cword },
	{ "extern",	Cword },
	{ "float",	Cword },
	{ "fn",		Lword },
	{ "function",	Fword },
	{ "h",		I3 },
	{ "implement",	Lword },
	{ "import",	Lword },
	{ "include",	I1 },
	{ "int",	Cword },
	{ "integer",	Fword },
	{ "iota",	Lword },
	{ "libc",	I2 },
	{ "long",	Cword },
	{ "module",	Lword },
	{ "real",	Fword },
	{ "ref",	Lword },
	{ "register",	Cword },
	{ "self",	Lword },
	{ "short",	Cword },
	{ "static",	Cword },
	{ "stdio",	I2 },
	{ "struct",	Cword },
	{ "subroutine",	Fword },
	{ "u",		I2 },
	{ "void",	Cword },
};
83
/* values of the 'mode' field in the language table */
enum
{
	Normal	= 0,	/* language occupies a single range */
	First,		/* first of several ranges belonging to one language */
	Multi,		/* a later range of such a language */
	Shared,		/* range used by several languages */
};

/*
 * Rune ranges and the language each one suggests.  bump_utf_count()
 * binary-searches the table by range, so rows must stay in ascending
 * order; the row with a zero name ends the table.
 */
struct
{
	int	mode;		/* one of the codes above */
	int	count;		/* runes seen in this range */
	int	low;		/* first rune of the range */
	int	high;		/* last rune of the range */
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },	/* terminal entry */
};
128
129
/* gross classification of the buffer contents, set by filetype() */
enum
{
	Fascii	= 0,	/* printable ascii */
	Flatin	= 1,	/* latin 1 */
	Futf	= 2,	/* UTF character set */
	Fbinary	= 3,	/* binary */
	Feascii	= 4,	/* ASCII with control chars */
	Fnull	= 5,	/* NULL in file */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     istga(void);
154 int     ismp3(void);
155 int     ismp4(void);
156 int     ismung(void);
157 int     isp9bit(void);
158 int     isp9font(void);
159 int     isrtf(void);
160 int     ismsdos(void);
161 int     isicocur(void);
162 int     iself(void);
163 int     istring(void);
164 int     isoffstr(void);
165 int     iff(void);
166 int     long0(void);
167 int     longoff(void);
168 int     istar(void);
169 int     isface(void);
170 int     isexec(void);
171 int     isudiff(void);
172 int     p9bitnum(char*, int*);
173 int     p9subfont(uchar*);
174 void    print_utf(void);
175 void    type(char*, int);
176 int     utf_count(void);
177 void    wordfreq(void);
178
179 int     (*call[])(void) =
180 {
181         long0,          /* recognizable by first 4 bytes */
182         istring,        /* recognizable by first string */
183         iself,          /* ELF (foreign) executable */
184         isexec,         /* native executables */
185         iff,            /* interchange file format (strings) */
186         longoff,        /* recognizable by 4 bytes at some offset */
187         isoffstr,       /* recognizable by string at some offset */
188         isudiff,        /* unified diff output */
189         isrfc822,       /* email file */
190         ismbox,         /* mail box */
191         istar,          /* recognizable by tar checksum */
192         iscint,         /* compiler/assembler intermediate */
193         ishtml,         /* html keywords */
194         islimbo,        /* limbo source */
195         isc,            /* c & alef compiler key words */
196         isas,           /* assembler key words */
197         isp9font,       /* plan 9 font */
198         isp9bit,        /* plan 9 image (as from /dev/window) */
199         isrtf,          /* rich text format */
200         ismsdos,        /* msdos exe (virus file attachement) */
201         isicocur,               /* windows icon or cursor file */
202         isface,         /* ascii face file */
203         istga,
204         ismp4,
205         ismp3,
206
207         /* last resorts */
208         ismung,         /* entropy compressed/encrypted */
209         isenglish,      /* char frequency English */
210         0
211 };
212
int	mime;		/* set by the -m flag: print MIME types, not descriptions */

/* MIME types printed when nothing more specific is known */
char	OCTET[]	= "application/octet-stream";
char	PLAIN[]	= "text/plain";
217
218 void
219 main(int argc, char *argv[])
220 {
221         int i, j, maxlen;
222         char *cp;
223         Rune r;
224
225         ARGBEGIN{
226         case 'm':
227                 mime = 1;
228                 break;
229         default:
230                 fprint(2, "usage: file [-m] [file...]\n");
231                 exits("usage");
232         }ARGEND;
233
234         maxlen = 0;
235         if(mime == 0 || argc > 1){
236                 for(i = 0; i < argc; i++) {
237                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
238                                         ;
239                         if(j > maxlen)
240                                 maxlen = j;
241                 }
242         }
243         if (argc <= 0) {
244                 if(!mime)
245                         print ("stdin: ");
246                 filetype(0);
247         }
248         else {
249                 for(i = 0; i < argc; i++)
250                         type(argv[i], maxlen);
251         }
252         exits(0);
253 }
254
255 void
256 type(char *file, int nlen)
257 {
258         Rune r;
259         int i;
260         char *p;
261
262         if(nlen > 0){
263                 slash = 0;
264                 for (i = 0, p = file; *p; i++) {
265                         if (*p == '/')                  /* find rightmost slash */
266                                 slash = p;
267                         p += chartorune(&r, p);         /* count runes */
268                 }
269                 print("%s:%*s",file, nlen-i+1, "");
270         }
271         fname = file;
272         if ((fd = open(file, OREAD)) < 0) {
273                 fprint(2, "cannot open: %r\n");
274                 return;
275         }
276         filetype(fd);
277         close(fd);
278 }
279
280 void
281 utfconv(void)
282 {
283         Rune r;
284         uchar *rb;
285         char *p, *e;
286         int i;
287
288         if(nbuf < 4)
289                 return;
290
291         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
292                 if(!mime)
293                         print("utf-32be ");
294                 return;
295         } else
296         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
297                 if(!mime)
298                         print("utf-32le ");
299                 return;
300         } else
301         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
302                 memmove(buf, buf+3, nbuf-3);
303                 nbuf -= 3;
304                 return;
305         } else
306         if(memcmp(buf, "\xFE\xFF", 2) == 0){
307                 if(!mime)
308                         print("utf-16be ");
309
310                 nbuf -= 2;
311                 rb = malloc(nbuf+1);
312                 memmove(rb, buf+2, nbuf);
313                 p = (char*)buf;
314                 e = p+sizeof(buf)-UTFmax-1;
315                 for(i=0; i<nbuf && p < e; i+=2){
316                         r = rb[i+1] | rb[i]<<8;
317                         p += runetochar(p, &r);
318                 }
319                 *p = 0;
320                 free(rb);
321                 nbuf = p - (char*)buf;
322         } else
323         if(memcmp(buf, "\xFF\xFE", 2) == 0){
324                 if(!mime)
325                         print("utf-16le ");
326
327                 nbuf -= 2;
328                 rb = malloc(nbuf+1);
329                 memmove(rb, buf+2, nbuf);
330                 p = (char*)buf;
331                 e = p+sizeof(buf)-UTFmax-1;
332                 for(i=0; i<nbuf && p < e; i+=2){
333                         r = rb[i] | rb[i+1]<<8;
334                         p += runetochar(p, &r);
335                 }
336                 *p = 0;
337                 free(rb);
338                 nbuf = p - (char*)buf;
339         }
340 }
341
342 void
343 filetype(int fd)
344 {
345         Rune r;
346         int i, f, n;
347         char *p, *eob;
348
349         free(mbuf);
350         mbuf = dirfstat(fd);
351         if(mbuf == nil){
352                 fprint(2, "cannot stat: %r\n");
353                 return;
354         }
355         if(mbuf->mode & DMDIR) {
356                 print("%s\n", mime ? OCTET : "directory");
357                 return;
358         }
359         if(mbuf->type != 'M' && mbuf->type != '|') {
360                 if(mime)
361                         print("%s\n", OCTET);
362                 else
363                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
364                 return;
365         }
366         /* may be reading a pipe on standard input */
367         nbuf = readn(fd, buf, sizeof(buf)-1);
368         if(nbuf < 0) {
369                 fprint(2, "cannot read: %r\n");
370                 return;
371         }
372         if(nbuf == 0) {
373                 print("%s\n", mime ? PLAIN : "empty file");
374                 return;
375         }
376         buf[nbuf] = 0;
377
378         utfconv();
379
380         /*
381          * build histogram table
382          */
383         memset(cfreq, 0, sizeof(cfreq));
384         for (i = 0; language[i].name; i++)
385                 language[i].count = 0;
386         eob = (char *)buf+nbuf;
387         for(n = 0, p = (char *)buf; p < eob; n++) {
388                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
389                         break;
390                 p += chartorune(&r, p);
391                 if (r == 0)
392                         f = Cnull;
393                 else if (r <= 0x7f) {
394                         if (!isprint(r) && !isspace(r))
395                                 f = Ceascii;    /* ASCII control char */
396                         else f = r;
397                 } else if (r == 0x80) {
398                         bump_utf_count(r);
399                         f = Cutf;
400                 } else if (r < 0xA0)
401                         f = Cbinary;    /* Invalid Runes */
402                 else if (r <= 0xff)
403                         f = Clatin;     /* Latin 1 */
404                 else {
405                         bump_utf_count(r);
406                         f = Cutf;               /* UTF extension */
407                 }
408                 cfreq[f]++;                     /* ASCII chars peg directly */
409         }
410         /*
411          * gross classify
412          */
413         if (cfreq[Cbinary])
414                 guess = Fbinary;
415         else if (cfreq[Cutf])
416                 guess = Futf;
417         else if (cfreq[Clatin])
418                 guess = Flatin;
419         else if (cfreq[Ceascii])
420                 guess = Feascii;
421         else if (cfreq[Cnull])
422                 guess = Fbinary;
423         else
424                 guess = Fascii;
425         /*
426          * lookup dictionary words
427          */
428         memset(wfreq, 0, sizeof(wfreq));
429         if(guess == Fascii || guess == Flatin || guess == Futf)
430                 wordfreq();
431         /*
432          * call individual classify routines
433          */
434         for(i=0; call[i]; i++)
435                 if((*call[i])())
436                         return;
437
438         /*
439          * if all else fails,
440          * print out gross classification
441          */
442         if (nbuf < 100 && !mime)
443                 print(mime ? PLAIN : "short ");
444         if (guess == Fascii)
445                 print("%s\n", mime ? PLAIN : "Ascii");
446         else if (guess == Feascii)
447                 print("%s\n", mime ? PLAIN : "extended ascii");
448         else if (guess == Flatin)
449                 print("%s\n", mime ? PLAIN : "latin ascii");
450         else if (guess == Futf && utf_count() < 4)
451                 print_utf();
452         else print("%s\n", mime ? OCTET : "binary");
453 }
454
455 void
456 bump_utf_count(Rune r)
457 {
458         int low, high, mid;
459
460         high = sizeof(language)/sizeof(language[0])-1;
461         for (low = 0; low < high;) {
462                 mid = (low+high)/2;
463                 if (r >= language[mid].low) {
464                         if (r <= language[mid].high) {
465                                 language[mid].count++;
466                                 break;
467                         } else low = mid+1;
468                 } else high = mid;
469         }
470 }
471
472 int
473 utf_count(void)
474 {
475         int i, count;
476
477         count = 0;
478         for (i = 0; language[i].name; i++)
479                 if (language[i].count > 0)
480                         switch (language[i].mode) {
481                         case Normal:
482                         case First:
483                                 count++;
484                                 break;
485                         default:
486                                 break;
487                         }
488         return count;
489 }
490
491 int
492 chkascii(void)
493 {
494         int i;
495
496         for (i = 'a'; i < 'z'; i++)
497                 if (cfreq[i])
498                         return 1;
499         for (i = 'A'; i < 'Z'; i++)
500                 if (cfreq[i])
501                         return 1;
502         return 0;
503 }
504
505 int
506 find_first(char *name)
507 {
508         int i;
509
510         for (i = 0; language[i].name != 0; i++)
511                 if (language[i].mode == First
512                         && strcmp(language[i].name, name) == 0)
513                         return i;
514         return -1;
515 }
516
517 void
518 print_utf(void)
519 {
520         int i, printed, j;
521
522         if(mime){
523                 print("%s\n", PLAIN);
524                 return;
525         }
526         if (chkascii()) {
527                 printed = 1;
528                 print("Ascii");
529         } else
530                 printed = 0;
531         for (i = 0; language[i].name; i++)
532                 if (language[i].count) {
533                         switch(language[i].mode) {
534                         case Multi:
535                                 j = find_first(language[i].name);
536                                 if (j < 0)
537                                         break;
538                                 if (language[j].count > 0)
539                                         break;
540                                 /* Fall through */
541                         case Normal:
542                         case First:
543                                 if (printed)
544                                         print(" & ");
545                                 else printed = 1;
546                                 print("%s", language[i].name);
547                                 break;
548                         case Shared:
549                         default:
550                                 break;
551                         }
552                 }
553         if(!printed)
554                 print("UTF");
555         print(" text\n");
556 }
557
558 void
559 wordfreq(void)
560 {
561         int low, high, mid, r;
562         uchar *p, *p2, c;
563
564         p = buf;
565         for(;;) {
566                 while (p < buf+nbuf && !isalpha(*p))
567                         p++;
568                 if (p >= buf+nbuf)
569                         return;
570                 p2 = p;
571                 while(p < buf+nbuf && isalpha(*p))
572                         p++;
573                 c = *p;
574                 *p = 0;
575                 high = sizeof(dict)/sizeof(dict[0]);
576                 for(low = 0;low < high;) {
577                         mid = (low+high)/2;
578                         r = strcmp(dict[mid].word, (char*)p2);
579                         if(r == 0) {
580                                 wfreq[dict[mid].class]++;
581                                 break;
582                         }
583                         if(r < 0)
584                                 low = mid+1;
585                         else
586                                 high = mid;
587                 }
588                 *p++ = c;
589         }
590 }
591
592 typedef struct Filemagic Filemagic;
593 struct Filemagic {
594         ulong x;
595         ulong mask;
596         char *desc;
597         char *mime;
598 };
599
600 /*
601  * integers in this table must be as seen on a little-endian machine
602  * when read from a file.
603  */
604 Filemagic long0tab[] = {
605         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
606         /* "pac1" */
607         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
608         /* "pXc2 */
609         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
610         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
611         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
612         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
613         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
614         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
615         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
616         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
617         0xf0ff,         0xf6ff,         "aac audio",    "audio/mpeg",
618         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
619         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
620         /* 0xfeedfacf */
621         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
622         /* 0xcefaedfe */
623         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
624         /* 0xcffaedfe */
625         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
626         /* 0xcafebabe */
627         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
628         /*
629          * venti & fossil magic numbers are stored big-endian on disk,
630          * thus the numbers appear reversed in this table.
631          */
632         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
633         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
634         0x1a53454e,     0xFFFFFFFF,     "NES ROM", OCTET,
635         /* tcpdump pcap file */
636         0xa1b2c3d4,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
637         0xd4c3b2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
638         0xa1b23c4d,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
639         0x4d3cb2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
640 };
641
642 int
643 filemagic(Filemagic *tab, int ntab, ulong x)
644 {
645         int i;
646
647         for(i=0; i<ntab; i++)
648                 if((x&tab[i].mask) == tab[i].x){
649                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
650                         return 1;
651                 }
652         return 0;
653 }
654
655 int
656 long0(void)
657 {
658         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
659 }
660
661 typedef struct Fileoffmag Fileoffmag;
662 struct Fileoffmag {
663         ulong   off;
664         Filemagic;
665 };
666
667 /*
668  * integers in this table must be as seen on a little-endian machine
669  * when read from a file.
670  */
671 Fileoffmag longofftab[] = {
672         /*
673          * venti & fossil magic numbers are stored big-endian on disk,
674          * thus the numbers appear reversed in this table.
675          */
676         256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
677         256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
678         128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
679         4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
680         0x100,    0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
681         0x1fc,    0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
682 };
683
684 int
685 fileoffmagic(Fileoffmag *tab, int ntab)
686 {
687         int i;
688         ulong x;
689         Fileoffmag *tp;
690         uchar buf[sizeof(long)];
691
692         for(i=0; i<ntab; i++) {
693                 tp = tab + i;
694                 seek(fd, tp->off, 0);
695                 if (readn(fd, buf, sizeof buf) != sizeof buf)
696                         continue;
697                 x = LENDIAN(buf);
698                 if((x&tp->mask) == tp->x){
699                         print("%s\n", mime ? tp->mime : tp->desc);
700                         return 1;
701                 }
702         }
703         return 0;
704 }
705
706 int
707 longoff(void)
708 {
709         return fileoffmagic(longofftab, nelem(longofftab));
710 }
711
712 int
713 isexec(void)
714 {
715         Fhdr f;
716
717         seek(fd, 0, 0);         /* reposition to start of file */
718         if(crackhdr(fd, &f)) {
719                 print("%s\n", mime ? OCTET : f.name);
720                 return 1;
721         }
722         return 0;
723 }
724
725
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/* one tar header block, viewed as raw bytes or as header fields */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];
		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];  /* if non-null, path = prefix "/" name */
	} dbuf;
};

/*
 * Return the sum of all TBLOCK bytes of the header block, each taken
 * as an unsigned value, with the chksum field counted as blanks.
 * The chksum field of *hp is overwritten with blanks in the process,
 * so callers must read the stored checksum before calling.
 */
int
checksum(union hblock *hp)
{
	int n, sum;

	for(n = 0; n < (int)sizeof hp->dbuf.chksum; n++)
		hp->dbuf.chksum[n] = ' ';
	sum = 0;
	for(n = 0; n < TBLOCK; n++)
		sum += hp->dummy[n] & 0xff;
	return sum;
}
768
769 int
770 istar(void)
771 {
772         int chksum;
773         char tblock[TBLOCK];
774         union hblock *hp = (union hblock *)tblock;
775         struct header *hdr = &hp->dbuf;
776
777         seek(fd, 0, 0);         /* reposition to start of file */
778         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
779                 return 0;
780         chksum = strtol(hdr->chksum, 0, 8);
781         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
782                 if (strcmp(hdr->magic, "ustar") == 0)
783                         print(mime? "application/x-ustar\n": "posix tar archive\n");
784                 else
785                         print(mime? "application/x-tar\n": "tar archive\n");
786                 return 1;
787         }
788         return 0;
789 }
790
791 /*
792  * initial words to classify file
793  */
794 struct  FILE_STRING
795 {
796         char    *key;
797         char    *filetype;
798         int     length;
799         char    *mime;
800 } file_string[] =
801 {
802         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
803         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
804         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
805         "!<arch>\n__.SYMDEF",   "archive random library",       16,     OCTET,
806         "!<arch>\n",            "archive",                      8,      OCTET,
807         "070707",               "cpio archive - ascii header",  6,      OCTET,
808         "#!/bin/rc",            "rc executable file",           9,      PLAIN,
809         "#!/bin/sh",            "sh executable file",           9,      PLAIN,
810         "%!",                   "postscript",                   2,      "application/postscript",
811         "\004%!",               "postscript",                   3,      "application/postscript",
812         "x T post",             "troff output for post",        8,      "application/troff",
813         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
814         "x T utf",              "troff output for UTF",         7,      "application/troff",
815         "x T 202",              "troff output for 202",         7,      "application/troff",
816         "x T aps",              "troff output for aps",         7,      "application/troff",
817         "x T ",                 "troff output",                 4,      "application/troff",
818         "GIF",                  "GIF image",                    3,      "image/gif",
819         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
820         "%PDF",                 "PDF",                          4,      "application/pdf",
821         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
822         "<!doctype",            "HTML file",                    9,      "text/html",
823         "<!--",                 "HTML file",                    4,      "text/html",
824         "<html>",               "HTML file",                    6,      "text/html",
825         "<HTML>",               "HTML file",                    6,      "text/html",
826         "<?xml",                "HTML file",                    5,      "text/html",
827         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
828         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
829         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
830         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
831         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
832         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
833         "BM",                   "bmp",                          2,      "image/bmp", 
834         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
835         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
836         "\033E\033",            "HP PCL printer data",          3,      OCTET,
837         "\033&",                "HP PCL printer data",          2,      OCTET,
838         "\033%-12345X",         "HPJCL file",           9,      "application/hpjcl",
839         "\033Lua",              "Lua bytecode",         4,      OCTET,
840         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
841         "OggS",                 "ogg audio",            4,      "audio/ogg",
842         ".snd",                 "sun audio",            4,      "audio/basic",
843         "\211PNG",              "PNG image",            4,      "image/png",
844         "P1\n",                 "ppm",                  3,      "image/ppm",
845         "P2\n",                 "ppm",                  3,      "image/ppm",
846         "P3\n",                 "ppm",                  3,      "image/ppm",
847         "P4\n",                 "ppm",                  3,      "image/ppm",
848         "P5\n",                 "ppm",                  3,      "image/ppm",
849         "P6\n",                 "ppm",                  3,      "image/ppm",
850         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
851         ".HTML ",               "troff -ms input",      6,      "text/troff",
852         ".LP",                  "troff -ms input",      3,      "text/troff",
853         ".ND",                  "troff -ms input",      3,      "text/troff",
854         ".PP",                  "troff -ms input",      3,      "text/troff",
855         ".TL",                  "troff -ms input",      3,      "text/troff",
856         ".TR",                  "troff -ms input",      3,      "text/troff",
857         ".TH",                  "manual page",          3,      "text/troff",
858         ".\\\"",                "troff input",          3,      "text/troff",
859         ".de",                  "troff input",          3,      "text/troff",
860         ".if",                  "troff input",          3,      "text/troff",
861         ".nr",                  "troff input",          3,      "text/troff",
862         ".tr",                  "troff input",          3,      "text/troff",
863         "vac:",                 "venti score",          4,      PLAIN,
864         "-----BEGIN CERTIFICATE-----\n",
865                                 "pem certificate",      -1,     PLAIN,
866         "-----BEGIN TRUSTED CERTIFICATE-----\n",
867                                 "pem trusted certificate", -1,  PLAIN,
868         "-----BEGIN X509 CERTIFICATE-----\n",
869                                 "pem x.509 certificate", -1,    PLAIN,
870         "subject=/C=",          "pem certificate with header", -1, PLAIN,
871         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
872         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
873         "[playlist]",           "playlist",             10,     "application/x-scpls",
874         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
875         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
876         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
877         "AT&T",                 "DjVu document",        4,      "image/vnd.djvu",
878         "Extended module: ",    "XM audio",             17,     "audio/xm",
879         "MThd",                 "midi audio",           4,      "audio/midi",
880         "MUS\x1a",              "mus audio",            4,      "audio/mus",
881         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
882         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
883         "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
884         "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
885         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
886         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
887         "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
888         "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
889         0,0,0,0
890 };
891
892 int
893 istring(void)
894 {
895         int i, l;
896         struct FILE_STRING *p;
897
898         for(p = file_string; p->key; p++) {
899                 l = p->length;
900                 if(l == -1)
901                         l = strlen(p->key);
902                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
903                         print("%s\n", mime ? p->mime : p->filetype);
904                         return 1;
905                 }
906         }
907         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
908                 for(i = 5; i < nbuf; i++)
909                         if(buf[i] == '\n')
910                                 break;
911                 if(mime)
912                         print("%s\n", OCTET);
913                 else
914                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
915                 return 1;
916         }
917         return 0;
918 }
919
920 struct offstr
921 {
922         ulong   off;
923         struct FILE_STRING;
924 } offstrs[] = {
925         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
926         32*4, "DICM",   "DICOM medical imaging data",   4,      "application/dicom",
927         0, 0, 0, 0, 0
928 };
929
930 int
931 isoffstr(void)
932 {
933         int n;
934         char buf[256];
935         struct offstr *p;
936
937         for(p = offstrs; p->key; p++) {
938                 seek(fd, p->off, 0);
939                 n = p->length;
940                 if (n > sizeof buf)
941                         n = sizeof buf;
942                 if (readn(fd, buf, n) != n)
943                         continue;
944                 if(memcmp(buf, p->key, n) == 0) {
945                         print("%s\n", mime ? p->mime : p->filetype);
946                         return 1;
947                 }
948         }
949         return 0;
950 }
951
952 int
953 iff(void)
954 {
955         if (strncmp((char*)buf, "FORM", 4) == 0 &&
956             strncmp((char*)buf+8, "AIFF", 4) == 0) {
957                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
958                 return 1;
959         }
960         if (strncmp((char*)buf, "RIFF", 4) == 0) {
961                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
962                         print("%s\n", mime? "audio/wave": "wave audio");
963                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
964                         print("%s\n", mime? "video/avi": "avi video");
965                 else
966                         print("%s\n", mime? OCTET : "riff file");
967                 return 1;
968         }
969         return 0;
970 }
971
/*
 * tag names looked for by ishtml(), which stops at the first entry
 * that matches; the entries are listed longest first.
 */
char*	html_string[] = {
	/* 10 */
	"blockquote",
	/* 8 */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
	/* 7 */
	"caption",
	/* 6 */
	"button", "center", "iframe", "object", "option", "script", "select", "strong",
	/* 5 */
	"blink", "embed", "frame", "input", "label", "param", "small", "style",
	"table", "tbody", "tfoot", "thead", "title",
	/* 4 */
	"?xml", "body", "code", "font", "form", "head", "html", "link", "menu",
	"meta", "span",
	/* 3 */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 */
	"br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
	"li", "ol", "td", "th", "tr", "tt", "ul",
	/* 1 */
	"a", "b", "i", "p", "q", "u",
	0,
};
988
989 int
990 isudiff(void)
991 {
992         char *p;
993
994         p = (char*)buf;
995         if((p = strstr(p, "diff")) != nil)
996         if((p = strchr(p, '\n')) != nil)
997         if(strncmp(++p, "--- ", 4) == 0)
998         if((p = strchr(p, '\n')) != nil)
999         if(strncmp(++p, "+++ ", 4) == 0)
1000         if((p = strchr(p, '\n')) != nil)
1001         if(strncmp(++p, "@@ ", 3) == 0){
1002                 print("%s\n", mime ? "text/plain" : "unified diff output");
1003                 return 1;
1004         }
1005         return 0;
1006 }
1007
1008 int
1009 ishtml(void)
1010 {
1011         int i, n, count;
1012         uchar *p;
1013
1014         count = 0;
1015         p = buf;
1016         for(;;) {
1017                 while(p < buf+nbuf && *p != '<')
1018                         p++;
1019                 p++;
1020                 if (p >= buf+nbuf)
1021                         break;
1022                 if(*p == '/')
1023                         p++;
1024                 if(p >= buf+nbuf)
1025                         break;
1026                 for(i = 0; html_string[i]; i++){
1027                         n = strlen(html_string[i]);
1028                         if(p + n > buf+nbuf)
1029                                 continue;
1030                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1031                                 p += n;
1032                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1033                                         if(++count > 2) {
1034                                                 print("%s\n", mime ? "text/html" : "HTML file");
1035                                                 return 1;
1036                                         }
1037                                 }
1038                                 break;
1039                         }
1040                 }
1041         }
1042         return 0;
1043 }
1044
/*
 * header field prefixes counted by isrfc822(); compared without
 * regard to case.
 */
char*	rfc822_string[] =
{
	"from:", "date:", "to:", "subject:",
	"received:", "reply to:", "sender:",
	0,
};
1056
1057 int
1058 isrfc822(void)
1059 {
1060
1061         char *p, *q, *r;
1062         int i, count;
1063
1064         count = 0;
1065         p = (char*)buf;
1066         for(;;) {
1067                 q = strchr(p, '\n');
1068                 if(q == nil)
1069                         break;
1070                 *q = 0;
1071                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1072                         count++;
1073                         *q = '\n';
1074                         p = q+1;
1075                         continue;
1076                 }
1077                 *q = '\n';
1078                 if(*p != '\t' && *p != ' '){
1079                         r = strchr(p, ':');
1080                         if(r == 0 || r > q)
1081                                 break;
1082                         for(i = 0; rfc822_string[i]; i++) {
1083                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1084                                         count++;
1085                                         break;
1086                                 }
1087                         }
1088                 }
1089                 p = q+1;
1090         }
1091         if(count >= 3){
1092                 print("%s\n", mime ? "message/rfc822" : "email file");
1093                 return 1;
1094         }
1095         return 0;
1096 }
1097
1098 int
1099 ismbox(void)
1100 {
1101         char *p, *q;
1102
1103         p = (char*)buf;
1104         q = strchr(p, '\n');
1105         if(q == nil)
1106                 return 0;
1107         *q = 0;
1108         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1109                 print("%s\n", mime ? PLAIN : "mail box");
1110                 return 1;
1111         }
1112         *q = '\n';
1113         return 0;
1114 }
1115
1116 int
1117 iscint(void)
1118 {
1119         int type;
1120         char *name;
1121         Biobuf b;
1122
1123         if(Binit(&b, fd, OREAD) == Beof)
1124                 return 0;
1125         seek(fd, 0, 0);
1126         type = objtype(&b, &name);
1127         if(type < 0)
1128                 return 0;
1129         if(mime)
1130                 print("%s\n", OCTET);
1131         else
1132                 print("%s intermediate\n", name);
1133         return 1;
1134 }
1135
1136 int
1137 isc(void)
1138 {
1139         int n;
1140
1141         n = wfreq[I1];
1142         /*
1143          * includes
1144          */
1145         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1146                 goto yes;
1147         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1148                 goto yes;
1149         /*
1150          * declarations
1151          */
1152         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1153                 goto yes;
1154         /*
1155          * assignments
1156          */
1157         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1158                 goto yes;
1159         return 0;
1160
1161 yes:
1162         if(mime){
1163                 print("%s\n", PLAIN);
1164                 return 1;
1165         }
1166         if(wfreq[Alword] > 0)
1167                 print("alef program\n");
1168         else
1169                 print("c program\n");
1170         return 1;
1171 }
1172
1173 int
1174 islimbo(void)
1175 {
1176         /*
1177          * includes
1178          */
1179         if(wfreq[Lword] < 4)
1180                 return 0;
1181         print("%s\n", mime ? PLAIN : "limbo program");
1182         return 1;
1183 }
1184
1185 int
1186 isas(void)
1187 {
1188         /*
1189          * includes
1190          */
1191         if(wfreq[Aword] < 2)
1192                 return 0;
1193         print("%s\n", mime ? PLAIN : "as program");
1194         return 1;
1195 }
1196
1197 int
1198 istga(void)
1199 {
1200         uchar *p;
1201
1202         p = buf;
1203         if(nbuf < 18)
1204                 return 0;
1205         if((p[12] | p[13]<<8) == 0)     /* width */
1206                 return 0;
1207         if((p[14] | p[15]<<8) == 0)     /* height */
1208                 return 0;
1209         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1210                 return 0;
1211         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1212                 return 0;
1213         if(p[1] == 0){  /* non color-mapped */
1214                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1215                         return 0;
1216                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1217                         return 0;
1218         } else
1219         if(p[1] == 1){  /* color-mapped */
1220                 if((p[2]&3) != 1 || p[7] == 0)  
1221                         return 0;
1222                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1223                         return 0;
1224         } else
1225                 return 0;
1226         print("%s\n", mime ? "image/tga" : "targa image");
1227         return 1;
1228 }
1229
1230 int
1231 ismp3(void)
1232 {
1233         uchar *p, *e;
1234
1235         p = buf;
1236         e = p + nbuf-1;
1237         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1238                 if((p[1] & 0xFE) == 0xFA){
1239                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1240                         return 1;
1241                 }
1242                 p++;
1243         }
1244         return 0;
1245 }
1246
1247 int
1248 ismp4(void)
1249 {
1250         if(nbuf <= 12)
1251                 return 0;
1252         if(memcmp(&buf[4], "ftyp", 4) != 0)
1253                 return 0;
1254         if(memcmp(&buf[8], "isom", 4) == 0){
1255                 print("%s\n", mime ? "video/mp4" : "mp4 video");
1256                 return 1;
1257         }
1258         if(memcmp(&buf[8], "M4A ", 4) == 0){
1259                 print("%s\n", mime ? "audio/m4a" : "m4a audio");
1260                 return 1;
1261         }
1262         return 0;
1263 }
1264
1265 /*
1266  * low entropy means encrypted
1267  */
1268 int
1269 ismung(void)
1270 {
1271         int i, bucket[8];
1272         float cs;
1273
1274         if(nbuf < 64)
1275                 return 0;
1276         memset(bucket, 0, sizeof(bucket));
1277         for(i=nbuf-64; i<nbuf; i++)
1278                 bucket[(buf[i]>>5)&07] += 1;
1279
1280         cs = 0.;
1281         for(i=0; i<8; i++)
1282                 cs += (bucket[i]-8)*(bucket[i]-8);
1283         cs /= 8.;
1284         if(cs <= 24.322) {
1285                 if(buf[0]==0x1f && buf[1]==0x9d)
1286                         print("%s\n", mime ? "application/x-compress" : "compressed");
1287                 else
1288                 if(buf[0]==0x1f && buf[1]==0x8b)
1289                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1290                 else
1291                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1292                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1293                 else
1294                 if(buf[0]==0x78 && buf[1]==0x9c)
1295                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1296                 else
1297                         print("%s\n", mime ? OCTET : "encrypted");
1298                 return 1;
1299         }
1300         return 0;
1301 }
1302
1303 /*
1304  * english by punctuation and frequencies
1305  */
1306 int
1307 isenglish(void)
1308 {
1309         int vow, comm, rare, badpun, punct;
1310         char *p;
1311
1312         if(guess != Fascii && guess != Feascii)
1313                 return 0;
1314         badpun = 0;
1315         punct = 0;
1316         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1317                 switch(*p) {
1318                 case '.':
1319                 case ',':
1320                 case ')':
1321                 case '%':
1322                 case ';':
1323                 case ':':
1324                 case '?':
1325                         punct++;
1326                         if(p[1] != ' ' && p[1] != '\n')
1327                                 badpun++;
1328                 }
1329         if(badpun*5 > punct)
1330                 return 0;
1331         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1332                 return 0;
1333         if(2*cfreq[';'] > cfreq['e'])
1334                 return 0;
1335
1336         vow = 0;
1337         for(p="AEIOU"; *p; p++) {
1338                 vow += cfreq[*p];
1339                 vow += cfreq[tolower(*p)];
1340         }
1341         comm = 0;
1342         for(p="ETAION"; *p; p++) {
1343                 comm += cfreq[*p];
1344                 comm += cfreq[tolower(*p)];
1345         }
1346         rare = 0;
1347         for(p="VJKQXZ"; *p; p++) {
1348                 rare += cfreq[*p];
1349                 rare += cfreq[tolower(*p)];
1350         }
1351         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1352                 print("%s\n", mime ? PLAIN : "English text");
1353                 return 1;
1354         }
1355         return 0;
1356 }
1357
/*
 * read one fixed-width header field: a decimal number, right
 * justified in P9BITLEN bytes whose last byte is a space.
 * stores the value in *v and returns 0, or returns -1 if the
 * field is malformed.  s is modified only temporarily.
 */
#define P9BITLEN	12
int
p9bitnum(char *s, int *v)
{
	char *end, *last;

	last = s + P9BITLEN-1;
	if(*last != ' ')
		return -1;
	/* make the field a string so that strtol stops at its end */
	*last = '\0';
	*v = strtol(s, &end, 10);
	*last = ' ';
	return end == last ? 0 : -1;
}
1377
/*
 * depthof - parse the first 12-byte field of an image header.
 *
 * Old format: a decimal ldepth; the result is 1<<ldepth and *newp is 0.
 * New format: a channel descriptor made of letters from "rgbkamx", each
 * followed by a bit count; the result is the sum of the counts and
 * *newp is 1.
 *
 * Returns the depth in bits (always > 0) or -1 if the field is not a
 * plausible depth.  The field comes straight from the file, so an
 * ldepth that cannot be shifted within an int, a total depth of zero,
 * and a NUL byte in place of a channel letter are all rejected rather
 * than passed on to the caller, which divides by the result.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		/* 1<<ld must stay a positive int */
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		/* strchr would match its own terminator for a NUL byte */
		if(*s == '\0' || strchr("rgbkamx", *s) == 0)
			return -1;
		s++;
		if('0'<=*s && *s<='9')
			d += strtoul(s, &s, 10);
		else
			return -1;
	}

	/* "k0" and the like add up to no bits at all */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1410
1411 int
1412 isp9bit(void)
1413 {
1414         int dep, lox, loy, hix, hiy, px, new, cmpr;
1415         long len;
1416         char *newlabel;
1417         uchar *cp;
1418
1419         cp = buf;
1420         cmpr = 0;
1421         if(memcmp(cp, "compressed\n", 11) == 0) {
1422                 cmpr = 1;
1423                 cp = buf + 11;
1424         }
1425
1426         if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1427                 return 0;
1428         newlabel = new ? "" : "old ";
1429         if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1430                 return 0;
1431         if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1432                 return 0;
1433         if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1434                 return 0;
1435         if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1436                 return 0;
1437
1438         hix -= lox;
1439         hiy -= loy;
1440         if(hix <= 0 || hiy <= 0)
1441                 return 0;
1442
1443         if(dep < 8){
1444                 px = 8/dep;             /* pixels per byte */
1445                 /* set l to number of bytes of data per scan line */
1446                 len = (hix+px-1)/px;
1447         }else
1448                 len = hix*dep/8;
1449         len *= hiy;                     /* col length */
1450         len += 5 * P9BITLEN;            /* size of initial ascii */
1451
1452         /*
1453          * for compressed images, don't look any further. otherwise:
1454          * for image file, length is non-zero and must match calculation above.
1455          * for /dev/window and /dev/screen the length is always zero.
1456          * for subfont, the subfont header should follow immediately.
1457          */
1458         if (cmpr) {
1459                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1460                         newlabel, dep, hix, hiy);
1461                 return 1;
1462         }
1463         /*
1464          * mbuf->length == 0 probably indicates reading a pipe.
1465          * Ghostscript sometimes produces a little extra on the end.
1466          */
1467         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1468             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1469                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1470                         newlabel, dep, hix, hiy);
1471                 return 1;
1472         }
1473         if (p9subfont(buf+len)) {
1474                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1475                         newlabel, dep, hix, hiy);
1476                 return 1;
1477         }
1478         return 0;
1479 }
1480
1481 int
1482 p9subfont(uchar *p)
1483 {
1484         int n, h, a;
1485
1486         /* if image too big, assume it's a subfont */
1487         if (p+3*P9BITLEN > buf+sizeof(buf))
1488                 return 1;
1489
1490         if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0)    /* char count */
1491                 return 0;
1492         if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0)    /* height */
1493                 return 0;
1494         if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0)    /* ascent */
1495                 return 0;
1496         if(n > 0 && h > 0 && a >= 0)
1497                 return 1;
1498         return 0;
1499 }
1500
1501 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1502
1503 int
1504 isp9font(void)
1505 {
1506         uchar *cp, *p;
1507         int i, n;
1508         char pathname[1024];
1509
1510         cp = buf;
1511         if (!getfontnum(cp, &cp))       /* height */
1512                 return 0;
1513         if (!getfontnum(cp, &cp))       /* ascent */
1514                 return 0;
1515         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1516                 if (!getfontnum(cp, &cp))       /* min */
1517                         break;
1518                 if (!getfontnum(cp, &cp))       /* max */
1519                         return 0;
1520                 getfontnum(cp, &cp);    /* optional offset */
1521                 while (WHITESPACE(*cp))
1522                         cp++;
1523                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1524                                 ;
1525                         /* construct a path name, if needed */
1526                 n = 0;
1527                 if (*p != '/' && slash) {
1528                         n = slash-fname+1;
1529                         if (n < sizeof(pathname))
1530                                 memcpy(pathname, fname, n);
1531                         else n = 0;
1532                 }
1533                 if (n+cp-p+4 < sizeof(pathname)) {
1534                         memcpy(pathname+n, p, cp-p);
1535                         n += cp-p;
1536                         pathname[n] = 0;
1537                         if (access(pathname, AEXIST) < 0) {
1538                                 strcpy(pathname+n, ".0");
1539                                 if (access(pathname, AEXIST) < 0)
1540                                         return 0;
1541                         }
1542                 }
1543         }
1544         if (i) {
1545                 print("%s\n", mime ? PLAIN : "font file");
1546                 return 1;
1547         }
1548         return 0;
1549 }
1550
1551 int
1552 getfontnum(uchar *cp, uchar **rp)
1553 {
1554         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1555                 cp++;
1556         if (*cp < '0' || *cp > '9')
1557                 return 0;
1558         strtoul((char *)cp, (char **)rp, 0);
1559         if (!WHITESPACE(**rp)) {
1560                 *rp = cp;
1561                 return 0;
1562         }
1563         return 1;
1564 }
1565
1566 int
1567 isrtf(void)
1568 {
1569         if(strstr((char *)buf, "\\rtf1")){
1570                 print(mime ? "application/rtf\n" : "rich text format\n");
1571                 return 1;
1572         }
1573         return 0;
1574 }
1575
1576 int
1577 ismsdos(void)
1578 {
1579         if (buf[0] == 0x4d && buf[1] == 0x5a){
1580                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1581                 return 1;
1582         }
1583         return 0;
1584 }
1585
1586 int
1587 isicocur(void)
1588 {
1589         if(buf[0] || buf[1] || buf[3] || buf[9])
1590                 return 0;
1591         if(buf[4] == 0x00 && buf[5] == 0x00)
1592                 return 0;
1593         switch(buf[2]){
1594         case 1:
1595                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1596                 return 1;
1597         case 2:
1598                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1599                 return 1;
1600         }
1601         return 0;
1602 }
1603
1604 int
1605 iself(void)
1606 {
1607         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1608         [1]     "WE32100",
1609         [2]     "SPARC",
1610         [3]     "i386",
1611         [4]     "M68000",
1612         [5]     "M88000",
1613         [6]     "i486",
1614         [7]     "i860",
1615         [8]     "R3000",
1616         [9]     "S370",
1617         [10]    "R4000",
1618         [15]    "HP-PA",
1619         [18]    "sparc v8+",
1620         [19]    "i960",
1621         [20]    "PPC-32",
1622         [21]    "PPC-64",
1623         [40]    "ARM",
1624         [41]    "Alpha",
1625         [43]    "sparc v9",
1626         [50]    "IA-64",
1627         [62]    "AMD64",
1628         [75]    "VAX",
1629         };
1630         static char *type[] = {
1631         [1]     "relocatable object",
1632         [2]     "executable",
1633         [3]     "shared library",
1634         [4]     "core dump",
1635         };
1636
1637         if (memcmp(buf, "\x7fELF", 4) == 0){
1638                 if (!mime){
1639                         int isdifend = 0;
1640                         int n = (buf[19] << 8) | buf[18];
1641                         char *p = "unknown";
1642                         char *t = "unknown";
1643
1644                         if (n > 0 && n < nelem(cpu) && cpu[n])
1645                                 p = cpu[n];
1646                         else {
1647                                 /* try the other byte order */
1648                                 isdifend = 1;
1649                                 n = (buf[18] << 8) | buf[19];
1650                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1651                                         p = cpu[n];
1652                         }
1653                         if(isdifend)
1654                                 n = (buf[16]<< 8) | buf[17];
1655                         else
1656                                 n = (buf[17]<< 8) | buf[16];
1657
1658                         if(n>0 && n < nelem(type) && type[n])
1659                                 t = type[n];
1660                         print("%s ELF %s\n", p, t);
1661                 }
1662                 else
1663                         print("application/x-elf-executable\n");
1664                 return 1;
1665         }
1666
1667         return 0;
1668 }
1669
1670 int
1671 isface(void)
1672 {
1673         int i, j, ldepth, l;
1674         char *p;
1675
1676         ldepth = -1;
1677         for(j = 0; j < 3; j++){
1678                 for(p = (char*)buf, i=0; i<3; i++){
1679                         if(p[0] != '0' || p[1] != 'x')
1680                                 return 0;
1681                         if(buf[2+8] == ',')
1682                                 l = 2;
1683                         else if(buf[2+4] == ',')
1684                                 l = 1;
1685                         else
1686                                 return 0;
1687                         if(ldepth == -1)
1688                                 ldepth = l;
1689                         if(l != ldepth)
1690                                 return 0;
1691                         strtoul(p, &p, 16);
1692                         if(*p++ != ',')
1693                                 return 0;
1694                         while(*p == ' ' || *p == '\t')
1695                                 p++;
1696                 }
1697                 if (*p++ != '\n')
1698                         return 0;
1699         }
1700
1701         if(mime)
1702                 print("application/x-face\n");
1703         else
1704                 print("face image depth %d\n", ldepth);
1705         return 1;
1706 }