]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
cwfs: fix listen filedescriptor leaks
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * Assemble a 32-bit little-endian value from the four bytes at p.
 * Every byte is widened to unsigned long before it is shifted, so a
 * set high bit in p[3] is never shifted into the sign bit of an int
 * and the result is never sign-extended when stored in a wider type.
 */
#define LENDIAN(p)      ((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | ((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
11
/*
 * State shared by filetype() and the individual classifiers.
 */
uchar   buf[6001];      /* leading bytes of the file; filetype() reads at most sizeof(buf)-1 and NUL-terminates */
short   cfreq[140];     /* histogram built by filetype(): indexed by ASCII value or by a C* class code */
short   wfreq[50];      /* per-class counts of dictionary words, filled by wordfreq() */
int     nbuf;           /* number of valid bytes in buf */
Dir*    mbuf;           /* dirfstat() result for the current file; freed at the next filetype() call */
int     fd;             /* descriptor opened by type(); the seeking classifiers read from it */
char    *fname;         /* name of the current file, set by type() */
char    *slash;         /* rightmost '/' in the name scanned by type(), or 0 */
20
/*
 * Class codes.  The *word and I* values index wfreq (see wordfreq());
 * the C* values, starting at 128, index cfreq alongside the plain
 * ASCII values (see filetype()).
 */
enum
{
        Cword,
        Fword,
        Aword,
        Alword,
        Lword,
        I1,
        I2,
        I3,
        Clatin  = 128,
        Cbinary,
        Cnull,
        Ceascii,
        Cutf,
};

/*
 * Words looked up by wordfreq() and the class each one counts toward.
 * wordfreq() binary-searches this table with strcmp(), so the entries
 * MUST stay in strict strcmp() order (upper case sorts before lower
 * case).  An entry out of order hides itself and some of its
 * neighbours from the search.
 */
struct
{
        char*   word;
        int     class;
} dict[] =
{
        "PATH",         Lword,
        "TEXT",         Aword,
        "adt",          Alword,
        "aggr",         Alword,
        "alef",         Alword,
        "array",        Lword,
        "bio",          I2,
        "block",        Fword,
        "char",         Cword,
        "common",       Fword,
        "con",          Lword,
        "data",         Fword,
        "dimension",    Fword,
        "double",       Cword,
        "extern",       Cword,
        "float",        Cword,
        "fn",           Lword,
        "function",     Fword,
        "h",            I3,
        "implement",    Lword,
        "import",       Lword,
        "include",      I1,
        "int",          Cword,
        "integer",      Fword,
        "iota",         Lword,
        "libc",         I2,
        "long",         Cword,
        "module",       Lword,
        "real",         Fword,
        "ref",          Lword,
        "register",     Cword,
        "self",         Lword,
        "short",        Cword,
        "static",       Cword,
        "stdio",        I2,
        "struct",       Cword,
        "subroutine",   Fword,
        "u",            I2,
        "void",         Cword,
};
83
84 /* codes for 'mode' field in language structure */
enum    {
                Normal  = 0,    /* language covered by a single table entry */
                First,          /* first entry for language spanning several ranges */
                Multi,          /* later entries "   "       "  ... */
                Shared,         /* codes used in several languages */
        };
91
/*
 * Rune ranges and the language name reported for each.
 * bump_utf_count() binary-searches the entries (all but the last) on
 * low/high, so they must stay in ascending, non-overlapping order.
 * The loops in filetype(), utf_count(), find_first() and print_utf()
 * stop at the terminal entry, whose name is 0.
 */
struct
{
        int     mode;           /* see enum above */
        int     count;          /* runes seen in [low, high]; reset by filetype() */
        int     low;            /* first rune of the range (inclusive) */
        int     high;           /* last rune of the range (inclusive) */
        char    *name;          /* name printed by print_utf() */

} language[] =
{
        Normal, 0,      0x0100, 0x01FF, "Extended Latin",
        Normal, 0,      0x0370, 0x03FF, "Greek",
        Normal, 0,      0x0400, 0x04FF, "Cyrillic",
        Normal, 0,      0x0530, 0x058F, "Armenian",
        Normal, 0,      0x0590, 0x05FF, "Hebrew",
        Normal, 0,      0x0600, 0x06FF, "Arabic",
        Normal, 0,      0x0900, 0x097F, "Devanagari",
        Normal, 0,      0x0980, 0x09FF, "Bengali",
        Normal, 0,      0x0A00, 0x0A7F, "Gurmukhi",
        Normal, 0,      0x0A80, 0x0AFF, "Gujarati",
        Normal, 0,      0x0B00, 0x0B7F, "Oriya",
        Normal, 0,      0x0B80, 0x0BFF, "Tamil",
        Normal, 0,      0x0C00, 0x0C7F, "Telugu",
        Normal, 0,      0x0C80, 0x0CFF, "Kannada",
        Normal, 0,      0x0D00, 0x0D7F, "Malayalam",
        Normal, 0,      0x0E00, 0x0E7F, "Thai",
        Normal, 0,      0x0E80, 0x0EFF, "Lao",
        Normal, 0,      0x1000, 0x105F, "Tibetan",
        Normal, 0,      0x10A0, 0x10FF, "Georgian",
        Normal, 0,      0x3040, 0x30FF, "Japanese",
        Normal, 0,      0x3100, 0x312F, "Chinese",
        First,  0,      0x3130, 0x318F, "Korean",
        Multi,  0,      0x3400, 0x3D2F, "Korean",
        Shared, 0,      0x4e00, 0x9fff, "CJK",
        Normal, 0,      0,      0,      0,              /* terminal entry */
};
128
129
/*
 * Gross classification of the buffer, chosen by filetype() from the
 * cfreq histogram and reported only when no classifier in call[]
 * claims the file.
 */
enum
{
        Fascii,         /* printable ascii */
        Flatin,         /* latin 1*/
        Futf,           /* UTF character set */
        Fbinary,        /* binary */
        Feascii,        /* ASCII with control chars */
        Fnull,          /* NULL in file */
} guess;
139
/*
 * Forward declarations.  The is*() routines and the other
 * int (*)(void) functions are the classifiers listed in call[];
 * each returns non-zero once it has printed a verdict for the file.
 */
void    bump_utf_count(Rune);
int     cistrncmp(char*, char*, int);
void    filetype(int);
int     getfontnum(uchar*, uchar**);
int     isas(void);
int     isc(void);
int     iscint(void);
int     isenglish(void);
int     ishp(void);
int     ishtml(void);
int     isrfc822(void);
int     ismbox(void);
int     islimbo(void);
int     istga(void);
int     ismp3(void);
int     ismung(void);
int     isp9bit(void);
int     isp9font(void);
int     isrtf(void);
int     ismsdos(void);
int     isicocur(void);
int     iself(void);
int     istring(void);
int     isoffstr(void);
int     iff(void);
int     long0(void);
int     longoff(void);
int     istar(void);
int     isface(void);
int     isexec(void);
int     isudiff(void);
int     p9bitnum(char*, int*);
int     p9subfont(uchar*);
void    print_utf(void);
void    type(char*, int);
int     utf_count(void);
void    wordfreq(void);
177
/*
 * Classifiers tried by filetype(), in this order; the first one to
 * return non-zero ends the search, so more specific tests must come
 * before more general ones.  The table is terminated by 0.
 */
int     (*call[])(void) =
{
        long0,          /* recognizable by first 4 bytes */
        istring,        /* recognizable by first string */
        iself,          /* ELF (foreign) executable */
        isexec,         /* native executables */
        iff,            /* interchange file format (strings) */
        longoff,        /* recognizable by 4 bytes at some offset */
        isoffstr,       /* recognizable by string at some offset */
        isudiff,        /* unified diff output */
        isrfc822,       /* email file */
        ismbox,         /* mail box */
        istar,          /* recognizable by tar checksum */
        iscint,         /* compiler/assembler intermediate */
        ishtml,         /* html keywords */
        islimbo,        /* limbo source */
        isc,            /* c & alef compiler key words */
        isas,           /* assembler key words */
        isp9font,       /* plan 9 font */
        isp9bit,        /* plan 9 image (as from /dev/window) */
        isrtf,          /* rich text format */
        ismsdos,        /* msdos exe (virus file attachment) */
        isicocur,               /* windows icon or cursor file */
        isface,         /* ascii face file */
        istga,
        ismp3,

        /* last resorts */
        ismung,         /* entropy compressed/encrypted */
        isenglish,      /* char frequency English */
        0
};
210
int mime;       /* set by the -m flag: print MIME types instead of descriptions */

/* MIME types used as generic fallbacks throughout the classifiers */
char OCTET[] =  "application/octet-stream";
char PLAIN[] =  "text/plain";
215
216 void
217 main(int argc, char *argv[])
218 {
219         int i, j, maxlen;
220         char *cp;
221         Rune r;
222
223         ARGBEGIN{
224         case 'm':
225                 mime = 1;
226                 break;
227         default:
228                 fprint(2, "usage: file [-m] [file...]\n");
229                 exits("usage");
230         }ARGEND;
231
232         maxlen = 0;
233         if(mime == 0 || argc > 1){
234                 for(i = 0; i < argc; i++) {
235                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
236                                         ;
237                         if(j > maxlen)
238                                 maxlen = j;
239                 }
240         }
241         if (argc <= 0) {
242                 if(!mime)
243                         print ("stdin: ");
244                 filetype(0);
245         }
246         else {
247                 for(i = 0; i < argc; i++)
248                         type(argv[i], maxlen);
249         }
250         exits(0);
251 }
252
253 void
254 type(char *file, int nlen)
255 {
256         Rune r;
257         int i;
258         char *p;
259
260         if(nlen > 0){
261                 slash = 0;
262                 for (i = 0, p = file; *p; i++) {
263                         if (*p == '/')                  /* find rightmost slash */
264                                 slash = p;
265                         p += chartorune(&r, p);         /* count runes */
266                 }
267                 print("%s:%*s",file, nlen-i+1, "");
268         }
269         fname = file;
270         if ((fd = open(file, OREAD)) < 0) {
271                 fprint(2, "cannot open: %r\n");
272                 return;
273         }
274         filetype(fd);
275         close(fd);
276 }
277
278 void
279 utfconv(void)
280 {
281         Rune r;
282         uchar *rb;
283         char *p, *e;
284         int i;
285
286         if(nbuf < 4)
287                 return;
288
289         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
290                 if(!mime)
291                         print("utf-32be ");
292                 return;
293         } else
294         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
295                 if(!mime)
296                         print("utf-32le ");
297                 return;
298         } else
299         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
300                 memmove(buf, buf+3, nbuf-3);
301                 nbuf -= 3;
302                 return;
303         } else
304         if(memcmp(buf, "\xFE\xFF", 2) == 0){
305                 if(!mime)
306                         print("utf-16be ");
307
308                 nbuf -= 2;
309                 rb = malloc(nbuf+1);
310                 memmove(rb, buf+2, nbuf);
311                 p = (char*)buf;
312                 e = p+sizeof(buf)-UTFmax-1;
313                 for(i=0; i<nbuf && p < e; i+=2){
314                         r = rb[i+1] | rb[i]<<8;
315                         p += runetochar(p, &r);
316                 }
317                 *p = 0;
318                 free(rb);
319                 nbuf = p - (char*)buf;
320         } else
321         if(memcmp(buf, "\xFF\xFE", 2) == 0){
322                 if(!mime)
323                         print("utf-16le ");
324
325                 nbuf -= 2;
326                 rb = malloc(nbuf+1);
327                 memmove(rb, buf+2, nbuf);
328                 p = (char*)buf;
329                 e = p+sizeof(buf)-UTFmax-1;
330                 for(i=0; i<nbuf && p < e; i+=2){
331                         r = rb[i] | rb[i+1]<<8;
332                         p += runetochar(p, &r);
333                 }
334                 *p = 0;
335                 free(rb);
336                 nbuf = p - (char*)buf;
337         }
338 }
339
340 void
341 filetype(int fd)
342 {
343         Rune r;
344         int i, f, n;
345         char *p, *eob;
346
347         free(mbuf);
348         mbuf = dirfstat(fd);
349         if(mbuf == nil){
350                 fprint(2, "cannot stat: %r\n");
351                 return;
352         }
353         if(mbuf->mode & DMDIR) {
354                 print("%s\n", mime ? OCTET : "directory");
355                 return;
356         }
357         if(mbuf->type != 'M' && mbuf->type != '|') {
358                 if(mime)
359                         print("%s\n", OCTET);
360                 else
361                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
362                 return;
363         }
364         /* may be reading a pipe on standard input */
365         nbuf = readn(fd, buf, sizeof(buf)-1);
366         if(nbuf < 0) {
367                 fprint(2, "cannot read: %r\n");
368                 return;
369         }
370         if(nbuf == 0) {
371                 print("%s\n", mime ? PLAIN : "empty file");
372                 return;
373         }
374         buf[nbuf] = 0;
375
376         utfconv();
377
378         /*
379          * build histogram table
380          */
381         memset(cfreq, 0, sizeof(cfreq));
382         for (i = 0; language[i].name; i++)
383                 language[i].count = 0;
384         eob = (char *)buf+nbuf;
385         for(n = 0, p = (char *)buf; p < eob; n++) {
386                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
387                         break;
388                 p += chartorune(&r, p);
389                 if (r == 0)
390                         f = Cnull;
391                 else if (r <= 0x7f) {
392                         if (!isprint(r) && !isspace(r))
393                                 f = Ceascii;    /* ASCII control char */
394                         else f = r;
395                 } else if (r == 0x80) {
396                         bump_utf_count(r);
397                         f = Cutf;
398                 } else if (r < 0xA0)
399                         f = Cbinary;    /* Invalid Runes */
400                 else if (r <= 0xff)
401                         f = Clatin;     /* Latin 1 */
402                 else {
403                         bump_utf_count(r);
404                         f = Cutf;               /* UTF extension */
405                 }
406                 cfreq[f]++;                     /* ASCII chars peg directly */
407         }
408         /*
409          * gross classify
410          */
411         if (cfreq[Cbinary])
412                 guess = Fbinary;
413         else if (cfreq[Cutf])
414                 guess = Futf;
415         else if (cfreq[Clatin])
416                 guess = Flatin;
417         else if (cfreq[Ceascii])
418                 guess = Feascii;
419         else if (cfreq[Cnull])
420                 guess = Fbinary;
421         else
422                 guess = Fascii;
423         /*
424          * lookup dictionary words
425          */
426         memset(wfreq, 0, sizeof(wfreq));
427         if(guess == Fascii || guess == Flatin || guess == Futf)
428                 wordfreq();
429         /*
430          * call individual classify routines
431          */
432         for(i=0; call[i]; i++)
433                 if((*call[i])())
434                         return;
435
436         /*
437          * if all else fails,
438          * print out gross classification
439          */
440         if (nbuf < 100 && !mime)
441                 print(mime ? PLAIN : "short ");
442         if (guess == Fascii)
443                 print("%s\n", mime ? PLAIN : "Ascii");
444         else if (guess == Feascii)
445                 print("%s\n", mime ? PLAIN : "extended ascii");
446         else if (guess == Flatin)
447                 print("%s\n", mime ? PLAIN : "latin ascii");
448         else if (guess == Futf && utf_count() < 4)
449                 print_utf();
450         else print("%s\n", mime ? OCTET : "binary");
451 }
452
453 void
454 bump_utf_count(Rune r)
455 {
456         int low, high, mid;
457
458         high = sizeof(language)/sizeof(language[0])-1;
459         for (low = 0; low < high;) {
460                 mid = (low+high)/2;
461                 if (r >= language[mid].low) {
462                         if (r <= language[mid].high) {
463                                 language[mid].count++;
464                                 break;
465                         } else low = mid+1;
466                 } else high = mid;
467         }
468 }
469
470 int
471 utf_count(void)
472 {
473         int i, count;
474
475         count = 0;
476         for (i = 0; language[i].name; i++)
477                 if (language[i].count > 0)
478                         switch (language[i].mode) {
479                         case Normal:
480                         case First:
481                                 count++;
482                                 break;
483                         default:
484                                 break;
485                         }
486         return count;
487 }
488
489 int
490 chkascii(void)
491 {
492         int i;
493
494         for (i = 'a'; i < 'z'; i++)
495                 if (cfreq[i])
496                         return 1;
497         for (i = 'A'; i < 'Z'; i++)
498                 if (cfreq[i])
499                         return 1;
500         return 0;
501 }
502
503 int
504 find_first(char *name)
505 {
506         int i;
507
508         for (i = 0; language[i].name != 0; i++)
509                 if (language[i].mode == First
510                         && strcmp(language[i].name, name) == 0)
511                         return i;
512         return -1;
513 }
514
515 void
516 print_utf(void)
517 {
518         int i, printed, j;
519
520         if(mime){
521                 print("%s\n", PLAIN);
522                 return;
523         }
524         if (chkascii()) {
525                 printed = 1;
526                 print("Ascii");
527         } else
528                 printed = 0;
529         for (i = 0; language[i].name; i++)
530                 if (language[i].count) {
531                         switch(language[i].mode) {
532                         case Multi:
533                                 j = find_first(language[i].name);
534                                 if (j < 0)
535                                         break;
536                                 if (language[j].count > 0)
537                                         break;
538                                 /* Fall through */
539                         case Normal:
540                         case First:
541                                 if (printed)
542                                         print(" & ");
543                                 else printed = 1;
544                                 print("%s", language[i].name);
545                                 break;
546                         case Shared:
547                         default:
548                                 break;
549                         }
550                 }
551         if(!printed)
552                 print("UTF");
553         print(" text\n");
554 }
555
556 void
557 wordfreq(void)
558 {
559         int low, high, mid, r;
560         uchar *p, *p2, c;
561
562         p = buf;
563         for(;;) {
564                 while (p < buf+nbuf && !isalpha(*p))
565                         p++;
566                 if (p >= buf+nbuf)
567                         return;
568                 p2 = p;
569                 while(p < buf+nbuf && isalpha(*p))
570                         p++;
571                 c = *p;
572                 *p = 0;
573                 high = sizeof(dict)/sizeof(dict[0]);
574                 for(low = 0;low < high;) {
575                         mid = (low+high)/2;
576                         r = strcmp(dict[mid].word, (char*)p2);
577                         if(r == 0) {
578                                 wfreq[dict[mid].class]++;
579                                 break;
580                         }
581                         if(r < 0)
582                                 low = mid+1;
583                         else
584                                 high = mid;
585                 }
586                 *p++ = c;
587         }
588 }
589
/*
 * One magic-number rule: a 32-bit value x matches when, after masking
 * with mask, it equals this entry's x (see filemagic()).
 */
typedef struct Filemagic Filemagic;
struct Filemagic {
        ulong x;        /* expected value after masking */
        ulong mask;     /* bits of the file's value that take part in the comparison */
        char *desc;     /* description printed in normal mode */
        char *mime;     /* MIME type printed under -m */
};
597
/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 *
 * long0() matches these against the first four bytes of buf, and
 * filemagic() returns at the first entry that matches, so the order
 * of the entries is significant.
 */
Filemagic long0tab[] = {
        0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
        /* "pac1" */
        0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
        /* "pXc2" */
        0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
        0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
        0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
        0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
        0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
        070707,         0xFFFF,         "cpio archive", "application/x-cpio",
        0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
        0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
        0xf0ff,         0xf6ff,         "aac audio",    "audio/mpeg",
        /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
        0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
        /* 0xfeedfacf */
        0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
        /* 0xcefaedfe */
        0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
        /* 0xcffaedfe */
        0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
        /* 0xcafebabe */
        0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
        /*
         * venti & fossil magic numbers are stored big-endian on disk,
         * thus the numbers appear reversed in this table.
         */
        0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
        0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
        0x1a53454e,     0xFFFFFFFF,     "NES ROM", OCTET,
        /* tcpdump pcap file */
        0xa1b2c3d4,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
        0xd4c3b2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
        0xa1b23c4d,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
        0x4d3cb2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
};
639
640 int
641 filemagic(Filemagic *tab, int ntab, ulong x)
642 {
643         int i;
644
645         for(i=0; i<ntab; i++)
646                 if((x&tab[i].mask) == tab[i].x){
647                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
648                         return 1;
649                 }
650         return 0;
651 }
652
653 int
654 long0(void)
655 {
656         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
657 }
658
/*
 * A magic-number rule applied at a byte offset into the file rather
 * than at its start.  The Filemagic is embedded without a member
 * name; fileoffmagic() reaches its fields directly (tp->mask, tp->x).
 */
typedef struct Fileoffmag Fileoffmag;
struct Fileoffmag {
        ulong   off;    /* byte offset passed to seek() before reading the value */
        Filemagic;
};
664
/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 *
 * Each row is: offset, value, mask, description, MIME type.
 * fileoffmagic() tries the rows in order and stops at the first match.
 */
Fileoffmag longofftab[] = {
        /*
         * venti & fossil magic numbers are stored big-endian on disk,
         * thus the numbers appear reversed in this table.
         */
        256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
        256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
        128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
        4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
        0x100,    0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
        0x1fc,    0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
};
681
682 int
683 fileoffmagic(Fileoffmag *tab, int ntab)
684 {
685         int i;
686         ulong x;
687         Fileoffmag *tp;
688         uchar buf[sizeof(long)];
689
690         for(i=0; i<ntab; i++) {
691                 tp = tab + i;
692                 seek(fd, tp->off, 0);
693                 if (readn(fd, buf, sizeof buf) != sizeof buf)
694                         continue;
695                 x = LENDIAN(buf);
696                 if((x&tp->mask) == tp->x){
697                         print("%s\n", mime ? tp->mime : tp->desc);
698                         return 1;
699                 }
700         }
701         return 0;
702 }
703
704 int
705 longoff(void)
706 {
707         return fileoffmagic(longofftab, nelem(longofftab));
708 }
709
710 int
711 isexec(void)
712 {
713         Fhdr f;
714
715         seek(fd, 0, 0);         /* reposition to start of file */
716         if(crackhdr(fd, &f)) {
717                 print("%s\n", mime ? OCTET : f.name);
718                 return 1;
719         }
720         return 0;
721 }
722
723
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/*
 * One tar header block, viewed either as raw bytes (dummy) or as the
 * header fields (dbuf).
 */
union   hblock
{
        char    dummy[TBLOCK];
        struct  header
        {
                char    name[NAMSIZ];
                char    mode[8];
                char    uid[8];
                char    gid[8];
                char    size[12];
                char    mtime[12];
                char    chksum[8];
                char    linkflag;
                char    linkname[NAMSIZ];
                /* rest are defined by POSIX's ustar format; see p1003.2b */
                char    magic[6];       /* "ustar" */
                char    version[2];
                char    uname[32];
                char    gname[32];
                char    devmajor[8];
                char    devminor[8];
                char    prefix[155];  /* if non-null, path = prefix "/" name */
        } dbuf;
};

/*
 * Compute the checksum of the header block at hp: the sum of all
 * TBLOCK bytes, each taken as an unsigned value, with the chksum
 * field counted as blanks.
 * NOTE: the chksum field of *hp is overwritten with blanks, so a
 * caller must read the stored checksum before calling this.
 */
int
checksum(union hblock *hp)
{
        int sum, k;

        memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
        sum = 0;
        for(k = 0; k < TBLOCK; k++)
                sum += hp->dummy[k] & 0xff;
        return sum;
}
766
767 int
768 istar(void)
769 {
770         int chksum;
771         char tblock[TBLOCK];
772         union hblock *hp = (union hblock *)tblock;
773         struct header *hdr = &hp->dbuf;
774
775         seek(fd, 0, 0);         /* reposition to start of file */
776         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
777                 return 0;
778         chksum = strtol(hdr->chksum, 0, 8);
779         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
780                 if (strcmp(hdr->magic, "ustar") == 0)
781                         print(mime? "application/x-ustar\n": "posix tar archive\n");
782                 else
783                         print(mime? "application/x-tar\n": "tar archive\n");
784                 return 1;
785         }
786         return 0;
787 }
788
789 /*
790  * initial words to classify file
791  */
792 struct  FILE_STRING
793 {
794         char    *key;
795         char    *filetype;
796         int     length;
797         char    *mime;
798 } file_string[] =
799 {
800         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
801         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
802         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
803         "!<arch>\n__.SYMDEF",   "archive random library",       16,     OCTET,
804         "!<arch>\n",            "archive",                      8,      OCTET,
805         "070707",               "cpio archive - ascii header",  6,      OCTET,
806         "#!/bin/rc",            "rc executable file",           9,      PLAIN,
807         "#!/bin/sh",            "sh executable file",           9,      PLAIN,
808         "%!",                   "postscript",                   2,      "application/postscript",
809         "\004%!",               "postscript",                   3,      "application/postscript",
810         "x T post",             "troff output for post",        8,      "application/troff",
811         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
812         "x T utf",              "troff output for UTF",         7,      "application/troff",
813         "x T 202",              "troff output for 202",         7,      "application/troff",
814         "x T aps",              "troff output for aps",         7,      "application/troff",
815         "x T ",                 "troff output",                 4,      "application/troff",
816         "GIF",                  "GIF image",                    3,      "image/gif",
817         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
818         "%PDF",                 "PDF",                          4,      "application/pdf",
819         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
820         "<!doctype",            "HTML file",                    9,      "text/html",
821         "<!--",                 "HTML file",                    4,      "text/html",
822         "<html>",               "HTML file",                    6,      "text/html",
823         "<HTML>",               "HTML file",                    6,      "text/html",
824         "<?xml",                "HTML file",                    5,      "text/html",
825         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
826         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
827         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
828         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
829         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
830         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
831         "BM",                   "bmp",                          2,      "image/bmp", 
832         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
833         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
834         "\033E\033",            "HP PCL printer data",          3,      OCTET,
835         "\033&",                "HP PCL printer data",          2,      OCTET,
836         "\033%-12345X",         "HPJCL file",           9,      "application/hpjcl",
837         "\033Lua",              "Lua bytecode",         4,      OCTET,
838         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
839         "OggS",                 "ogg audio",            4,      "audio/ogg",
840         ".snd",                 "sun audio",            4,      "audio/basic",
841         "\211PNG",              "PNG image",            4,      "image/png",
842         "P1\n",                 "ppm",                  3,      "image/ppm",
843         "P2\n",                 "ppm",                  3,      "image/ppm",
844         "P3\n",                 "ppm",                  3,      "image/ppm",
845         "P4\n",                 "ppm",                  3,      "image/ppm",
846         "P5\n",                 "ppm",                  3,      "image/ppm",
847         "P6\n",                 "ppm",                  3,      "image/ppm",
848         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
849         ".HTML ",               "troff -ms input",      6,      "text/troff",
850         ".LP",                  "troff -ms input",      3,      "text/troff",
851         ".ND",                  "troff -ms input",      3,      "text/troff",
852         ".PP",                  "troff -ms input",      3,      "text/troff",
853         ".TL",                  "troff -ms input",      3,      "text/troff",
854         ".TR",                  "troff -ms input",      3,      "text/troff",
855         ".TH",                  "manual page",          3,      "text/troff",
856         ".\\\"",                "troff input",          3,      "text/troff",
857         ".de",                  "troff input",          3,      "text/troff",
858         ".if",                  "troff input",          3,      "text/troff",
859         ".nr",                  "troff input",          3,      "text/troff",
860         ".tr",                  "troff input",          3,      "text/troff",
861         "vac:",                 "venti score",          4,      PLAIN,
862         "-----BEGIN CERTIFICATE-----\n",
863                                 "pem certificate",      -1,     PLAIN,
864         "-----BEGIN TRUSTED CERTIFICATE-----\n",
865                                 "pem trusted certificate", -1,  PLAIN,
866         "-----BEGIN X509 CERTIFICATE-----\n",
867                                 "pem x.509 certificate", -1,    PLAIN,
868         "subject=/C=",          "pem certificate with header", -1, PLAIN,
869         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
870         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
871         "[playlist]",           "playlist",             10,     "application/x-scpls",
872         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
873         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
874         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
875         "AT&T",                 "DjVu document",        4,      "image/vnd.djvu",
876         "Extended module: ",    "XM audio",             17,     "audio/xm",
877         "MThd",                 "midi audio",           4,      "audio/midi",
878         "MUS\x1a",              "mus audio",            4,      "audio/mus",
879         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
880         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
881         "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
882         "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
883         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
884         "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
885         "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
886         "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
887         0,0,0,0
888 };
889
890 int
891 istring(void)
892 {
893         int i, l;
894         struct FILE_STRING *p;
895
896         for(p = file_string; p->key; p++) {
897                 l = p->length;
898                 if(l == -1)
899                         l = strlen(p->key);
900                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
901                         print("%s\n", mime ? p->mime : p->filetype);
902                         return 1;
903                 }
904         }
905         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
906                 for(i = 5; i < nbuf; i++)
907                         if(buf[i] == '\n')
908                                 break;
909                 if(mime)
910                         print("%s\n", OCTET);
911                 else
912                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
913                 return 1;
914         }
915         return 0;
916 }
917
918 struct offstr
919 {
920         ulong   off;
921         struct FILE_STRING;
922 } offstrs[] = {
923         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
924         32*4, "DICM",   "DICOM medical imaging data",   4,      "application/dicom",
925         0, 0, 0, 0, 0
926 };
927
928 int
929 isoffstr(void)
930 {
931         int n;
932         char buf[256];
933         struct offstr *p;
934
935         for(p = offstrs; p->key; p++) {
936                 seek(fd, p->off, 0);
937                 n = p->length;
938                 if (n > sizeof buf)
939                         n = sizeof buf;
940                 if (readn(fd, buf, n) != n)
941                         continue;
942                 if(memcmp(buf, p->key, n) == 0) {
943                         print("%s\n", mime ? p->mime : p->filetype);
944                         return 1;
945                 }
946         }
947         return 0;
948 }
949
950 int
951 iff(void)
952 {
953         if (strncmp((char*)buf, "FORM", 4) == 0 &&
954             strncmp((char*)buf+8, "AIFF", 4) == 0) {
955                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
956                 return 1;
957         }
958         if (strncmp((char*)buf, "RIFF", 4) == 0) {
959                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
960                         print("%s\n", mime? "audio/wave": "wave audio");
961                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
962                         print("%s\n", mime? "video/avi": "avi video");
963                 else
964                         print("%s\n", mime? OCTET : "riff file");
965                 return 1;
966         }
967         return 0;
968 }
969
/*
 * Tag names looked for by ishtml(), nil terminated.
 * ishtml() stops at the first entry that matches as a prefix, so the
 * names are kept longest first: a short name must never come before a
 * longer name that begins with it (e.g. "b" before "body").
 */
char*	html_string[] = {
	/* 10 characters */
	"blockquote",
	/* 8 characters */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes",
	"textarea",
	/* 7 characters */
	"caption",
	/* 6 characters */
	"button", "center", "iframe", "object", "option", "script",
	"select", "strong",
	/* 5 characters */
	"blink", "embed", "frame", "input", "label", "param", "small",
	"style", "table", "tbody", "tfoot", "thead", "title",
	/* 4 characters */
	"?xml", "body", "code", "font", "form", "head", "html",
	"link", "menu", "meta", "span",
	/* 3 characters */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 characters */
	"br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
	"h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
	/* 1 character */
	"a", "b", "i", "p", "q", "u",
	0,
};
986
987 int
988 isudiff(void)
989 {
990         char *p;
991
992         p = (char*)buf;
993         if((p = strstr(p, "diff")) != nil)
994         if((p = strchr(p, '\n')) != nil)
995         if(strncmp(++p, "--- ", 4) == 0)
996         if((p = strchr(p, '\n')) != nil)
997         if(strncmp(++p, "+++ ", 4) == 0)
998         if((p = strchr(p, '\n')) != nil)
999         if(strncmp(++p, "@@ ", 3) == 0){
1000                 print("%s\n", mime ? "text/plain" : "unified diff output");
1001                 return 1;
1002         }
1003         return 0;
1004 }
1005
1006 int
1007 ishtml(void)
1008 {
1009         int i, n, count;
1010         uchar *p;
1011
1012         count = 0;
1013         p = buf;
1014         for(;;) {
1015                 while(p < buf+nbuf && *p != '<')
1016                         p++;
1017                 p++;
1018                 if (p >= buf+nbuf)
1019                         break;
1020                 if(*p == '/')
1021                         p++;
1022                 if(p >= buf+nbuf)
1023                         break;
1024                 for(i = 0; html_string[i]; i++){
1025                         n = strlen(html_string[i]);
1026                         if(p + n > buf+nbuf)
1027                                 continue;
1028                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1029                                 p += n;
1030                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1031                                         if(++count > 2) {
1032                                                 print("%s\n", mime ? "text/html" : "HTML file");
1033                                                 return 1;
1034                                         }
1035                                 }
1036                                 break;
1037                         }
1038                 }
1039         }
1040         return 0;
1041 }
1042
/*
 * Header field names, in lower case and including the colon, that
 * isrfc822() counts (case-insensitively) at the start of a line.
 * The list is nil terminated.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",	/* the field name is hyphenated; "reply to:" could never match */
	"sender:",
	0,
};
1054
1055 int
1056 isrfc822(void)
1057 {
1058
1059         char *p, *q, *r;
1060         int i, count;
1061
1062         count = 0;
1063         p = (char*)buf;
1064         for(;;) {
1065                 q = strchr(p, '\n');
1066                 if(q == nil)
1067                         break;
1068                 *q = 0;
1069                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1070                         count++;
1071                         *q = '\n';
1072                         p = q+1;
1073                         continue;
1074                 }
1075                 *q = '\n';
1076                 if(*p != '\t' && *p != ' '){
1077                         r = strchr(p, ':');
1078                         if(r == 0 || r > q)
1079                                 break;
1080                         for(i = 0; rfc822_string[i]; i++) {
1081                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1082                                         count++;
1083                                         break;
1084                                 }
1085                         }
1086                 }
1087                 p = q+1;
1088         }
1089         if(count >= 3){
1090                 print("%s\n", mime ? "message/rfc822" : "email file");
1091                 return 1;
1092         }
1093         return 0;
1094 }
1095
1096 int
1097 ismbox(void)
1098 {
1099         char *p, *q;
1100
1101         p = (char*)buf;
1102         q = strchr(p, '\n');
1103         if(q == nil)
1104                 return 0;
1105         *q = 0;
1106         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1107                 print("%s\n", mime ? PLAIN : "mail box");
1108                 return 1;
1109         }
1110         *q = '\n';
1111         return 0;
1112 }
1113
1114 int
1115 iscint(void)
1116 {
1117         int type;
1118         char *name;
1119         Biobuf b;
1120
1121         if(Binit(&b, fd, OREAD) == Beof)
1122                 return 0;
1123         seek(fd, 0, 0);
1124         type = objtype(&b, &name);
1125         if(type < 0)
1126                 return 0;
1127         if(mime)
1128                 print("%s\n", OCTET);
1129         else
1130                 print("%s intermediate\n", name);
1131         return 1;
1132 }
1133
1134 int
1135 isc(void)
1136 {
1137         int n;
1138
1139         n = wfreq[I1];
1140         /*
1141          * includes
1142          */
1143         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1144                 goto yes;
1145         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1146                 goto yes;
1147         /*
1148          * declarations
1149          */
1150         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1151                 goto yes;
1152         /*
1153          * assignments
1154          */
1155         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1156                 goto yes;
1157         return 0;
1158
1159 yes:
1160         if(mime){
1161                 print("%s\n", PLAIN);
1162                 return 1;
1163         }
1164         if(wfreq[Alword] > 0)
1165                 print("alef program\n");
1166         else
1167                 print("c program\n");
1168         return 1;
1169 }
1170
1171 int
1172 islimbo(void)
1173 {
1174         /*
1175          * includes
1176          */
1177         if(wfreq[Lword] < 4)
1178                 return 0;
1179         print("%s\n", mime ? PLAIN : "limbo program");
1180         return 1;
1181 }
1182
1183 int
1184 isas(void)
1185 {
1186         /*
1187          * includes
1188          */
1189         if(wfreq[Aword] < 2)
1190                 return 0;
1191         print("%s\n", mime ? PLAIN : "as program");
1192         return 1;
1193 }
1194
1195 int
1196 istga(void)
1197 {
1198         uchar *p;
1199
1200         p = buf;
1201         if(nbuf < 18)
1202                 return 0;
1203         if((p[12] | p[13]<<8) == 0)     /* width */
1204                 return 0;
1205         if((p[14] | p[15]<<8) == 0)     /* height */
1206                 return 0;
1207         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1208                 return 0;
1209         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1210                 return 0;
1211         if(p[1] == 0){  /* non color-mapped */
1212                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1213                         return 0;
1214                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1215                         return 0;
1216         } else
1217         if(p[1] == 1){  /* color-mapped */
1218                 if((p[2]&3) != 1 || p[7] == 0)  
1219                         return 0;
1220                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1221                         return 0;
1222         } else
1223                 return 0;
1224         print("%s\n", mime ? "image/tga" : "targa image");
1225         return 1;
1226 }
1227
1228 int
1229 ismp3(void)
1230 {
1231         uchar *p, *e;
1232
1233         p = buf;
1234         e = p + nbuf-1;
1235         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1236                 if((p[1] & 0xFE) == 0xFA){
1237                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1238                         return 1;
1239                 }
1240                 p++;
1241         }
1242         return 0;
1243 }
1244
1245 /*
1246  * low entropy means encrypted
1247  */
1248 int
1249 ismung(void)
1250 {
1251         int i, bucket[8];
1252         float cs;
1253
1254         if(nbuf < 64)
1255                 return 0;
1256         memset(bucket, 0, sizeof(bucket));
1257         for(i=nbuf-64; i<nbuf; i++)
1258                 bucket[(buf[i]>>5)&07] += 1;
1259
1260         cs = 0.;
1261         for(i=0; i<8; i++)
1262                 cs += (bucket[i]-8)*(bucket[i]-8);
1263         cs /= 8.;
1264         if(cs <= 24.322) {
1265                 if(buf[0]==0x1f && buf[1]==0x9d)
1266                         print("%s\n", mime ? "application/x-compress" : "compressed");
1267                 else
1268                 if(buf[0]==0x1f && buf[1]==0x8b)
1269                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1270                 else
1271                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1272                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1273                 else
1274                 if(buf[0]==0x78 && buf[1]==0x9c)
1275                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1276                 else
1277                         print("%s\n", mime ? OCTET : "encrypted");
1278                 return 1;
1279         }
1280         return 0;
1281 }
1282
1283 /*
1284  * english by punctuation and frequencies
1285  */
1286 int
1287 isenglish(void)
1288 {
1289         int vow, comm, rare, badpun, punct;
1290         char *p;
1291
1292         if(guess != Fascii && guess != Feascii)
1293                 return 0;
1294         badpun = 0;
1295         punct = 0;
1296         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1297                 switch(*p) {
1298                 case '.':
1299                 case ',':
1300                 case ')':
1301                 case '%':
1302                 case ';':
1303                 case ':':
1304                 case '?':
1305                         punct++;
1306                         if(p[1] != ' ' && p[1] != '\n')
1307                                 badpun++;
1308                 }
1309         if(badpun*5 > punct)
1310                 return 0;
1311         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1312                 return 0;
1313         if(2*cfreq[';'] > cfreq['e'])
1314                 return 0;
1315
1316         vow = 0;
1317         for(p="AEIOU"; *p; p++) {
1318                 vow += cfreq[*p];
1319                 vow += cfreq[tolower(*p)];
1320         }
1321         comm = 0;
1322         for(p="ETAION"; *p; p++) {
1323                 comm += cfreq[*p];
1324                 comm += cfreq[tolower(*p)];
1325         }
1326         rare = 0;
1327         for(p="VJKQXZ"; *p; p++) {
1328                 rare += cfreq[*p];
1329                 rare += cfreq[tolower(*p)];
1330         }
1331         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1332                 print("%s\n", mime ? PLAIN : "English text");
1333                 return 1;
1334         }
1335         return 0;
1336 }
1337
/*
 * pick up a number with
 * syntax _*[0-9]+_
 *
 * s points at a field of P9BITLEN bytes whose last byte must be a
 * space.  That byte is turned into a NUL just long enough for strtol
 * to convert the field, then put back.  *v receives whatever strtol
 * converted.  Returns 0 when the conversion consumed everything up to
 * the final space, -1 otherwise (*v is untouched only when the final
 * byte was not a space).
 */
#define	P9BITLEN	12
int
p9bitnum(char *s, int *v)
{
	char *end, *sep;

	sep = s + P9BITLEN-1;
	if(*sep != ' ')
		return -1;
	*sep = '\0';
	*v = strtol(s, &end, 10);
	*sep = ' ';
	return end == sep ? 0 : -1;
}
1357
/*
 * Work out the pixel depth from the first 12-byte field of an image
 * header.  s points at the field; leading spaces are skipped.
 *
 * Old format: the field is a number, taken as the log2 of the depth.
 *	*newp is set to 0.  Values above 5 (32 bits per pixel) are
 *	rejected so the shift can neither overflow nor hit undefined
 *	behaviour.
 * New format: the field is a channel string, pairs of a letter from
 *	"rgbkamx" and a bit count, e.g. r8g8b8.  *newp is set to 1 and
 *	the depth is the sum of the bit counts.
 *
 * Returns the depth, always > 0, or -1 if the field is malformed:
 * all spaces, an unknown channel letter, a letter without a count,
 * a zero total, or a total that neither divides 8 nor is a multiple
 * of 8.  A zero depth used to be returned for strings such as "r0",
 * which made the caller divide by zero.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		if(ld < 0 || ld > 5)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		/* strchr would match the terminating NUL of the set, so rule it out first */
		if(*s == '\0' || strchr("rgbkamx", *s) == 0)
			return -1;
		s++;
		if('0'<=*s && *s<='9')
			d += strtoul(s, &s, 10);
		else
			return -1;
	}

	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1390
1391 int
1392 isp9bit(void)
1393 {
1394         int dep, lox, loy, hix, hiy, px, new, cmpr;
1395         long len;
1396         char *newlabel;
1397         uchar *cp;
1398
1399         cp = buf;
1400         cmpr = 0;
1401         if(memcmp(cp, "compressed\n", 11) == 0) {
1402                 cmpr = 1;
1403                 cp = buf + 11;
1404         }
1405
1406         if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1407                 return 0;
1408         newlabel = new ? "" : "old ";
1409         if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1410                 return 0;
1411         if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1412                 return 0;
1413         if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1414                 return 0;
1415         if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1416                 return 0;
1417
1418         hix -= lox;
1419         hiy -= loy;
1420         if(hix <= 0 || hiy <= 0)
1421                 return 0;
1422
1423         if(dep < 8){
1424                 px = 8/dep;             /* pixels per byte */
1425                 /* set l to number of bytes of data per scan line */
1426                 len = (hix+px-1)/px;
1427         }else
1428                 len = hix*dep/8;
1429         len *= hiy;                     /* col length */
1430         len += 5 * P9BITLEN;            /* size of initial ascii */
1431
1432         /*
1433          * for compressed images, don't look any further. otherwise:
1434          * for image file, length is non-zero and must match calculation above.
1435          * for /dev/window and /dev/screen the length is always zero.
1436          * for subfont, the subfont header should follow immediately.
1437          */
1438         if (cmpr) {
1439                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1440                         newlabel, dep, hix, hiy);
1441                 return 1;
1442         }
1443         /*
1444          * mbuf->length == 0 probably indicates reading a pipe.
1445          * Ghostscript sometimes produces a little extra on the end.
1446          */
1447         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1448             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1449                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1450                         newlabel, dep, hix, hiy);
1451                 return 1;
1452         }
1453         if (p9subfont(buf+len)) {
1454                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1455                         newlabel, dep, hix, hiy);
1456                 return 1;
1457         }
1458         return 0;
1459 }
1460
1461 int
1462 p9subfont(uchar *p)
1463 {
1464         int n, h, a;
1465
1466         /* if image too big, assume it's a subfont */
1467         if (p+3*P9BITLEN > buf+sizeof(buf))
1468                 return 1;
1469
1470         if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0)    /* char count */
1471                 return 0;
1472         if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0)    /* height */
1473                 return 0;
1474         if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0)    /* ascent */
1475                 return 0;
1476         if(n > 0 && h > 0 && a >= 0)
1477                 return 1;
1478         return 0;
1479 }
1480
1481 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1482
1483 int
1484 isp9font(void)
1485 {
1486         uchar *cp, *p;
1487         int i, n;
1488         char pathname[1024];
1489
1490         cp = buf;
1491         if (!getfontnum(cp, &cp))       /* height */
1492                 return 0;
1493         if (!getfontnum(cp, &cp))       /* ascent */
1494                 return 0;
1495         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1496                 if (!getfontnum(cp, &cp))       /* min */
1497                         break;
1498                 if (!getfontnum(cp, &cp))       /* max */
1499                         return 0;
1500                 getfontnum(cp, &cp);    /* optional offset */
1501                 while (WHITESPACE(*cp))
1502                         cp++;
1503                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1504                                 ;
1505                         /* construct a path name, if needed */
1506                 n = 0;
1507                 if (*p != '/' && slash) {
1508                         n = slash-fname+1;
1509                         if (n < sizeof(pathname))
1510                                 memcpy(pathname, fname, n);
1511                         else n = 0;
1512                 }
1513                 if (n+cp-p+4 < sizeof(pathname)) {
1514                         memcpy(pathname+n, p, cp-p);
1515                         n += cp-p;
1516                         pathname[n] = 0;
1517                         if (access(pathname, AEXIST) < 0) {
1518                                 strcpy(pathname+n, ".0");
1519                                 if (access(pathname, AEXIST) < 0)
1520                                         return 0;
1521                         }
1522                 }
1523         }
1524         if (i) {
1525                 print("%s\n", mime ? PLAIN : "font file");
1526                 return 1;
1527         }
1528         return 0;
1529 }
1530
1531 int
1532 getfontnum(uchar *cp, uchar **rp)
1533 {
1534         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1535                 cp++;
1536         if (*cp < '0' || *cp > '9')
1537                 return 0;
1538         strtoul((char *)cp, (char **)rp, 0);
1539         if (!WHITESPACE(**rp)) {
1540                 *rp = cp;
1541                 return 0;
1542         }
1543         return 1;
1544 }
1545
1546 int
1547 isrtf(void)
1548 {
1549         if(strstr((char *)buf, "\\rtf1")){
1550                 print(mime ? "application/rtf\n" : "rich text format\n");
1551                 return 1;
1552         }
1553         return 0;
1554 }
1555
1556 int
1557 ismsdos(void)
1558 {
1559         if (buf[0] == 0x4d && buf[1] == 0x5a){
1560                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1561                 return 1;
1562         }
1563         return 0;
1564 }
1565
1566 int
1567 isicocur(void)
1568 {
1569         if(buf[0] || buf[1] || buf[3] || buf[9])
1570                 return 0;
1571         if(buf[4] == 0x00 && buf[5] == 0x00)
1572                 return 0;
1573         switch(buf[2]){
1574         case 1:
1575                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1576                 return 1;
1577         case 2:
1578                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1579                 return 1;
1580         }
1581         return 0;
1582 }
1583
1584 int
1585 iself(void)
1586 {
1587         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1588         [1]     "WE32100",
1589         [2]     "SPARC",
1590         [3]     "i386",
1591         [4]     "M68000",
1592         [5]     "M88000",
1593         [6]     "i486",
1594         [7]     "i860",
1595         [8]     "R3000",
1596         [9]     "S370",
1597         [10]    "R4000",
1598         [15]    "HP-PA",
1599         [18]    "sparc v8+",
1600         [19]    "i960",
1601         [20]    "PPC-32",
1602         [21]    "PPC-64",
1603         [40]    "ARM",
1604         [41]    "Alpha",
1605         [43]    "sparc v9",
1606         [50]    "IA-64",
1607         [62]    "AMD64",
1608         [75]    "VAX",
1609         };
1610         static char *type[] = {
1611         [1]     "relocatable object",
1612         [2]     "executable",
1613         [3]     "shared library",
1614         [4]     "core dump",
1615         };
1616
1617         if (memcmp(buf, "\x7fELF", 4) == 0){
1618                 if (!mime){
1619                         int isdifend = 0;
1620                         int n = (buf[19] << 8) | buf[18];
1621                         char *p = "unknown";
1622                         char *t = "unknown";
1623
1624                         if (n > 0 && n < nelem(cpu) && cpu[n])
1625                                 p = cpu[n];
1626                         else {
1627                                 /* try the other byte order */
1628                                 isdifend = 1;
1629                                 n = (buf[18] << 8) | buf[19];
1630                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1631                                         p = cpu[n];
1632                         }
1633                         if(isdifend)
1634                                 n = (buf[16]<< 8) | buf[17];
1635                         else
1636                                 n = (buf[17]<< 8) | buf[16];
1637
1638                         if(n>0 && n < nelem(type) && type[n])
1639                                 t = type[n];
1640                         print("%s ELF %s\n", p, t);
1641                 }
1642                 else
1643                         print("application/x-elf-executable\n");
1644                 return 1;
1645         }
1646
1647         return 0;
1648 }
1649
1650 int
1651 isface(void)
1652 {
1653         int i, j, ldepth, l;
1654         char *p;
1655
1656         ldepth = -1;
1657         for(j = 0; j < 3; j++){
1658                 for(p = (char*)buf, i=0; i<3; i++){
1659                         if(p[0] != '0' || p[1] != 'x')
1660                                 return 0;
1661                         if(buf[2+8] == ',')
1662                                 l = 2;
1663                         else if(buf[2+4] == ',')
1664                                 l = 1;
1665                         else
1666                                 return 0;
1667                         if(ldepth == -1)
1668                                 ldepth = l;
1669                         if(l != ldepth)
1670                                 return 0;
1671                         strtoul(p, &p, 16);
1672                         if(*p++ != ',')
1673                                 return 0;
1674                         while(*p == ' ' || *p == '\t')
1675                                 p++;
1676                 }
1677                 if (*p++ != '\n')
1678                         return 0;
1679         }
1680
1681         if(mime)
1682                 print("application/x-face\n");
1683         else
1684                 print("face image depth %d\n", ldepth);
1685         return 1;
1686 }