]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
dec241f96693f729e3e166ce512c996c56614e39
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
10 #define LENDIAN(p)      ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
11
12 uchar   buf[6001];
13 short   cfreq[140];
14 short   wfreq[50];
15 int     nbuf;
16 Dir*    mbuf;
17 int     fd;
18 char    *fname;
19 char    *slash;
20
/*
 * Classes used as indices into the frequency tables.
 * The first group indexes wfreq (one counter per dictionary word class);
 * the group starting at Clatin indexes cfreq above the ASCII range.
 * NOTE(review): the word classes presumably stand for C, Fortran,
 * assembler, Alef and Limbo keywords and for pieces of an #include
 * line (I1, I2, I3) -- confirm against the classifiers that read wfreq.
 */
enum
{
	Cword,
	Fword,
	Aword,
	Alword,
	Lword,
	I1,
	I2,
	I3,
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Keyword dictionary consulted by wordfreq().
 * wordfreq() does a binary search with strcmp(), so the entries MUST
 * stay sorted in strcmp (byte value) order: upper case before lower case.
 * A single misplaced entry makes its neighbours unreachable too.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	"PATH",		Lword,
	"TEXT",		Aword,
	"adt",		Alword,
	"aggr",		Alword,
	"alef",		Alword,
	"array",	Lword,
	"bio",		I2,
	"block",	Fword,
	"char",		Cword,
	"common",	Fword,
	"con",		Lword,
	"data",		Fword,
	"dimension",	Fword,
	"double",	Cword,
	"extern",	Cword,
	"float",	Cword,
	"fn",		Lword,
	"function",	Fword,
	"h",		I3,
	"implement",	Lword,
	"import",	Lword,
	"include",	I1,
	"int",		Cword,
	"integer",	Fword,
	"iota",		Lword,
	"libc",		I2,
	"long",		Cword,
	"module",	Lword,
	"real",		Fword,
	"ref",		Lword,
	"register",	Cword,
	"self",		Lword,
	"short",	Cword,
	"static",	Cword,
	"stdio",	I2,
	"struct",	Cword,
	"subroutine",	Fword,
	"u",		I2,
	"void",		Cword,
};
83
/* values of the 'mode' member of the language table */
enum {
	Normal = 0,	/* language covered by a single range */
	First,		/* first entry for language spanning several ranges */
	Multi,		/* later entries of such a language */
	Shared,		/* codes used in several languages */
};

/*
 * Rune ranges and the language each one is reported as.
 * bump_utf_count() binary-searches this table on low/high, so rows must
 * stay in ascending range order; the row with a nil name ends the table.
 */
struct
{
	int	mode;		/* see enum above */
	int	count;		/* runes seen in [low, high]; reset by filetype */
	int	low;
	int	high;
	char	*name;
} language[] =
{
	{ Normal,	0,	0x0100,	0x01FF,	"Extended Latin" },
	{ Normal,	0,	0x0370,	0x03FF,	"Greek" },
	{ Normal,	0,	0x0400,	0x04FF,	"Cyrillic" },
	{ Normal,	0,	0x0530,	0x058F,	"Armenian" },
	{ Normal,	0,	0x0590,	0x05FF,	"Hebrew" },
	{ Normal,	0,	0x0600,	0x06FF,	"Arabic" },
	{ Normal,	0,	0x0900,	0x097F,	"Devanagari" },
	{ Normal,	0,	0x0980,	0x09FF,	"Bengali" },
	{ Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi" },
	{ Normal,	0,	0x0A80,	0x0AFF,	"Gujarati" },
	{ Normal,	0,	0x0B00,	0x0B7F,	"Oriya" },
	{ Normal,	0,	0x0B80,	0x0BFF,	"Tamil" },
	{ Normal,	0,	0x0C00,	0x0C7F,	"Telugu" },
	{ Normal,	0,	0x0C80,	0x0CFF,	"Kannada" },
	{ Normal,	0,	0x0D00,	0x0D7F,	"Malayalam" },
	{ Normal,	0,	0x0E00,	0x0E7F,	"Thai" },
	{ Normal,	0,	0x0E80,	0x0EFF,	"Lao" },
	{ Normal,	0,	0x1000,	0x105F,	"Tibetan" },
	{ Normal,	0,	0x10A0,	0x10FF,	"Georgian" },
	{ Normal,	0,	0x3040,	0x30FF,	"Japanese" },
	{ Normal,	0,	0x3100,	0x312F,	"Chinese" },
	{ First,	0,	0x3130,	0x318F,	"Korean" },
	{ Multi,	0,	0x3400,	0x3D2F,	"Korean" },
	{ Shared,	0,	0x4e00,	0x9fff,	"CJK" },
	{ Normal,	0,	0,	0,	0 },		/* terminal entry */
};
128
129
/* gross classification of the buffer, chosen by filetype() from cfreq */
enum
{
	Fascii,		/* printable ascii */
	Flatin,		/* latin 1 */
	Futf,		/* UTF character set */
	Fbinary,	/* binary */
	Feascii,	/* ASCII with control chars */
	Fnull,		/* NULL in file */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     ismp3(void);
154 int     ismung(void);
155 int     isp9bit(void);
156 int     isp9font(void);
157 int     isrtf(void);
158 int     ismsdos(void);
159 int     isicocur(void);
160 int     iself(void);
161 int     istring(void);
162 int     isoffstr(void);
163 int     iff(void);
164 int     long0(void);
165 int     longoff(void);
166 int     istar(void);
167 int     isface(void);
168 int     isexec(void);
169 int     p9bitnum(uchar*);
170 int     p9subfont(uchar*);
171 void    print_utf(void);
172 void    type(char*, int);
173 int     utf_count(void);
174 void    wordfreq(void);
175
176 int     (*call[])(void) =
177 {
178         long0,          /* recognizable by first 4 bytes */
179         istring,        /* recognizable by first string */
180         iself,          /* ELF (foreign) executable */
181         isexec,         /* native executables */
182         iff,            /* interchange file format (strings) */
183         longoff,        /* recognizable by 4 bytes at some offset */
184         isoffstr,       /* recognizable by string at some offset */
185         isrfc822,       /* email file */
186         ismbox,         /* mail box */
187         istar,          /* recognizable by tar checksum */
188         ishtml,         /* html keywords */
189         iscint,         /* compiler/assembler intermediate */
190         islimbo,        /* limbo source */
191         isc,            /* c & alef compiler key words */
192         isas,           /* assembler key words */
193         isp9font,       /* plan 9 font */
194         isp9bit,        /* plan 9 image (as from /dev/window) */
195         isrtf,          /* rich text format */
196         ismsdos,        /* msdos exe (virus file attachement) */
197         isicocur,               /* windows icon or cursor file */
198         isface,         /* ascii face file */
199         ismp3,
200
201         /* last resorts */
202         ismung,         /* entropy compressed/encrypted */
203         isenglish,      /* char frequency English */
204         0
205 };
206
/* MIME types used as the generic answers when -m is given */
char OCTET[] =	"application/octet-stream";
char PLAIN[] =	"text/plain";

/* non-zero when -m was given: print MIME types instead of descriptions */
int mime;
211
212 void
213 main(int argc, char *argv[])
214 {
215         int i, j, maxlen;
216         char *cp;
217         Rune r;
218
219         ARGBEGIN{
220         case 'm':
221                 mime = 1;
222                 break;
223         default:
224                 fprint(2, "usage: file [-m] [file...]\n");
225                 exits("usage");
226         }ARGEND;
227
228         maxlen = 0;
229         if(mime == 0 || argc > 1){
230                 for(i = 0; i < argc; i++) {
231                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
232                                         ;
233                         if(j > maxlen)
234                                 maxlen = j;
235                 }
236         }
237         if (argc <= 0) {
238                 if(!mime)
239                         print ("stdin: ");
240                 filetype(0);
241         }
242         else {
243                 for(i = 0; i < argc; i++)
244                         type(argv[i], maxlen);
245         }
246         exits(0);
247 }
248
249 void
250 type(char *file, int nlen)
251 {
252         Rune r;
253         int i;
254         char *p;
255
256         if(nlen > 0){
257                 slash = 0;
258                 for (i = 0, p = file; *p; i++) {
259                         if (*p == '/')                  /* find rightmost slash */
260                                 slash = p;
261                         p += chartorune(&r, p);         /* count runes */
262                 }
263                 print("%s:%*s",file, nlen-i+1, "");
264         }
265         fname = file;
266         if ((fd = open(file, OREAD)) < 0) {
267                 print("cannot open: %r\n");
268                 return;
269         }
270         filetype(fd);
271         close(fd);
272 }
273
/*
 * Unicode 4.0 4-byte runes: a rune type and a maximum encoded length
 * used by fullrune1() and chartorune1() for sequences of up to 4 bytes.
 */
enum {
	UTFmax1 = 4,	/* longest encoding, in bytes */
};

typedef int Rune1;
282
/*
 * Report whether the n bytes at p are enough to hold the whole
 * encoding announced by the first byte: 1 byte below 0x80, 2 below
 * 0xE0, 3 below 0xF0, otherwise 4.  Only the first byte is examined.
 * Returns 1 if so, 0 if not (always 0 when n < 1).
 */
int
fullrune1(char *p, int n)
{
	int lead, need;

	if(n < 1)
		return 0;
	lead = *(unsigned char*)p;
	if(lead < 0x80)
		need = 1;
	else if(lead < 0xE0)
		need = 2;
	else if(lead < 0xF0)
		need = 3;
	else
		need = 4;
	return n >= need;
}
301
302 int
303 chartorune1(Rune1 *rune, char *str)
304 {
305         int c, c1, c2, c3, n;
306         Rune r;
307
308         c = *(uchar*)str;
309         if(c < 0xF0){
310                 r = 0;
311                 n = chartorune(&r, str);
312                 *rune = r;
313                 return n;
314         }
315         c &= ~0xF0;
316         c1 = *(uchar*)(str+1) & ~0x80;
317         c2 = *(uchar*)(str+2) & ~0x80;
318         c3 = *(uchar*)(str+3) & ~0x80;
319         n = (c<<18) | (c1<<12) | (c2<<6) | c3;
320         if(n < 0x10000 || n > 0x10FFFF){
321                 *rune = Runeerror;
322                 return 1;
323         }
324         *rune = n;
325         return 4;
326 }
327
328 void
329 utfconv(void)
330 {
331         Rune r;
332         uchar *rb;
333         char *p, *e;
334         int i;
335
336         if(nbuf < 4)
337                 return;
338
339         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
340                 if(!mime)
341                         print("utf-32be ");
342                 return;
343         } else
344         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
345                 if(!mime)
346                         print("utf-32le ");
347                 return;
348         } else
349         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
350                 memmove(buf, buf+3, nbuf-3);
351                 nbuf -= 3;
352                 return;
353         } else
354         if(memcmp(buf, "\xFE\xFF", 2) == 0){
355                 if(!mime)
356                         print("utf-16be ");
357
358                 nbuf -= 2;
359                 rb = malloc(nbuf+1);
360                 memmove(rb, buf+2, nbuf);
361                 p = (char*)buf;
362                 e = p+nbuf-4;
363                 for(i=0; i<nbuf && p < e; i+=2){
364                         r = rb[i+1] | rb[i]<<8;
365                         p += runetochar(p, &r);
366                 }
367                 *p = 0;
368                 free(rb);
369                 nbuf = p - (char*)buf;
370         } else
371         if(memcmp(buf, "\xFF\xFE", 2) == 0){
372                 if(!mime)
373                         print("utf-16le ");
374
375                 nbuf -= 2;
376                 rb = malloc(nbuf+1);
377                 memmove(rb, buf+2, nbuf);
378                 p = (char*)buf;
379                 e = p+nbuf-4;
380                 for(i=0; i<nbuf && p < e; i+=2){
381                         r = rb[i] | rb[i+1]<<8;
382                         p += runetochar(p, &r);
383                 }
384                 *p = 0;
385                 free(rb);
386                 nbuf = p - (char*)buf;
387         }
388 }
389
390 void
391 filetype(int fd)
392 {
393         Rune1 r;
394         int i, f, n;
395         char *p, *eob;
396
397         free(mbuf);
398         mbuf = dirfstat(fd);
399         if(mbuf == nil){
400                 print("cannot stat: %r\n");
401                 return;
402         }
403         if(mbuf->mode & DMDIR) {
404                 print("%s\n", mime ? OCTET : "directory");
405                 return;
406         }
407         if(mbuf->type != 'M' && mbuf->type != '|') {
408                 if(mime)
409                         print("%s\n", OCTET);
410                 else
411                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
412                 return;
413         }
414         /* may be reading a pipe on standard input */
415         nbuf = readn(fd, buf, sizeof(buf)-1);
416         if(nbuf < 0) {
417                 print("cannot read: %r\n");
418                 return;
419         }
420         if(nbuf == 0) {
421                 print("%s\n", mime ? PLAIN : "empty file");
422                 return;
423         }
424         buf[nbuf] = 0;
425
426         utfconv();
427
428         /*
429          * build histogram table
430          */
431         memset(cfreq, 0, sizeof(cfreq));
432         for (i = 0; language[i].name; i++)
433                 language[i].count = 0;
434         eob = (char *)buf+nbuf;
435         for(n = 0, p = (char *)buf; p < eob; n++) {
436                 if (!fullrune1(p, eob-p) && eob-p < UTFmax1)
437                         break;
438                 p += chartorune1(&r, p);
439                 if (r == 0)
440                         f = Cnull;
441                 else if (r <= 0x7f) {
442                         if (!isprint(r) && !isspace(r))
443                                 f = Ceascii;    /* ASCII control char */
444                         else f = r;
445                 } else if (r == 0x80) {
446                         bump_utf_count(r);
447                         f = Cutf;
448                 } else if (r < 0xA0)
449                         f = Cbinary;    /* Invalid Runes */
450                 else if (r <= 0xff)
451                         f = Clatin;     /* Latin 1 */
452                 else {
453                         bump_utf_count(r);
454                         f = Cutf;               /* UTF extension */
455                 }
456                 cfreq[f]++;                     /* ASCII chars peg directly */
457         }
458         /*
459          * gross classify
460          */
461         if (cfreq[Cbinary])
462                 guess = Fbinary;
463         else if (cfreq[Cutf])
464                 guess = Futf;
465         else if (cfreq[Clatin])
466                 guess = Flatin;
467         else if (cfreq[Ceascii])
468                 guess = Feascii;
469         else if (cfreq[Cnull])
470                 guess = Fbinary;
471         else
472                 guess = Fascii;
473         /*
474          * lookup dictionary words
475          */
476         memset(wfreq, 0, sizeof(wfreq));
477         if(guess == Fascii || guess == Flatin || guess == Futf)
478                 wordfreq();
479         /*
480          * call individual classify routines
481          */
482         for(i=0; call[i]; i++)
483                 if((*call[i])())
484                         return;
485
486         /*
487          * if all else fails,
488          * print out gross classification
489          */
490         if (nbuf < 100 && !mime)
491                 print(mime ? PLAIN : "short ");
492         if (guess == Fascii)
493                 print("%s\n", mime ? PLAIN : "Ascii");
494         else if (guess == Feascii)
495                 print("%s\n", mime ? PLAIN : "extended ascii");
496         else if (guess == Flatin)
497                 print("%s\n", mime ? PLAIN : "latin ascii");
498         else if (guess == Futf && utf_count() < 4)
499                 print_utf();
500         else print("%s\n", mime ? OCTET : "binary");
501 }
502
503 void
504 bump_utf_count(Rune r)
505 {
506         int low, high, mid;
507
508         high = sizeof(language)/sizeof(language[0])-1;
509         for (low = 0; low < high;) {
510                 mid = (low+high)/2;
511                 if (r >= language[mid].low) {
512                         if (r <= language[mid].high) {
513                                 language[mid].count++;
514                                 break;
515                         } else low = mid+1;
516                 } else high = mid;
517         }
518 }
519
520 int
521 utf_count(void)
522 {
523         int i, count;
524
525         count = 0;
526         for (i = 0; language[i].name; i++)
527                 if (language[i].count > 0)
528                         switch (language[i].mode) {
529                         case Normal:
530                         case First:
531                                 count++;
532                                 break;
533                         default:
534                                 break;
535                         }
536         return count;
537 }
538
539 int
540 chkascii(void)
541 {
542         int i;
543
544         for (i = 'a'; i < 'z'; i++)
545                 if (cfreq[i])
546                         return 1;
547         for (i = 'A'; i < 'Z'; i++)
548                 if (cfreq[i])
549                         return 1;
550         return 0;
551 }
552
553 int
554 find_first(char *name)
555 {
556         int i;
557
558         for (i = 0; language[i].name != 0; i++)
559                 if (language[i].mode == First
560                         && strcmp(language[i].name, name) == 0)
561                         return i;
562         return -1;
563 }
564
565 void
566 print_utf(void)
567 {
568         int i, printed, j;
569
570         if(mime){
571                 print("%s\n", PLAIN);
572                 return;
573         }
574         if (chkascii()) {
575                 printed = 1;
576                 print("Ascii");
577         } else
578                 printed = 0;
579         for (i = 0; language[i].name; i++)
580                 if (language[i].count) {
581                         switch(language[i].mode) {
582                         case Multi:
583                                 j = find_first(language[i].name);
584                                 if (j < 0)
585                                         break;
586                                 if (language[j].count > 0)
587                                         break;
588                                 /* Fall through */
589                         case Normal:
590                         case First:
591                                 if (printed)
592                                         print(" & ");
593                                 else printed = 1;
594                                 print("%s", language[i].name);
595                                 break;
596                         case Shared:
597                         default:
598                                 break;
599                         }
600                 }
601         if(!printed)
602                 print("UTF");
603         print(" text\n");
604 }
605
606 void
607 wordfreq(void)
608 {
609         int low, high, mid, r;
610         uchar *p, *p2, c;
611
612         p = buf;
613         for(;;) {
614                 while (p < buf+nbuf && !isalpha(*p))
615                         p++;
616                 if (p >= buf+nbuf)
617                         return;
618                 p2 = p;
619                 while(p < buf+nbuf && isalpha(*p))
620                         p++;
621                 c = *p;
622                 *p = 0;
623                 high = sizeof(dict)/sizeof(dict[0]);
624                 for(low = 0;low < high;) {
625                         mid = (low+high)/2;
626                         r = strcmp(dict[mid].word, (char*)p2);
627                         if(r == 0) {
628                                 wfreq[dict[mid].class]++;
629                                 break;
630                         }
631                         if(r < 0)
632                                 low = mid+1;
633                         else
634                                 high = mid;
635                 }
636                 *p++ = c;
637         }
638 }
639
640 typedef struct Filemagic Filemagic;
641 struct Filemagic {
642         ulong x;
643         ulong mask;
644         char *desc;
645         char *mime;
646 };
647
648 /*
649  * integers in this table must be as seen on a little-endian machine
650  * when read from a file.
651  */
652 Filemagic long0tab[] = {
653         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
654         /* "pac1" */
655         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
656         /* "pXc2 */
657         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
658         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
659         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
660         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
661         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
662         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
663         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
664         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
665         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
666         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
667         /* 0xfeedfacf */
668         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
669         /* 0xcefaedfe */
670         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
671         /* 0xcffaedfe */
672         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
673         /* 0xcafebabe */
674         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
675         /*
676          * venti & fossil magic numbers are stored big-endian on disk,
677          * thus the numbers appear reversed in this table.
678          */
679         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
680         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
681 };
682
683 int
684 filemagic(Filemagic *tab, int ntab, ulong x)
685 {
686         int i;
687
688         for(i=0; i<ntab; i++)
689                 if((x&tab[i].mask) == tab[i].x){
690                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
691                         return 1;
692                 }
693         return 0;
694 }
695
696 int
697 long0(void)
698 {
699         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
700 }
701
/*
 * A magic number located at byte offset off in the file.  The unnamed
 * Filemagic member supplies x, mask, desc and mime, which
 * fileoffmagic() reaches directly as tp->x, tp->mask and so on.
 */
typedef struct Fileoffmag Fileoffmag;
struct Fileoffmag {
	ulong	off;
	Filemagic;
};

/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 * Each row is: offset, value, mask, description, MIME type.
 */
Fileoffmag longofftab[] = {
	/*
	 * venti & fossil magic numbers are stored big-endian on disk,
	 * thus the numbers appear reversed in this table.
	 */
	256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
	256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
	128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
	4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
};
722
723 int
724 fileoffmagic(Fileoffmag *tab, int ntab)
725 {
726         int i;
727         ulong x;
728         Fileoffmag *tp;
729         uchar buf[sizeof(long)];
730
731         for(i=0; i<ntab; i++) {
732                 tp = tab + i;
733                 seek(fd, tp->off, 0);
734                 if (readn(fd, buf, sizeof buf) != sizeof buf)
735                         continue;
736                 x = LENDIAN(buf);
737                 if((x&tp->mask) == tp->x){
738                         print("%s\n", mime ? tp->mime : tp->desc);
739                         return 1;
740                 }
741         }
742         return 0;
743 }
744
745 int
746 longoff(void)
747 {
748         return fileoffmagic(longofftab, nelem(longofftab));
749 }
750
751 int
752 isexec(void)
753 {
754         Fhdr f;
755
756         seek(fd, 0, 0);         /* reposition to start of file */
757         if(crackhdr(fd, &f)) {
758                 print("%s\n", mime ? OCTET : f.name);
759                 return 1;
760         }
761         return 0;
762 }
763
764
/* from tar.c */
enum {
	NAMSIZ = 100,	/* length of the name fields */
	TBLOCK = 512,	/* size of a tar block */
};

/* a tar header block, viewed as raw bytes or as header fields */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header
	{
		char	name[NAMSIZ];
		char	mode[8];
		char	uid[8];
		char	gid[8];
		char	size[12];
		char	mtime[12];
		char	chksum[8];
		char	linkflag;
		char	linkname[NAMSIZ];
		/* rest are defined by POSIX's ustar format; see p1003.2b */
		char	magic[6];	/* "ustar" */
		char	version[2];
		char	uname[32];
		char	gname[32];
		char	devmajor[8];
		char	devminor[8];
		char	prefix[155];  /* if non-null, path = prefix "/" name */
	} dbuf;
};

/*
 * Return the sum of all TBLOCK bytes of *hp, each taken as an
 * unsigned value, with the chksum field counted as blanks.
 * The chksum field of *hp is overwritten with blanks in the process.
 */
int
checksum(union hblock *hp)
{
	int sum, k;

	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
	sum = 0;
	for(k = 0; k < TBLOCK; k++)
		sum += hp->dummy[k] & 0xff;
	return sum;
}
807
808 int
809 istar(void)
810 {
811         int chksum;
812         char tblock[TBLOCK];
813         union hblock *hp = (union hblock *)tblock;
814         struct header *hdr = &hp->dbuf;
815
816         seek(fd, 0, 0);         /* reposition to start of file */
817         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
818                 return 0;
819         chksum = strtol(hdr->chksum, 0, 8);
820         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
821                 if (strcmp(hdr->magic, "ustar") == 0)
822                         print(mime? "application/x-ustar\n": "posix tar archive\n");
823                 else
824                         print(mime? "application/x-tar\n": "tar archive\n");
825                 return 1;
826         }
827         return 0;
828 }
829
830 /*
831  * initial words to classify file
832  */
833 struct  FILE_STRING
834 {
835         char    *key;
836         char    *filetype;
837         int     length;
838         char    *mime;
839 } file_string[] =
840 {
841         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
842         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
843         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
844         "!<arch>\n__.SYMDEF",   "archive random library",       16,     "application/octet-stream",
845         "!<arch>\n",            "archive",                      8,      "application/octet-stream",
846         "070707",               "cpio archive - ascii header",  6,      "application/octet-stream",
847         "#!/bin/rc",            "rc executable file",           9,      "text/plain",
848         "#!/bin/sh",            "sh executable file",           9,      "text/plain",
849         "%!",                   "postscript",                   2,      "application/postscript",
850         "\004%!",               "postscript",                   3,      "application/postscript",
851         "x T post",             "troff output for post",        8,      "application/troff",
852         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
853         "x T utf",              "troff output for UTF",         7,      "application/troff",
854         "x T 202",              "troff output for 202",         7,      "application/troff",
855         "x T aps",              "troff output for aps",         7,      "application/troff",
856         "x T ",                 "troff output",                 4,      "application/troff",
857         "GIF",                  "GIF image",                    3,      "image/gif",
858         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
859         "%PDF",                 "PDF",                          4,      "application/pdf",
860         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
861         "<!doctype",            "HTML file",                    9,      "text/html",
862         "<!--",                 "HTML file",                    4,      "text/html",
863         "<html>",               "HTML file",                    6,      "text/html",
864         "<HTML>",               "HTML file",                    6,      "text/html",
865         "<?xml",                "HTML file",                    5,      "text/html",
866         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
867         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
868         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
869         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
870         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
871         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
872         "BM",                   "bmp",                          2,      "image/bmp", 
873         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
874         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
875         "\033E\033",    "HP PCL printer data",          3,      OCTET,
876         "\033&",        "HP PCL printer data",          2,      OCTET,
877         "\033%-12345X", "HPJCL file",           9,      "application/hpjcl",
878         "\033Lua",              "Lua bytecode",         4,      OCTET,
879         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
880         "OggS",                 "ogg audio",            4,      "audio/ogg",
881         ".snd",                 "sun audio",            4,      "audio/basic",
882         "\211PNG",              "PNG image",            4,      "image/png",
883         "P3\n",                 "ppm",                          3,      "image/ppm",
884         "P6\n",                 "ppm",                          3,      "image/ppm",
885         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
886         ".HTML ",               "troff -ms input",      6,      "text/troff",
887         ".LP",                  "troff -ms input",      3,      "text/troff",
888         ".ND",                  "troff -ms input",      3,      "text/troff",
889         ".PP",                  "troff -ms input",      3,      "text/troff",
890         ".TL",                  "troff -ms input",      3,      "text/troff",
891         ".TR",                  "troff -ms input",      3,      "text/troff",
892         ".TH",                  "manual page",          3,      "text/troff",
893         ".\\\"",                "troff input",          3,      "text/troff",
894         ".de",                  "troff input",          3,      "text/troff",
895         ".if",                  "troff input",          3,      "text/troff",
896         ".nr",                  "troff input",          3,      "text/troff",
897         ".tr",                  "troff input",          3,      "text/troff",
898         "vac:",                 "venti score",          4,      "text/plain",
899         "-----BEGIN CERTIFICATE-----\n",
900                                 "pem certificate",      -1,     "text/plain",
901         "-----BEGIN TRUSTED CERTIFICATE-----\n",
902                                 "pem trusted certificate", -1,  "text/plain",
903         "-----BEGIN X509 CERTIFICATE-----\n",
904                                 "pem x.509 certificate", -1,    "text/plain",
905         "subject=/C=",          "pem certificate with header", -1, "text/plain",
906         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
907         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
908         "[playlist]",           "playlist",             10,     "application/x-scpls",
909         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
910         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
911         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
912         0,0,0,0
913 };
914
915 int
916 istring(void)
917 {
918         int i, l;
919         struct FILE_STRING *p;
920
921         for(p = file_string; p->key; p++) {
922                 l = p->length;
923                 if(l == -1)
924                         l = strlen(p->key);
925                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
926                         print("%s\n", mime ? p->mime : p->filetype);
927                         return 1;
928                 }
929         }
930         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
931                 for(i = 5; i < nbuf; i++)
932                         if(buf[i] == '\n')
933                                 break;
934                 if(mime)
935                         print("%s\n", OCTET);
936                 else
937                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
938                 return 1;
939         }
940         return 0;
941 }
942
/*
 * Magic strings that sit at a fixed byte offset inside the file
 * rather than at its start.  Each row is the offset followed by the
 * embedded FILE_STRING fields (key, description, key length, mime
 * type); the row with a nil key ends the table.
 */
struct offstr
{
	ulong	off;
	struct FILE_STRING;
} offstrs[] = {
	32*1024, "\001CD001\001",	"ISO9660 CD image",	7,	"application/x-iso9660-image",
	0, 0, 0, 0, 0
};
951
952 int
953 isoffstr(void)
954 {
955         int n;
956         char buf[256];
957         struct offstr *p;
958
959         for(p = offstrs; p->key; p++) {
960                 seek(fd, p->off, 0);
961                 n = p->length;
962                 if (n > sizeof buf)
963                         n = sizeof buf;
964                 if (readn(fd, buf, n) != n)
965                         continue;
966                 if(memcmp(buf, p->key, n) == 0) {
967                         print("%s\n", mime ? p->mime : p->filetype);
968                         return 1;
969                 }
970         }
971         return 0;
972 }
973
974 int
975 iff(void)
976 {
977         if (strncmp((char*)buf, "FORM", 4) == 0 &&
978             strncmp((char*)buf+8, "AIFF", 4) == 0) {
979                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
980                 return 1;
981         }
982         if (strncmp((char*)buf, "RIFF", 4) == 0) {
983                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
984                         print("%s\n", mime? "audio/wave": "wave audio");
985                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
986                         print("%s\n", mime? "video/avi": "avi video");
987                 else
988                         print("%s\n", mime? "application/octet-stream": "riff file");
989                 return 1;
990         }
991         return 0;
992 }
993
/*
 * Tag names ishtml looks for after a '<' (or "</").
 * The list is kept longest-first: ishtml stops at the first entry
 * that matches, so a name must come before any shorter name that is
 * a prefix of it ("frameset" before "frame", "body" before "b").
 * A nil pointer ends the list.
 */
char*	html_string[] = {
	/* 10 characters */
	"blockquote",
	/* 8 characters */
	"!DOCTYPE",
	"![CDATA[",
	"basefont",
	"frameset",
	"noframes",
	"textarea",
	/* 7 characters */
	"caption",
	/* 6 characters */
	"button",
	"center",
	"iframe",
	"object",
	"option",
	"script",
	"select",
	"strong",
	/* 5 characters */
	"blink",
	"embed",
	"frame",
	"input",
	"label",
	"param",
	"small",
	"style",
	"table",
	"tbody",
	"tfoot",
	"thead",
	"title",
	/* 4 characters */
	"?xml",
	"body",
	"code",
	"font",
	"form",
	"head",
	"html",
	"link",
	"menu",
	"meta",
	"span",
	/* 3 characters */
	"!--",
	"big",
	"dir",
	"div",
	"img",
	"pre",
	"sub",
	"sup",
	/* 2 characters */
	"br",
	"dd",
	"dl",
	"dt",
	"em",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"hr",
	"li",
	"ol",
	"td",
	"th",
	"tr",
	"tt",
	"ul",
	/* 1 character */
	"a",
	"b",
	"i",
	"p",
	"q",
	"u",
	0,
};
1010
1011 int
1012 ishtml(void)
1013 {
1014         int i, n, count;
1015         uchar *p;
1016
1017         count = 0;
1018         p = buf;
1019         for(;;) {
1020                 while(p < buf+nbuf && *p != '<')
1021                         p++;
1022                 p++;
1023                 if (p >= buf+nbuf)
1024                         break;
1025                 if(*p == '/')
1026                         p++;
1027                 if(p >= buf+nbuf)
1028                         break;
1029                 for(i = 0; html_string[i]; i++){
1030                         n = strlen(html_string[i]);
1031                         if(p + n > buf+nbuf)
1032                                 continue;
1033                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1034                                 p += n;
1035                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1036                                         if(++count > 2) {
1037                                                 print("%s\n", mime ? "text/html" : "HTML file");
1038                                                 return 1;
1039                                         }
1040                                 }
1041                                 break;
1042                         }
1043                 }
1044         }
1045         return 0;
1046 }
1047
/*
 * Header field names (lower case, with the colon) that isrfc822
 * counts when they start a line; the comparison there ignores case.
 * Field names cannot contain a space, so the reply header is spelled
 * "reply-to:".  A nil pointer ends the list.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"sender:",
	0,
};
1059
1060 int
1061 isrfc822(void)
1062 {
1063
1064         char *p, *q, *r;
1065         int i, count;
1066
1067         count = 0;
1068         p = (char*)buf;
1069         for(;;) {
1070                 q = strchr(p, '\n');
1071                 if(q == nil)
1072                         break;
1073                 *q = 0;
1074                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1075                         count++;
1076                         *q = '\n';
1077                         p = q+1;
1078                         continue;
1079                 }
1080                 *q = '\n';
1081                 if(*p != '\t' && *p != ' '){
1082                         r = strchr(p, ':');
1083                         if(r == 0 || r > q)
1084                                 break;
1085                         for(i = 0; rfc822_string[i]; i++) {
1086                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1087                                         count++;
1088                                         break;
1089                                 }
1090                         }
1091                 }
1092                 p = q+1;
1093         }
1094         if(count >= 3){
1095                 print("%s\n", mime ? "message/rfc822" : "email file");
1096                 return 1;
1097         }
1098         return 0;
1099 }
1100
1101 int
1102 ismbox(void)
1103 {
1104         char *p, *q;
1105
1106         p = (char*)buf;
1107         q = strchr(p, '\n');
1108         if(q == nil)
1109                 return 0;
1110         *q = 0;
1111         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1112                 print("%s\n", mime ? "text/plain" : "mail box");
1113                 return 1;
1114         }
1115         *q = '\n';
1116         return 0;
1117 }
1118
1119 int
1120 iscint(void)
1121 {
1122         int type;
1123         char *name;
1124         Biobuf b;
1125
1126         if(Binit(&b, fd, OREAD) == Beof)
1127                 return 0;
1128         seek(fd, 0, 0);
1129         type = objtype(&b, &name);
1130         if(type < 0)
1131                 return 0;
1132         if(mime)
1133                 print("%s\n", OCTET);
1134         else
1135                 print("%s intermediate\n", name);
1136         return 1;
1137 }
1138
1139 int
1140 isc(void)
1141 {
1142         int n;
1143
1144         n = wfreq[I1];
1145         /*
1146          * includes
1147          */
1148         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1149                 goto yes;
1150         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1151                 goto yes;
1152         /*
1153          * declarations
1154          */
1155         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1156                 goto yes;
1157         /*
1158          * assignments
1159          */
1160         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1161                 goto yes;
1162         return 0;
1163
1164 yes:
1165         if(mime){
1166                 print("%s\n", PLAIN);
1167                 return 1;
1168         }
1169         if(wfreq[Alword] > 0)
1170                 print("alef program\n");
1171         else
1172                 print("c program\n");
1173         return 1;
1174 }
1175
1176 int
1177 islimbo(void)
1178 {
1179         /*
1180          * includes
1181          */
1182         if(wfreq[Lword] < 4)
1183                 return 0;
1184         print("%s\n", mime ? PLAIN : "limbo program");
1185         return 1;
1186 }
1187
1188 int
1189 isas(void)
1190 {
1191         /*
1192          * includes
1193          */
1194         if(wfreq[Aword] < 2)
1195                 return 0;
1196         print("%s\n", mime ? PLAIN : "as program");
1197         return 1;
1198 }
1199
1200 int
1201 ismp3(void)
1202 {
1203         uchar *p, *e;
1204
1205         p = buf;
1206         e = p + nbuf-1;
1207         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1208                 if((p[1] & 0xFE) == 0xFA){
1209                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1210                         return 1;
1211                 }
1212                 p++;
1213         }
1214         return 0;
1215 }
1216
1217 /*
1218  * low entropy means encrypted
1219  */
1220 int
1221 ismung(void)
1222 {
1223         int i, bucket[8];
1224         float cs;
1225
1226         if(nbuf < 64)
1227                 return 0;
1228         memset(bucket, 0, sizeof(bucket));
1229         for(i=nbuf-64; i<nbuf; i++)
1230                 bucket[(buf[i]>>5)&07] += 1;
1231
1232         cs = 0.;
1233         for(i=0; i<8; i++)
1234                 cs += (bucket[i]-8)*(bucket[i]-8);
1235         cs /= 8.;
1236         if(cs <= 24.322) {
1237                 if(buf[0]==0x1f && buf[1]==0x9d)
1238                         print("%s\n", mime ? "application/x-compress" : "compressed");
1239                 else
1240                 if(buf[0]==0x1f && buf[1]==0x8b)
1241                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1242                 else
1243                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1244                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1245                 else
1246                 if(buf[0]==0x78 && buf[1]==0x9c)
1247                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1248                 else
1249                         print("%s\n", mime ? OCTET : "encrypted");
1250                 return 1;
1251         }
1252         return 0;
1253 }
1254
1255 /*
1256  * english by punctuation and frequencies
1257  */
1258 int
1259 isenglish(void)
1260 {
1261         int vow, comm, rare, badpun, punct;
1262         char *p;
1263
1264         if(guess != Fascii && guess != Feascii)
1265                 return 0;
1266         badpun = 0;
1267         punct = 0;
1268         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1269                 switch(*p) {
1270                 case '.':
1271                 case ',':
1272                 case ')':
1273                 case '%':
1274                 case ';':
1275                 case ':':
1276                 case '?':
1277                         punct++;
1278                         if(p[1] != ' ' && p[1] != '\n')
1279                                 badpun++;
1280                 }
1281         if(badpun*5 > punct)
1282                 return 0;
1283         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1284                 return 0;
1285         if(2*cfreq[';'] > cfreq['e'])
1286                 return 0;
1287
1288         vow = 0;
1289         for(p="AEIOU"; *p; p++) {
1290                 vow += cfreq[*p];
1291                 vow += cfreq[tolower(*p)];
1292         }
1293         comm = 0;
1294         for(p="ETAION"; *p; p++) {
1295                 comm += cfreq[*p];
1296                 comm += cfreq[tolower(*p)];
1297         }
1298         rare = 0;
1299         for(p="VJKQXZ"; *p; p++) {
1300                 rare += cfreq[*p];
1301                 rare += cfreq[tolower(*p)];
1302         }
1303         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1304                 print("%s\n", mime ? PLAIN : "English text");
1305                 return 1;
1306         }
1307         return 0;
1308 }
1309
1310 /*
1311  * pick up a number with
1312  * syntax _*[0-9]+_
1313  */
1314 #define P9BITLEN        12
1315 int
1316 p9bitnum(uchar *bp)
1317 {
1318         int n, c, len;
1319
1320         len = P9BITLEN;
1321         while(*bp == ' ') {
1322                 bp++;
1323                 len--;
1324                 if(len <= 0)
1325                         return -1;
1326         }
1327         n = 0;
1328         while(len > 1) {
1329                 c = *bp++;
1330                 if(!isdigit(c))
1331                         return -1;
1332                 n = n*10 + c-'0';
1333                 len--;
1334         }
1335         if(*bp != ' ')
1336                 return -1;
1337         return n;
1338 }
1339
/*
 * Decode the 12-byte pixel format field that starts an image header
 * and return the pixel depth in bits, or -1 if the field is not a
 * usable format.  *newp is set to 0 for the old form and 1 for the
 * new one.
 *	old form: a number n after leading blanks; the depth is 1<<n
 *	new form: letter-number pairs such as r8g8b8; the depth is the
 *		sum of the numbers and must divide 8 or be a multiple
 *		of 8
 * The result is always between 1 and Maxdepth: a depth of 0 would
 * make callers divide by zero, and the bound keeps the shift and the
 * sum from overflowing on junk input.  Every letter must be followed
 * directly by a digit inside the field, so strtoul cannot skip blanks
 * and pick up digits from the next header field.
 */
int
depthof(char *s, int *newp)
{
	enum { Maxdepth = 64, Maxldepth = 6 };	/* 1<<Maxldepth == Maxdepth */
	char *es;
	long ld;
	unsigned long n, d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		if(ld < 0 || ld > Maxldepth)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		if(s >= es || *s < '0' || *s > '9')
			return -1;
		n = strtoul(s, &s, 10);
		if(n > Maxdepth || d+n > Maxdepth)
			return -1;
		d += n;
	}

	if(d == 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1367
1368 int
1369 isp9bit(void)
1370 {
1371         int dep, lox, loy, hix, hiy, px, new, cmpr;
1372         ulong t;
1373         long len;
1374         char *newlabel;
1375         uchar *cp;
1376
1377         cp = buf;
1378         cmpr = 0;
1379         newlabel = "old ";
1380
1381         if(memcmp(cp, "compressed\n", 11) == 0) {
1382                 cmpr = 1;
1383                 cp = buf + 11;
1384         }
1385
1386         dep = depthof((char*)cp + 0*P9BITLEN, &new);
1387         if(new)
1388                 newlabel = "";
1389         lox = p9bitnum(cp + 1*P9BITLEN);
1390         loy = p9bitnum(cp + 2*P9BITLEN);
1391         hix = p9bitnum(cp + 3*P9BITLEN);
1392         hiy = p9bitnum(cp + 4*P9BITLEN);
1393         if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1394                 return 0;
1395
1396         if(dep < 8){
1397                 px = 8/dep;             /* pixels per byte */
1398                 /* set l to number of bytes of data per scan line */
1399                 if(lox >= 0)
1400                         len = (hix+px-1)/px - lox/px;
1401                 else{                   /* make positive before divide */
1402                         t = (-lox)+px-1;
1403                         t = (t/px)*px;
1404                         len = (t+hix+px-1)/px;
1405                 }
1406         }else
1407                 len = (hix-lox)*dep/8;
1408         len *= hiy - loy;               /* col length */
1409         len += 5 * P9BITLEN;            /* size of initial ascii */
1410
1411         /*
1412          * for compressed images, don't look any further. otherwise:
1413          * for image file, length is non-zero and must match calculation above.
1414          * for /dev/window and /dev/screen the length is always zero.
1415          * for subfont, the subfont header should follow immediately.
1416          */
1417         if (cmpr) {
1418                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1419                         newlabel, dep);
1420                 return 1;
1421         }
1422         /*
1423          * mbuf->length == 0 probably indicates reading a pipe.
1424          * Ghostscript sometimes produces a little extra on the end.
1425          */
1426         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1427             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1428                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1429                 return 1;
1430         }
1431         if (p9subfont(buf+len)) {
1432                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1433                 return 1;
1434         }
1435         return 0;
1436 }
1437
1438 int
1439 p9subfont(uchar *p)
1440 {
1441         int n, h, a;
1442
1443         /* if image too big, assume it's a subfont */
1444         if (p+3*P9BITLEN > buf+sizeof(buf))
1445                 return 1;
1446
1447         n = p9bitnum(p + 0*P9BITLEN);   /* char count */
1448         if (n < 0)
1449                 return 0;
1450         h = p9bitnum(p + 1*P9BITLEN);   /* height */
1451         if (h < 0)
1452                 return 0;
1453         a = p9bitnum(p + 2*P9BITLEN);   /* ascent */
1454         if (a < 0)
1455                 return 0;
1456         return 1;
1457 }
1458
1459 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1460
1461 int
1462 isp9font(void)
1463 {
1464         uchar *cp, *p;
1465         int i, n;
1466         char pathname[1024];
1467
1468         cp = buf;
1469         if (!getfontnum(cp, &cp))       /* height */
1470                 return 0;
1471         if (!getfontnum(cp, &cp))       /* ascent */
1472                 return 0;
1473         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1474                 if (!getfontnum(cp, &cp))       /* min */
1475                         break;
1476                 if (!getfontnum(cp, &cp))       /* max */
1477                         return 0;
1478                 getfontnum(cp, &cp);    /* optional offset */
1479                 while (WHITESPACE(*cp))
1480                         cp++;
1481                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1482                                 ;
1483                         /* construct a path name, if needed */
1484                 n = 0;
1485                 if (*p != '/' && slash) {
1486                         n = slash-fname+1;
1487                         if (n < sizeof(pathname))
1488                                 memcpy(pathname, fname, n);
1489                         else n = 0;
1490                 }
1491                 if (n+cp-p+4 < sizeof(pathname)) {
1492                         memcpy(pathname+n, p, cp-p);
1493                         n += cp-p;
1494                         pathname[n] = 0;
1495                         if (access(pathname, AEXIST) < 0) {
1496                                 strcpy(pathname+n, ".0");
1497                                 if (access(pathname, AEXIST) < 0)
1498                                         return 0;
1499                         }
1500                 }
1501         }
1502         if (i) {
1503                 print(mime ? "text/plain\n" : "font file\n");
1504                 return 1;
1505         }
1506         return 0;
1507 }
1508
1509 int
1510 getfontnum(uchar *cp, uchar **rp)
1511 {
1512         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1513                 cp++;
1514         if (*cp < '0' || *cp > '9')
1515                 return 0;
1516         strtoul((char *)cp, (char **)rp, 0);
1517         if (!WHITESPACE(**rp)) {
1518                 *rp = cp;
1519                 return 0;
1520         }
1521         return 1;
1522 }
1523
1524 int
1525 isrtf(void)
1526 {
1527         if(strstr((char *)buf, "\\rtf1")){
1528                 print(mime ? "application/rtf\n" : "rich text format\n");
1529                 return 1;
1530         }
1531         return 0;
1532 }
1533
1534 int
1535 ismsdos(void)
1536 {
1537         if (buf[0] == 0x4d && buf[1] == 0x5a){
1538                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1539                 return 1;
1540         }
1541         return 0;
1542 }
1543
1544 int
1545 isicocur(void)
1546 {
1547         if(buf[0] || buf[1] || buf[3] || buf[9])
1548                 return 0;
1549         if(buf[4] == 0x00 && buf[5] == 0x00)
1550                 return 0;
1551         switch(buf[2]){
1552         case 1:
1553                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1554                 return 1;
1555         case 2:
1556                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1557                 return 1;
1558         }
1559         return 0;
1560 }
1561
1562 int
1563 iself(void)
1564 {
1565         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1566         [1]     "WE32100",
1567         [2]     "SPARC",
1568         [3]     "i386",
1569         [4]     "M68000",
1570         [5]     "M88000",
1571         [6]     "i486",
1572         [7]     "i860",
1573         [8]     "R3000",
1574         [9]     "S370",
1575         [10]    "R4000",
1576         [15]    "HP-PA",
1577         [18]    "sparc v8+",
1578         [19]    "i960",
1579         [20]    "PPC-32",
1580         [21]    "PPC-64",
1581         [40]    "ARM",
1582         [41]    "Alpha",
1583         [43]    "sparc v9",
1584         [50]    "IA-64",
1585         [62]    "AMD64",
1586         [75]    "VAX",
1587         };
1588         static char *type[] = {
1589         [1]     "relocatable object",
1590         [2]     "executable",
1591         [3]     "shared library",
1592         [4]     "core dump",
1593         };
1594
1595         if (memcmp(buf, "\x7fELF", 4) == 0){
1596                 if (!mime){
1597                         int isdifend = 0;
1598                         int n = (buf[19] << 8) | buf[18];
1599                         char *p = "unknown";
1600                         char *t = "unknown";
1601
1602                         if (n > 0 && n < nelem(cpu) && cpu[n])
1603                                 p = cpu[n];
1604                         else {
1605                                 /* try the other byte order */
1606                                 isdifend = 1;
1607                                 n = (buf[18] << 8) | buf[19];
1608                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1609                                         p = cpu[n];
1610                         }
1611                         if(isdifend)
1612                                 n = (buf[16]<< 8) | buf[17];
1613                         else
1614                                 n = (buf[17]<< 8) | buf[16];
1615
1616                         if(n>0 && n < nelem(type) && type[n])
1617                                 t = type[n];
1618                         print("%s ELF %s\n", p, t);
1619                 }
1620                 else
1621                         print("application/x-elf-executable\n");
1622                 return 1;
1623         }
1624
1625         return 0;
1626 }
1627
1628 int
1629 isface(void)
1630 {
1631         int i, j, ldepth, l;
1632         char *p;
1633
1634         ldepth = -1;
1635         for(j = 0; j < 3; j++){
1636                 for(p = (char*)buf, i=0; i<3; i++){
1637                         if(p[0] != '0' || p[1] != 'x')
1638                                 return 0;
1639                         if(buf[2+8] == ',')
1640                                 l = 2;
1641                         else if(buf[2+4] == ',')
1642                                 l = 1;
1643                         else
1644                                 return 0;
1645                         if(ldepth == -1)
1646                                 ldepth = l;
1647                         if(l != ldepth)
1648                                 return 0;
1649                         strtoul(p, &p, 16);
1650                         if(*p++ != ',')
1651                                 return 0;
1652                         while(*p == ' ' || *p == '\t')
1653                                 p++;
1654                 }
1655                 if (*p++ != '\n')
1656                         return 0;
1657         }
1658
1659         if(mime)
1660                 print("application/x-face\n");
1661         else
1662                 print("face image depth %d\n", ldepth);
1663         return 1;
1664 }
1665