]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
tga, file: 15-bit tga images
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * Combine the four bytes p[0]..p[3] into one integer, with p[0] as the
 * least significant byte (little-endian order).  p is evaluated four
 * times, so it must not have side effects.
 */
#define LENDIAN(p)      ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
11
uchar   buf[6001];      /* leading bytes of the file being classified; filetype NUL terminates it */
short   cfreq[140];     /* histogram indexed by ASCII character or by a C* class code */
short   wfreq[50];      /* dictionary hits indexed by word class (Cword, Fword, ...) */
int     nbuf;           /* number of valid bytes in buf */
Dir*    mbuf;           /* result of the most recent dirfstat in filetype */
int     fd;             /* descriptor opened by type(); several checkers seek and reread it */
char    *fname;         /* name of the file being classified, set by type() */
char    *slash;         /* rightmost '/' in that name as found by type(), or 0 */
20
/*
 * Word classes counted in wfreq (the small values) and character
 * classes counted in cfreq (values from Clatin up, which sit above the
 * 7-bit ASCII range so that ASCII characters can index cfreq directly).
 * The word classes presumably stand for C, Fortran, assembler, Alef and
 * Limbo keywords and the pieces of an #include line; only the names
 * and the checker comments in call[] suggest this.
 */
enum
{
        Cword,
        Fword,
        Aword,
        Alword,
        Lword,
        I1,
        I2,
        I3,
        Clatin  = 128,
        Cbinary,
        Cnull,
        Ceascii,
        Cutf,
};

/*
 * Keyword dictionary searched by wordfreq with a binary search that
 * compares with strcmp, so the entries MUST stay sorted in strcmp
 * (byte value) order: upper case sorts before lower case.
 */
struct
{
        char*   word;
        int     class;
} dict[] =
{
        "PATH",         Lword,
        "TEXT",         Aword,
        "adt",          Alword,
        "aggr",         Alword,
        "alef",         Alword,
        "array",        Lword,
        "bio",          I2,
        "block",        Fword,
        "char",         Cword,
        "common",       Fword,
        "con",          Lword,
        "data",         Fword,
        "dimension",    Fword,
        "double",       Cword,
        "extern",       Cword,
        "float",        Cword,
        "fn",           Lword,
        "function",     Fword,
        "h",            I3,
        "implement",    Lword,
        "import",       Lword,
        "include",      I1,
        "int",          Cword,
        "integer",      Fword,
        "iota",         Lword,
        "libc",         I2,
        "long",         Cword,
        "module",       Lword,
        "real",         Fword,
        "ref",          Lword,
        "register",     Cword,
        "self",         Lword,
        "short",        Cword,
        "static",       Cword,
        "stdio",        I2,
        "struct",       Cword,
        "subroutine",   Fword,
        "u",            I2,
        "void",         Cword,
};
83
/*
 * codes for 'mode' field in language structure.
 * utf_count counts only Normal and First entries; print_utf names a
 * Multi entry only when its First entry saw no characters, and never
 * names a Shared entry.
 */
enum    {
                Normal  = 0,
                First,          /* first entry for language spanning several ranges */
                Multi,          /* later entries "   "       "  ... */
                Shared,         /* codes used in several languages */
        };
91
/*
 * Code point ranges used to name the scripts found in UTF text.
 * bump_utf_count binary searches this table on low/high, so entries
 * must stay sorted by range and must not overlap.  The entry with a
 * nil name terminates the table for the linear scans elsewhere.
 */
struct
{
        int     mode;           /* see enum above */
        int     count;          /* characters seen in this range; reset by filetype for each file */
        int     low;            /* first code point of the range */
        int     high;           /* last code point of the range (inclusive) */
        char    *name;          /* name printed by print_utf */

} language[] =
{
        Normal, 0,      0x0100, 0x01FF, "Extended Latin",
        Normal, 0,      0x0370, 0x03FF, "Greek",
        Normal, 0,      0x0400, 0x04FF, "Cyrillic",
        Normal, 0,      0x0530, 0x058F, "Armenian",
        Normal, 0,      0x0590, 0x05FF, "Hebrew",
        Normal, 0,      0x0600, 0x06FF, "Arabic",
        Normal, 0,      0x0900, 0x097F, "Devanagari",
        Normal, 0,      0x0980, 0x09FF, "Bengali",
        Normal, 0,      0x0A00, 0x0A7F, "Gurmukhi",
        Normal, 0,      0x0A80, 0x0AFF, "Gujarati",
        Normal, 0,      0x0B00, 0x0B7F, "Oriya",
        Normal, 0,      0x0B80, 0x0BFF, "Tamil",
        Normal, 0,      0x0C00, 0x0C7F, "Telugu",
        Normal, 0,      0x0C80, 0x0CFF, "Kannada",
        Normal, 0,      0x0D00, 0x0D7F, "Malayalam",
        Normal, 0,      0x0E00, 0x0E7F, "Thai",
        Normal, 0,      0x0E80, 0x0EFF, "Lao",
        Normal, 0,      0x1000, 0x105F, "Tibetan",
        Normal, 0,      0x10A0, 0x10FF, "Georgian",
        Normal, 0,      0x3040, 0x30FF, "Japanese",
        Normal, 0,      0x3100, 0x312F, "Chinese",
        First,  0,      0x3130, 0x318F, "Korean",
        Multi,  0,      0x3400, 0x3D2F, "Korean",
        Shared, 0,      0x4e00, 0x9fff, "CJK",
        Normal, 0,      0,      0,      0,              /* terminal entry */
};
128
129
/*
 * Coarse classification of the buffer, derived by filetype from the
 * cfreq histogram and printed when no specific checker claims the file.
 */
enum
{
        Fascii,         /* printable ascii */
        Flatin,         /* latin 1*/
        Futf,           /* UTF character set */
        Fbinary,        /* binary */
        Feascii,        /* ASCII with control chars */
        Fnull,          /* NULL in file */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     istga(void);
154 int     ismp3(void);
155 int     ismung(void);
156 int     isp9bit(void);
157 int     isp9font(void);
158 int     isrtf(void);
159 int     ismsdos(void);
160 int     isicocur(void);
161 int     iself(void);
162 int     istring(void);
163 int     isoffstr(void);
164 int     iff(void);
165 int     long0(void);
166 int     longoff(void);
167 int     istar(void);
168 int     isface(void);
169 int     isexec(void);
170 int     p9bitnum(uchar*);
171 int     p9subfont(uchar*);
172 void    print_utf(void);
173 void    type(char*, int);
174 int     utf_count(void);
175 void    wordfreq(void);
176
/*
 * Classification routines, tried in this order by filetype; the first
 * one that returns nonzero ends the search.  The list ends with 0.
 */
int     (*call[])(void) =
{
        long0,          /* recognizable by first 4 bytes */
        istring,        /* recognizable by first string */
        iself,          /* ELF (foreign) executable */
        isexec,         /* native executables */
        iff,            /* interchange file format (strings) */
        longoff,        /* recognizable by 4 bytes at some offset */
        isoffstr,       /* recognizable by string at some offset */
        isrfc822,       /* email file */
        ismbox,         /* mail box */
        istar,          /* recognizable by tar checksum */
        ishtml,         /* html keywords */
        iscint,         /* compiler/assembler intermediate */
        islimbo,        /* limbo source */
        isc,            /* c & alef compiler key words */
        isas,           /* assembler key words */
        isp9font,       /* plan 9 font */
        isp9bit,        /* plan 9 image (as from /dev/window) */
        isrtf,          /* rich text format */
        ismsdos,        /* msdos exe (virus file attachment) */
        isicocur,               /* windows icon or cursor file */
        isface,         /* ascii face file */
        istga,          /* presumably Targa (tga) images; defined outside this view */
        ismp3,          /* presumably mp3 audio; defined outside this view */

        /* last resorts */
        ismung,         /* entropy compressed/encrypted */
        isenglish,      /* char frequency English */
        0
};
208
int mime;       /* set by the -m flag: print MIME types instead of descriptions */

/* MIME types used as fallbacks throughout the classifiers */
char OCTET[] =  "application/octet-stream";
char PLAIN[] =  "text/plain";
213
214 void
215 main(int argc, char *argv[])
216 {
217         int i, j, maxlen;
218         char *cp;
219         Rune r;
220
221         ARGBEGIN{
222         case 'm':
223                 mime = 1;
224                 break;
225         default:
226                 fprint(2, "usage: file [-m] [file...]\n");
227                 exits("usage");
228         }ARGEND;
229
230         maxlen = 0;
231         if(mime == 0 || argc > 1){
232                 for(i = 0; i < argc; i++) {
233                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
234                                         ;
235                         if(j > maxlen)
236                                 maxlen = j;
237                 }
238         }
239         if (argc <= 0) {
240                 if(!mime)
241                         print ("stdin: ");
242                 filetype(0);
243         }
244         else {
245                 for(i = 0; i < argc; i++)
246                         type(argv[i], maxlen);
247         }
248         exits(0);
249 }
250
251 void
252 type(char *file, int nlen)
253 {
254         Rune r;
255         int i;
256         char *p;
257
258         if(nlen > 0){
259                 slash = 0;
260                 for (i = 0, p = file; *p; i++) {
261                         if (*p == '/')                  /* find rightmost slash */
262                                 slash = p;
263                         p += chartorune(&r, p);         /* count runes */
264                 }
265                 print("%s:%*s",file, nlen-i+1, "");
266         }
267         fname = file;
268         if ((fd = open(file, OREAD)) < 0) {
269                 print("cannot open: %r\n");
270                 return;
271         }
272         filetype(fd);
273         close(fd);
274 }
275
/*
 * Unicode 4.0 4-byte runes.
 * Rune1 holds a code point produced by chartorune1, which accepts
 * values up to 0x10FFFF.
 */
typedef int Rune1;

enum {
        UTFmax1 = 4,    /* longest UTF-8 sequence, in bytes, that fullrune1/chartorune1 handle */
};
284
/*
 * Report whether the n bytes at p are enough to hold the complete
 * UTF-8 sequence announced by the first byte: 1 byte below 0x80,
 * 2 below 0xE0, 3 below 0xF0, 4 otherwise.  Returns 1 if so, else 0.
 * Only the lead byte is examined; the sequence is not validated.
 */
int
fullrune1(char *p, int n)
{
        unsigned char lead;

        if(n < 1)
                return 0;
        lead = *(unsigned char*)p;
        if(lead < 0x80)
                return 1;
        if(lead < 0xE0)
                return n >= 2;
        if(lead < 0xF0)
                return n >= 3;
        return n >= 4;
}
303
304 int
305 chartorune1(Rune1 *rune, char *str)
306 {
307         int c, c1, c2, c3, n;
308         Rune r;
309
310         c = *(uchar*)str;
311         if(c < 0xF0){
312                 r = 0;
313                 n = chartorune(&r, str);
314                 *rune = r;
315                 return n;
316         }
317         c &= ~0xF0;
318         c1 = *(uchar*)(str+1) & ~0x80;
319         c2 = *(uchar*)(str+2) & ~0x80;
320         c3 = *(uchar*)(str+3) & ~0x80;
321         n = (c<<18) | (c1<<12) | (c2<<6) | c3;
322         if(n < 0x10000 || n > 0x10FFFF){
323                 *rune = Runeerror;
324                 return 1;
325         }
326         *rune = n;
327         return 4;
328 }
329
330 void
331 utfconv(void)
332 {
333         Rune r;
334         uchar *rb;
335         char *p, *e;
336         int i;
337
338         if(nbuf < 4)
339                 return;
340
341         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
342                 if(!mime)
343                         print("utf-32be ");
344                 return;
345         } else
346         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
347                 if(!mime)
348                         print("utf-32le ");
349                 return;
350         } else
351         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
352                 memmove(buf, buf+3, nbuf-3);
353                 nbuf -= 3;
354                 return;
355         } else
356         if(memcmp(buf, "\xFE\xFF", 2) == 0){
357                 if(!mime)
358                         print("utf-16be ");
359
360                 nbuf -= 2;
361                 rb = malloc(nbuf+1);
362                 memmove(rb, buf+2, nbuf);
363                 p = (char*)buf;
364                 e = p+sizeof(buf)-UTFmax-1;
365                 for(i=0; i<nbuf && p < e; i+=2){
366                         r = rb[i+1] | rb[i]<<8;
367                         p += runetochar(p, &r);
368                 }
369                 *p = 0;
370                 free(rb);
371                 nbuf = p - (char*)buf;
372         } else
373         if(memcmp(buf, "\xFF\xFE", 2) == 0){
374                 if(!mime)
375                         print("utf-16le ");
376
377                 nbuf -= 2;
378                 rb = malloc(nbuf+1);
379                 memmove(rb, buf+2, nbuf);
380                 p = (char*)buf;
381                 e = p+sizeof(buf)-UTFmax-1;
382                 for(i=0; i<nbuf && p < e; i+=2){
383                         r = rb[i] | rb[i+1]<<8;
384                         p += runetochar(p, &r);
385                 }
386                 *p = 0;
387                 free(rb);
388                 nbuf = p - (char*)buf;
389         }
390 }
391
392 void
393 filetype(int fd)
394 {
395         Rune1 r;
396         int i, f, n;
397         char *p, *eob;
398
399         free(mbuf);
400         mbuf = dirfstat(fd);
401         if(mbuf == nil){
402                 print("cannot stat: %r\n");
403                 return;
404         }
405         if(mbuf->mode & DMDIR) {
406                 print("%s\n", mime ? OCTET : "directory");
407                 return;
408         }
409         if(mbuf->type != 'M' && mbuf->type != '|') {
410                 if(mime)
411                         print("%s\n", OCTET);
412                 else
413                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
414                 return;
415         }
416         /* may be reading a pipe on standard input */
417         nbuf = readn(fd, buf, sizeof(buf)-1);
418         if(nbuf < 0) {
419                 print("cannot read: %r\n");
420                 return;
421         }
422         if(nbuf == 0) {
423                 print("%s\n", mime ? PLAIN : "empty file");
424                 return;
425         }
426         buf[nbuf] = 0;
427
428         utfconv();
429
430         /*
431          * build histogram table
432          */
433         memset(cfreq, 0, sizeof(cfreq));
434         for (i = 0; language[i].name; i++)
435                 language[i].count = 0;
436         eob = (char *)buf+nbuf;
437         for(n = 0, p = (char *)buf; p < eob; n++) {
438                 if (!fullrune1(p, eob-p) && eob-p < UTFmax1)
439                         break;
440                 p += chartorune1(&r, p);
441                 if (r == 0)
442                         f = Cnull;
443                 else if (r <= 0x7f) {
444                         if (!isprint(r) && !isspace(r))
445                                 f = Ceascii;    /* ASCII control char */
446                         else f = r;
447                 } else if (r == 0x80) {
448                         bump_utf_count(r);
449                         f = Cutf;
450                 } else if (r < 0xA0)
451                         f = Cbinary;    /* Invalid Runes */
452                 else if (r <= 0xff)
453                         f = Clatin;     /* Latin 1 */
454                 else {
455                         bump_utf_count(r);
456                         f = Cutf;               /* UTF extension */
457                 }
458                 cfreq[f]++;                     /* ASCII chars peg directly */
459         }
460         /*
461          * gross classify
462          */
463         if (cfreq[Cbinary])
464                 guess = Fbinary;
465         else if (cfreq[Cutf])
466                 guess = Futf;
467         else if (cfreq[Clatin])
468                 guess = Flatin;
469         else if (cfreq[Ceascii])
470                 guess = Feascii;
471         else if (cfreq[Cnull])
472                 guess = Fbinary;
473         else
474                 guess = Fascii;
475         /*
476          * lookup dictionary words
477          */
478         memset(wfreq, 0, sizeof(wfreq));
479         if(guess == Fascii || guess == Flatin || guess == Futf)
480                 wordfreq();
481         /*
482          * call individual classify routines
483          */
484         for(i=0; call[i]; i++)
485                 if((*call[i])())
486                         return;
487
488         /*
489          * if all else fails,
490          * print out gross classification
491          */
492         if (nbuf < 100 && !mime)
493                 print(mime ? PLAIN : "short ");
494         if (guess == Fascii)
495                 print("%s\n", mime ? PLAIN : "Ascii");
496         else if (guess == Feascii)
497                 print("%s\n", mime ? PLAIN : "extended ascii");
498         else if (guess == Flatin)
499                 print("%s\n", mime ? PLAIN : "latin ascii");
500         else if (guess == Futf && utf_count() < 4)
501                 print_utf();
502         else print("%s\n", mime ? OCTET : "binary");
503 }
504
505 void
506 bump_utf_count(Rune r)
507 {
508         int low, high, mid;
509
510         high = sizeof(language)/sizeof(language[0])-1;
511         for (low = 0; low < high;) {
512                 mid = (low+high)/2;
513                 if (r >= language[mid].low) {
514                         if (r <= language[mid].high) {
515                                 language[mid].count++;
516                                 break;
517                         } else low = mid+1;
518                 } else high = mid;
519         }
520 }
521
522 int
523 utf_count(void)
524 {
525         int i, count;
526
527         count = 0;
528         for (i = 0; language[i].name; i++)
529                 if (language[i].count > 0)
530                         switch (language[i].mode) {
531                         case Normal:
532                         case First:
533                                 count++;
534                                 break;
535                         default:
536                                 break;
537                         }
538         return count;
539 }
540
541 int
542 chkascii(void)
543 {
544         int i;
545
546         for (i = 'a'; i < 'z'; i++)
547                 if (cfreq[i])
548                         return 1;
549         for (i = 'A'; i < 'Z'; i++)
550                 if (cfreq[i])
551                         return 1;
552         return 0;
553 }
554
555 int
556 find_first(char *name)
557 {
558         int i;
559
560         for (i = 0; language[i].name != 0; i++)
561                 if (language[i].mode == First
562                         && strcmp(language[i].name, name) == 0)
563                         return i;
564         return -1;
565 }
566
567 void
568 print_utf(void)
569 {
570         int i, printed, j;
571
572         if(mime){
573                 print("%s\n", PLAIN);
574                 return;
575         }
576         if (chkascii()) {
577                 printed = 1;
578                 print("Ascii");
579         } else
580                 printed = 0;
581         for (i = 0; language[i].name; i++)
582                 if (language[i].count) {
583                         switch(language[i].mode) {
584                         case Multi:
585                                 j = find_first(language[i].name);
586                                 if (j < 0)
587                                         break;
588                                 if (language[j].count > 0)
589                                         break;
590                                 /* Fall through */
591                         case Normal:
592                         case First:
593                                 if (printed)
594                                         print(" & ");
595                                 else printed = 1;
596                                 print("%s", language[i].name);
597                                 break;
598                         case Shared:
599                         default:
600                                 break;
601                         }
602                 }
603         if(!printed)
604                 print("UTF");
605         print(" text\n");
606 }
607
608 void
609 wordfreq(void)
610 {
611         int low, high, mid, r;
612         uchar *p, *p2, c;
613
614         p = buf;
615         for(;;) {
616                 while (p < buf+nbuf && !isalpha(*p))
617                         p++;
618                 if (p >= buf+nbuf)
619                         return;
620                 p2 = p;
621                 while(p < buf+nbuf && isalpha(*p))
622                         p++;
623                 c = *p;
624                 *p = 0;
625                 high = sizeof(dict)/sizeof(dict[0]);
626                 for(low = 0;low < high;) {
627                         mid = (low+high)/2;
628                         r = strcmp(dict[mid].word, (char*)p2);
629                         if(r == 0) {
630                                 wfreq[dict[mid].class]++;
631                                 break;
632                         }
633                         if(r < 0)
634                                 low = mid+1;
635                         else
636                                 high = mid;
637                 }
638                 *p++ = c;
639         }
640 }
641
/*
 * One magic-number rule: a file matches when its 32-bit little-endian
 * value ANDed with mask equals x.
 */
typedef struct Filemagic Filemagic;
struct Filemagic {
        ulong x;        /* expected value after masking */
        ulong mask;     /* bits of the value that take part in the comparison */
        char *desc;     /* description printed on a match */
        char *mime;     /* MIME type printed instead when the mime flag is set */
};
649
/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 * Rules are tried in order by filemagic and the first match wins.
 */
Filemagic long0tab[] = {
        0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
        /* "pac1" */
        0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
        /* "pXc2" */
        0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
        0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
        0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
        0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
        0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
        070707,         0xFFFF,         "cpio archive", "application/x-cpio",
        0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
        0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
        /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
        0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
        /* 0xfeedfacf */
        0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
        /* 0xcefaedfe */
        0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
        /* 0xcffaedfe */
        0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
        /* 0xcafebabe */
        0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
        /*
         * venti & fossil magic numbers are stored big-endian on disk,
         * thus the numbers appear reversed in this table.
         */
        0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
        0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
};
684
685 int
686 filemagic(Filemagic *tab, int ntab, ulong x)
687 {
688         int i;
689
690         for(i=0; i<ntab; i++)
691                 if((x&tab[i].mask) == tab[i].x){
692                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
693                         return 1;
694                 }
695         return 0;
696 }
697
698 int
699 long0(void)
700 {
701         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
702 }
703
/*
 * A magic-number rule applied at a given offset in the file rather
 * than at its start.
 */
typedef struct Fileoffmag Fileoffmag;
struct Fileoffmag {
        ulong   off;    /* offset from the start of the file where the four bytes are read */
        Filemagic;      /* unnamed member: x, mask, desc and mime are accessed directly */
};
709
/*
 * integers in this table must be as seen on a little-endian machine
 * when read from a file.
 * Each entry is: offset, value, mask, description, MIME type.
 */
Fileoffmag longofftab[] = {
        /*
         * venti & fossil magic numbers are stored big-endian on disk,
         * thus the numbers appear reversed in this table.
         */
        256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
        256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
        128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
        /* the bytes "Bud1" */
        4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
};
724
725 int
726 fileoffmagic(Fileoffmag *tab, int ntab)
727 {
728         int i;
729         ulong x;
730         Fileoffmag *tp;
731         uchar buf[sizeof(long)];
732
733         for(i=0; i<ntab; i++) {
734                 tp = tab + i;
735                 seek(fd, tp->off, 0);
736                 if (readn(fd, buf, sizeof buf) != sizeof buf)
737                         continue;
738                 x = LENDIAN(buf);
739                 if((x&tp->mask) == tp->x){
740                         print("%s\n", mime ? tp->mime : tp->desc);
741                         return 1;
742                 }
743         }
744         return 0;
745 }
746
747 int
748 longoff(void)
749 {
750         return fileoffmagic(longofftab, nelem(longofftab));
751 }
752
753 int
754 isexec(void)
755 {
756         Fhdr f;
757
758         seek(fd, 0, 0);         /* reposition to start of file */
759         if(crackhdr(fd, &f)) {
760                 print("%s\n", mime ? OCTET : f.name);
761                 return 1;
762         }
763         return 0;
764 }
765
766
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/*
 * One tar header block, viewed either as raw bytes (dummy) or as the
 * header fields (dbuf).
 */
union   hblock
{
        char    dummy[TBLOCK];
        struct  header
        {
                char    name[NAMSIZ];
                char    mode[8];
                char    uid[8];
                char    gid[8];
                char    size[12];
                char    mtime[12];
                char    chksum[8];
                char    linkflag;
                char    linkname[NAMSIZ];
                /* rest are defined by POSIX's ustar format; see p1003.2b */
                char    magic[6];       /* "ustar" */
                char    version[2];
                char    uname[32];
                char    gname[32];
                char    devmajor[8];
                char    devminor[8];
                char    prefix[155];  /* if non-null, path = prefix "/" name */
        } dbuf;
};

/*
 * Return the sum of all TBLOCK bytes of the header block, each taken
 * as an unsigned value, with the chksum field counted as blanks.
 * The chksum field of *hp is overwritten with spaces as a side effect.
 */
int
checksum(union hblock *hp)
{
        int sum, k;

        for(k = 0; k < (int)sizeof hp->dbuf.chksum; k++)
                hp->dbuf.chksum[k] = ' ';
        sum = 0;
        for(k = 0; k < TBLOCK; k++)
                sum += hp->dummy[k] & 0xff;
        return sum;
}
809
810 int
811 istar(void)
812 {
813         int chksum;
814         char tblock[TBLOCK];
815         union hblock *hp = (union hblock *)tblock;
816         struct header *hdr = &hp->dbuf;
817
818         seek(fd, 0, 0);         /* reposition to start of file */
819         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
820                 return 0;
821         chksum = strtol(hdr->chksum, 0, 8);
822         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
823                 if (strcmp(hdr->magic, "ustar") == 0)
824                         print(mime? "application/x-ustar\n": "posix tar archive\n");
825                 else
826                         print(mime? "application/x-tar\n": "tar archive\n");
827                 return 1;
828         }
829         return 0;
830 }
831
/*
 * initial words to classify file
 *
 * istring compares the start of buf with each key in table order and
 * stops at the first match, so a longer key must come before any
 * shorter key that is a prefix of it.  The entry with a nil key ends
 * the table.
 */
struct  FILE_STRING
{
        char    *key;           /* bytes expected at the start of the file */
        char    *filetype;      /* description printed on a match */
        int     length;         /* number of bytes of key to compare; -1 means strlen(key) */
        char    *mime;          /* MIME type printed instead in mime mode */
} file_string[] =
{
        "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
        "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
        "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
        "!<arch>\n__.SYMDEF",   "archive random library",       16,     "application/octet-stream",
        "!<arch>\n",            "archive",                      8,      "application/octet-stream",
        "070707",               "cpio archive - ascii header",  6,      "application/octet-stream",
        "#!/bin/rc",            "rc executable file",           9,      "text/plain",
        "#!/bin/sh",            "sh executable file",           9,      "text/plain",
        "%!",                   "postscript",                   2,      "application/postscript",
        "\004%!",               "postscript",                   3,      "application/postscript",
        "x T post",             "troff output for post",        8,      "application/troff",
        "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
        "x T utf",              "troff output for UTF",         7,      "application/troff",
        "x T 202",              "troff output for 202",         7,      "application/troff",
        "x T aps",              "troff output for aps",         7,      "application/troff",
        "x T ",                 "troff output",                 4,      "application/troff",
        "GIF",                  "GIF image",                    3,      "image/gif",
        "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
        "%PDF",                 "PDF",                          4,      "application/pdf",
        "<!DOCTYPE",            "HTML file",                    9,      "text/html",
        "<!doctype",            "HTML file",                    9,      "text/html",
        "<!--",                 "HTML file",                    4,      "text/html",
        "<html>",               "HTML file",                    6,      "text/html",
        "<HTML>",               "HTML file",                    6,      "text/html",
        "<?xml",                "HTML file",                    5,      "text/html",
        "\111\111\052\000",     "tiff",                         4,      "image/tiff",
        "\115\115\000\052",     "tiff",                         4,      "image/tiff",
        "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
        "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
        "BM",                   "bmp",                          2,      "image/bmp", 
        "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
        "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
        "\033E\033",    "HP PCL printer data",          3,      OCTET,
        "\033&",        "HP PCL printer data",          2,      OCTET,
        "\033%-12345X", "HPJCL file",           9,      "application/hpjcl",
        "\033Lua",              "Lua bytecode",         4,      OCTET,
        "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
        "OggS",                 "ogg audio",            4,      "audio/ogg",
        ".snd",                 "sun audio",            4,      "audio/basic",
        "\211PNG",              "PNG image",            4,      "image/png",
        "P3\n",                 "ppm",                          3,      "image/ppm",
        "P6\n",                 "ppm",                          3,      "image/ppm",
        "/* XPM */\n",  "xbm",                          10,     "image/xbm",
        ".HTML ",               "troff -ms input",      6,      "text/troff",
        ".LP",                  "troff -ms input",      3,      "text/troff",
        ".ND",                  "troff -ms input",      3,      "text/troff",
        ".PP",                  "troff -ms input",      3,      "text/troff",
        ".TL",                  "troff -ms input",      3,      "text/troff",
        ".TR",                  "troff -ms input",      3,      "text/troff",
        ".TH",                  "manual page",          3,      "text/troff",
        ".\\\"",                "troff input",          3,      "text/troff",
        ".de",                  "troff input",          3,      "text/troff",
        ".if",                  "troff input",          3,      "text/troff",
        ".nr",                  "troff input",          3,      "text/troff",
        ".tr",                  "troff input",          3,      "text/troff",
        "vac:",                 "venti score",          4,      "text/plain",
        "-----BEGIN CERTIFICATE-----\n",
                                "pem certificate",      -1,     "text/plain",
        "-----BEGIN TRUSTED CERTIFICATE-----\n",
                                "pem trusted certificate", -1,  "text/plain",
        "-----BEGIN X509 CERTIFICATE-----\n",
                                "pem x.509 certificate", -1,    "text/plain",
        "subject=/C=",          "pem certificate with header", -1, "text/plain",
        "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
        "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
        "[playlist]",           "playlist",             10,     "application/x-scpls",
        "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
        "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
        "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
        0,0,0,0
};
916
917 int
918 istring(void)
919 {
920         int i, l;
921         struct FILE_STRING *p;
922
923         for(p = file_string; p->key; p++) {
924                 l = p->length;
925                 if(l == -1)
926                         l = strlen(p->key);
927                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
928                         print("%s\n", mime ? p->mime : p->filetype);
929                         return 1;
930                 }
931         }
932         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
933                 for(i = 5; i < nbuf; i++)
934                         if(buf[i] == '\n')
935                                 break;
936                 if(mime)
937                         print("%s\n", OCTET);
938                 else
939                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
940                 return 1;
941         }
942         return 0;
943 }
944
945 struct offstr
946 {
947         ulong   off;
948         struct FILE_STRING;
949 } offstrs[] = {
950         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
951         0, 0, 0, 0, 0
952 };
953
954 int
955 isoffstr(void)
956 {
957         int n;
958         char buf[256];
959         struct offstr *p;
960
961         for(p = offstrs; p->key; p++) {
962                 seek(fd, p->off, 0);
963                 n = p->length;
964                 if (n > sizeof buf)
965                         n = sizeof buf;
966                 if (readn(fd, buf, n) != n)
967                         continue;
968                 if(memcmp(buf, p->key, n) == 0) {
969                         print("%s\n", mime ? p->mime : p->filetype);
970                         return 1;
971                 }
972         }
973         return 0;
974 }
975
976 int
977 iff(void)
978 {
979         if (strncmp((char*)buf, "FORM", 4) == 0 &&
980             strncmp((char*)buf+8, "AIFF", 4) == 0) {
981                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
982                 return 1;
983         }
984         if (strncmp((char*)buf, "RIFF", 4) == 0) {
985                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
986                         print("%s\n", mime? "audio/wave": "wave audio");
987                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
988                         print("%s\n", mime? "video/avi": "avi video");
989                 else
990                         print("%s\n", mime? "application/octet-stream": "riff file");
991                 return 1;
992         }
993         return 0;
994 }
995
/*
 * Tag names looked for by ishtml.  Longer names come first:
 * ishtml stops at the first entry that matches, so a short name
 * must not hide a longer one that starts with it.
 */
char*	html_string[] = {
	/* 10 */
	"blockquote",
	/* 8 */
	"!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
	/* 7 */
	"caption",
	/* 6 */
	"button", "center", "iframe", "object", "option", "script",
	"select", "strong",
	/* 5 */
	"blink", "embed", "frame", "input", "label", "param", "small",
	"style", "table", "tbody", "tfoot", "thead", "title",
	/* 4 */
	"?xml", "body", "code", "font", "form", "head", "html",
	"link", "menu", "meta", "span",
	/* 3 */
	"!--", "big", "dir", "div", "img", "pre", "sub", "sup",
	/* 2 */
	"br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
	"h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
	/* 1 */
	"a", "b", "i", "p", "q", "u",
	0,
};
1012
1013 int
1014 ishtml(void)
1015 {
1016         int i, n, count;
1017         uchar *p;
1018
1019         count = 0;
1020         p = buf;
1021         for(;;) {
1022                 while(p < buf+nbuf && *p != '<')
1023                         p++;
1024                 p++;
1025                 if (p >= buf+nbuf)
1026                         break;
1027                 if(*p == '/')
1028                         p++;
1029                 if(p >= buf+nbuf)
1030                         break;
1031                 for(i = 0; html_string[i]; i++){
1032                         n = strlen(html_string[i]);
1033                         if(p + n > buf+nbuf)
1034                                 continue;
1035                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1036                                 p += n;
1037                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1038                                         if(++count > 2) {
1039                                                 print("%s\n", mime ? "text/html" : "HTML file");
1040                                                 return 1;
1041                                         }
1042                                 }
1043                                 break;
1044                         }
1045                 }
1046         }
1047         return 0;
1048 }
1049
/*
 * Header names counted by isrfc822; compared without regard to
 * case against the start of each line.  Ends with 0.
 */
char*	rfc822_string[] =
{
	"from:", "date:", "to:", "subject:",
	"received:", "reply to:", "sender:",
	0,
};
1061
1062 int
1063 isrfc822(void)
1064 {
1065
1066         char *p, *q, *r;
1067         int i, count;
1068
1069         count = 0;
1070         p = (char*)buf;
1071         for(;;) {
1072                 q = strchr(p, '\n');
1073                 if(q == nil)
1074                         break;
1075                 *q = 0;
1076                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1077                         count++;
1078                         *q = '\n';
1079                         p = q+1;
1080                         continue;
1081                 }
1082                 *q = '\n';
1083                 if(*p != '\t' && *p != ' '){
1084                         r = strchr(p, ':');
1085                         if(r == 0 || r > q)
1086                                 break;
1087                         for(i = 0; rfc822_string[i]; i++) {
1088                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1089                                         count++;
1090                                         break;
1091                                 }
1092                         }
1093                 }
1094                 p = q+1;
1095         }
1096         if(count >= 3){
1097                 print("%s\n", mime ? "message/rfc822" : "email file");
1098                 return 1;
1099         }
1100         return 0;
1101 }
1102
1103 int
1104 ismbox(void)
1105 {
1106         char *p, *q;
1107
1108         p = (char*)buf;
1109         q = strchr(p, '\n');
1110         if(q == nil)
1111                 return 0;
1112         *q = 0;
1113         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1114                 print("%s\n", mime ? "text/plain" : "mail box");
1115                 return 1;
1116         }
1117         *q = '\n';
1118         return 0;
1119 }
1120
1121 int
1122 iscint(void)
1123 {
1124         int type;
1125         char *name;
1126         Biobuf b;
1127
1128         if(Binit(&b, fd, OREAD) == Beof)
1129                 return 0;
1130         seek(fd, 0, 0);
1131         type = objtype(&b, &name);
1132         if(type < 0)
1133                 return 0;
1134         if(mime)
1135                 print("%s\n", OCTET);
1136         else
1137                 print("%s intermediate\n", name);
1138         return 1;
1139 }
1140
1141 int
1142 isc(void)
1143 {
1144         int n;
1145
1146         n = wfreq[I1];
1147         /*
1148          * includes
1149          */
1150         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1151                 goto yes;
1152         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1153                 goto yes;
1154         /*
1155          * declarations
1156          */
1157         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1158                 goto yes;
1159         /*
1160          * assignments
1161          */
1162         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1163                 goto yes;
1164         return 0;
1165
1166 yes:
1167         if(mime){
1168                 print("%s\n", PLAIN);
1169                 return 1;
1170         }
1171         if(wfreq[Alword] > 0)
1172                 print("alef program\n");
1173         else
1174                 print("c program\n");
1175         return 1;
1176 }
1177
1178 int
1179 islimbo(void)
1180 {
1181         /*
1182          * includes
1183          */
1184         if(wfreq[Lword] < 4)
1185                 return 0;
1186         print("%s\n", mime ? PLAIN : "limbo program");
1187         return 1;
1188 }
1189
1190 int
1191 isas(void)
1192 {
1193         /*
1194          * includes
1195          */
1196         if(wfreq[Aword] < 2)
1197                 return 0;
1198         print("%s\n", mime ? PLAIN : "as program");
1199         return 1;
1200 }
1201
1202 int
1203 istga(void)
1204 {
1205         uchar *p;
1206
1207         p = buf;
1208         if(nbuf < 18)
1209                 return 0;
1210         if((p[12] | p[13]<<8) == 0)     /* width */
1211                 return 0;
1212         if((p[14] | p[15]<<8) == 0)     /* height */
1213                 return 0;
1214         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1215                 return 0;
1216         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1217                 return 0;
1218         if(p[1] == 0){  /* non color-mapped */
1219                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1220                         return 0;
1221                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1222                         return 0;
1223         } else
1224         if(p[1] == 1){  /* color-mapped */
1225                 if((p[2]&3) != 1 || p[7] == 0)  
1226                         return 0;
1227                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1228                         return 0;
1229         } else
1230                 return 0;
1231         print("%s\n", mime ? "image/tga" : "targa image");
1232         return 1;
1233 }
1234
1235 int
1236 ismp3(void)
1237 {
1238         uchar *p, *e;
1239
1240         p = buf;
1241         e = p + nbuf-1;
1242         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1243                 if((p[1] & 0xFE) == 0xFA){
1244                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1245                         return 1;
1246                 }
1247                 p++;
1248         }
1249         return 0;
1250 }
1251
1252 /*
1253  * low entropy means encrypted
1254  */
1255 int
1256 ismung(void)
1257 {
1258         int i, bucket[8];
1259         float cs;
1260
1261         if(nbuf < 64)
1262                 return 0;
1263         memset(bucket, 0, sizeof(bucket));
1264         for(i=nbuf-64; i<nbuf; i++)
1265                 bucket[(buf[i]>>5)&07] += 1;
1266
1267         cs = 0.;
1268         for(i=0; i<8; i++)
1269                 cs += (bucket[i]-8)*(bucket[i]-8);
1270         cs /= 8.;
1271         if(cs <= 24.322) {
1272                 if(buf[0]==0x1f && buf[1]==0x9d)
1273                         print("%s\n", mime ? "application/x-compress" : "compressed");
1274                 else
1275                 if(buf[0]==0x1f && buf[1]==0x8b)
1276                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1277                 else
1278                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1279                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1280                 else
1281                 if(buf[0]==0x78 && buf[1]==0x9c)
1282                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1283                 else
1284                         print("%s\n", mime ? OCTET : "encrypted");
1285                 return 1;
1286         }
1287         return 0;
1288 }
1289
1290 /*
1291  * english by punctuation and frequencies
1292  */
1293 int
1294 isenglish(void)
1295 {
1296         int vow, comm, rare, badpun, punct;
1297         char *p;
1298
1299         if(guess != Fascii && guess != Feascii)
1300                 return 0;
1301         badpun = 0;
1302         punct = 0;
1303         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1304                 switch(*p) {
1305                 case '.':
1306                 case ',':
1307                 case ')':
1308                 case '%':
1309                 case ';':
1310                 case ':':
1311                 case '?':
1312                         punct++;
1313                         if(p[1] != ' ' && p[1] != '\n')
1314                                 badpun++;
1315                 }
1316         if(badpun*5 > punct)
1317                 return 0;
1318         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1319                 return 0;
1320         if(2*cfreq[';'] > cfreq['e'])
1321                 return 0;
1322
1323         vow = 0;
1324         for(p="AEIOU"; *p; p++) {
1325                 vow += cfreq[*p];
1326                 vow += cfreq[tolower(*p)];
1327         }
1328         comm = 0;
1329         for(p="ETAION"; *p; p++) {
1330                 comm += cfreq[*p];
1331                 comm += cfreq[tolower(*p)];
1332         }
1333         rare = 0;
1334         for(p="VJKQXZ"; *p; p++) {
1335                 rare += cfreq[*p];
1336                 rare += cfreq[tolower(*p)];
1337         }
1338         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1339                 print("%s\n", mime ? PLAIN : "English text");
1340                 return 1;
1341         }
1342         return 0;
1343 }
1344
1345 /*
1346  * pick up a number with
1347  * syntax _*[0-9]+_
1348  */
1349 #define P9BITLEN        12
1350 int
1351 p9bitnum(uchar *bp)
1352 {
1353         int n, c, len;
1354
1355         len = P9BITLEN;
1356         while(*bp == ' ') {
1357                 bp++;
1358                 len--;
1359                 if(len <= 0)
1360                         return -1;
1361         }
1362         n = 0;
1363         while(len > 1) {
1364                 c = *bp++;
1365                 if(!isdigit(c))
1366                         return -1;
1367                 n = n*10 + c-'0';
1368                 len--;
1369         }
1370         if(*bp != ' ')
1371                 return -1;
1372         return n;
1373 }
1374
/*
 * depthof - work out the pixel depth from the first 12-character
 * field of an image header.
 * A field that starts with a digit is the old format: the number n
 * gives a depth of 1<<n and *newp is set to 0.
 * Otherwise the field is taken as letters each followed by a number
 * of bits; the depth is the sum and *newp is set to 1.
 * Returns the depth, or -1 if the field is blank, the old-format
 * number cannot be shifted, or the sum is not a positive
 * multiple or divisor of 8.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		ld = strtol(s, 0, 0);
		/* a shift of 31 or more is not defined for int */
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		/*
		 * only convert when a digit follows: strtoul skips
		 * blanks and would otherwise read the next field
		 */
		if(s<es && '0'<=*s && *s<='9')
			d += strtoul(s, &s, 10);
	}

	/* a depth of zero would make the caller divide by zero */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1402
1403 int
1404 isp9bit(void)
1405 {
1406         int dep, lox, loy, hix, hiy, px, new, cmpr;
1407         ulong t;
1408         long len;
1409         char *newlabel;
1410         uchar *cp;
1411
1412         cp = buf;
1413         cmpr = 0;
1414         newlabel = "old ";
1415
1416         if(memcmp(cp, "compressed\n", 11) == 0) {
1417                 cmpr = 1;
1418                 cp = buf + 11;
1419         }
1420
1421         dep = depthof((char*)cp + 0*P9BITLEN, &new);
1422         if(new)
1423                 newlabel = "";
1424         lox = p9bitnum(cp + 1*P9BITLEN);
1425         loy = p9bitnum(cp + 2*P9BITLEN);
1426         hix = p9bitnum(cp + 3*P9BITLEN);
1427         hiy = p9bitnum(cp + 4*P9BITLEN);
1428         if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
1429                 return 0;
1430
1431         if(dep < 8){
1432                 px = 8/dep;             /* pixels per byte */
1433                 /* set l to number of bytes of data per scan line */
1434                 if(lox >= 0)
1435                         len = (hix+px-1)/px - lox/px;
1436                 else{                   /* make positive before divide */
1437                         t = (-lox)+px-1;
1438                         t = (t/px)*px;
1439                         len = (t+hix+px-1)/px;
1440                 }
1441         }else
1442                 len = (hix-lox)*dep/8;
1443         len *= hiy - loy;               /* col length */
1444         len += 5 * P9BITLEN;            /* size of initial ascii */
1445
1446         /*
1447          * for compressed images, don't look any further. otherwise:
1448          * for image file, length is non-zero and must match calculation above.
1449          * for /dev/window and /dev/screen the length is always zero.
1450          * for subfont, the subfont header should follow immediately.
1451          */
1452         if (cmpr) {
1453                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
1454                         newlabel, dep);
1455                 return 1;
1456         }
1457         /*
1458          * mbuf->length == 0 probably indicates reading a pipe.
1459          * Ghostscript sometimes produces a little extra on the end.
1460          */
1461         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1462             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1463                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
1464                 return 1;
1465         }
1466         if (p9subfont(buf+len)) {
1467                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
1468                 return 1;
1469         }
1470         return 0;
1471 }
1472
1473 int
1474 p9subfont(uchar *p)
1475 {
1476         int n, h, a;
1477
1478         /* if image too big, assume it's a subfont */
1479         if (p+3*P9BITLEN > buf+sizeof(buf))
1480                 return 1;
1481
1482         n = p9bitnum(p + 0*P9BITLEN);   /* char count */
1483         if (n < 0)
1484                 return 0;
1485         h = p9bitnum(p + 1*P9BITLEN);   /* height */
1486         if (h < 0)
1487                 return 0;
1488         a = p9bitnum(p + 2*P9BITLEN);   /* ascent */
1489         if (a < 0)
1490                 return 0;
1491         return 1;
1492 }
1493
/* true for blank, tab and newline; evaluates c more than once */
#define	WHITESPACE(c) \
	((c) == ' ' || (c) == '\t' || (c) == '\n')
1495
1496 int
1497 isp9font(void)
1498 {
1499         uchar *cp, *p;
1500         int i, n;
1501         char pathname[1024];
1502
1503         cp = buf;
1504         if (!getfontnum(cp, &cp))       /* height */
1505                 return 0;
1506         if (!getfontnum(cp, &cp))       /* ascent */
1507                 return 0;
1508         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1509                 if (!getfontnum(cp, &cp))       /* min */
1510                         break;
1511                 if (!getfontnum(cp, &cp))       /* max */
1512                         return 0;
1513                 getfontnum(cp, &cp);    /* optional offset */
1514                 while (WHITESPACE(*cp))
1515                         cp++;
1516                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1517                                 ;
1518                         /* construct a path name, if needed */
1519                 n = 0;
1520                 if (*p != '/' && slash) {
1521                         n = slash-fname+1;
1522                         if (n < sizeof(pathname))
1523                                 memcpy(pathname, fname, n);
1524                         else n = 0;
1525                 }
1526                 if (n+cp-p+4 < sizeof(pathname)) {
1527                         memcpy(pathname+n, p, cp-p);
1528                         n += cp-p;
1529                         pathname[n] = 0;
1530                         if (access(pathname, AEXIST) < 0) {
1531                                 strcpy(pathname+n, ".0");
1532                                 if (access(pathname, AEXIST) < 0)
1533                                         return 0;
1534                         }
1535                 }
1536         }
1537         if (i) {
1538                 print(mime ? "text/plain\n" : "font file\n");
1539                 return 1;
1540         }
1541         return 0;
1542 }
1543
1544 int
1545 getfontnum(uchar *cp, uchar **rp)
1546 {
1547         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1548                 cp++;
1549         if (*cp < '0' || *cp > '9')
1550                 return 0;
1551         strtoul((char *)cp, (char **)rp, 0);
1552         if (!WHITESPACE(**rp)) {
1553                 *rp = cp;
1554                 return 0;
1555         }
1556         return 1;
1557 }
1558
1559 int
1560 isrtf(void)
1561 {
1562         if(strstr((char *)buf, "\\rtf1")){
1563                 print(mime ? "application/rtf\n" : "rich text format\n");
1564                 return 1;
1565         }
1566         return 0;
1567 }
1568
1569 int
1570 ismsdos(void)
1571 {
1572         if (buf[0] == 0x4d && buf[1] == 0x5a){
1573                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1574                 return 1;
1575         }
1576         return 0;
1577 }
1578
1579 int
1580 isicocur(void)
1581 {
1582         if(buf[0] || buf[1] || buf[3] || buf[9])
1583                 return 0;
1584         if(buf[4] == 0x00 && buf[5] == 0x00)
1585                 return 0;
1586         switch(buf[2]){
1587         case 1:
1588                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1589                 return 1;
1590         case 2:
1591                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1592                 return 1;
1593         }
1594         return 0;
1595 }
1596
1597 int
1598 iself(void)
1599 {
1600         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1601         [1]     "WE32100",
1602         [2]     "SPARC",
1603         [3]     "i386",
1604         [4]     "M68000",
1605         [5]     "M88000",
1606         [6]     "i486",
1607         [7]     "i860",
1608         [8]     "R3000",
1609         [9]     "S370",
1610         [10]    "R4000",
1611         [15]    "HP-PA",
1612         [18]    "sparc v8+",
1613         [19]    "i960",
1614         [20]    "PPC-32",
1615         [21]    "PPC-64",
1616         [40]    "ARM",
1617         [41]    "Alpha",
1618         [43]    "sparc v9",
1619         [50]    "IA-64",
1620         [62]    "AMD64",
1621         [75]    "VAX",
1622         };
1623         static char *type[] = {
1624         [1]     "relocatable object",
1625         [2]     "executable",
1626         [3]     "shared library",
1627         [4]     "core dump",
1628         };
1629
1630         if (memcmp(buf, "\x7fELF", 4) == 0){
1631                 if (!mime){
1632                         int isdifend = 0;
1633                         int n = (buf[19] << 8) | buf[18];
1634                         char *p = "unknown";
1635                         char *t = "unknown";
1636
1637                         if (n > 0 && n < nelem(cpu) && cpu[n])
1638                                 p = cpu[n];
1639                         else {
1640                                 /* try the other byte order */
1641                                 isdifend = 1;
1642                                 n = (buf[18] << 8) | buf[19];
1643                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1644                                         p = cpu[n];
1645                         }
1646                         if(isdifend)
1647                                 n = (buf[16]<< 8) | buf[17];
1648                         else
1649                                 n = (buf[17]<< 8) | buf[16];
1650
1651                         if(n>0 && n < nelem(type) && type[n])
1652                                 t = type[n];
1653                         print("%s ELF %s\n", p, t);
1654                 }
1655                 else
1656                         print("application/x-elf-executable\n");
1657                 return 1;
1658         }
1659
1660         return 0;
1661 }
1662
1663 int
1664 isface(void)
1665 {
1666         int i, j, ldepth, l;
1667         char *p;
1668
1669         ldepth = -1;
1670         for(j = 0; j < 3; j++){
1671                 for(p = (char*)buf, i=0; i<3; i++){
1672                         if(p[0] != '0' || p[1] != 'x')
1673                                 return 0;
1674                         if(buf[2+8] == ',')
1675                                 l = 2;
1676                         else if(buf[2+4] == ',')
1677                                 l = 1;
1678                         else
1679                                 return 0;
1680                         if(ldepth == -1)
1681                                 ldepth = l;
1682                         if(l != ldepth)
1683                                 return 0;
1684                         strtoul(p, &p, 16);
1685                         if(*p++ != ',')
1686                                 return 0;
1687                         while(*p == ' ' || *p == '\t')
1688                                 p++;
1689                 }
1690                 if (*p++ != '\n')
1691                         return 0;
1692         }
1693
1694         if(mime)
1695                 print("application/x-face\n");
1696         else
1697                 print("face image depth %d\n", ldepth);
1698         return 1;
1699 }
1700