]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/file.c
upas/fs: fix more locking bugs, remove debugging clutter, remove planb mbox code
[plan9front.git] / sys / src / cmd / file.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include <mach.h>
6
7 /*
8  * file - determine type of file
9  */
/*
 * LENDIAN - assemble a 32-bit little-endian value from the four
 * bytes at p.  Each byte is widened to unsigned long before it is
 * shifted, so a top byte >= 0x80 is never shifted into the sign bit
 * of an int (which would sign-extend if the result type is wider
 * than 32 bits).
 */
#define LENDIAN(p)	((unsigned long)(p)[0] | ((unsigned long)(p)[1]<<8) | \
			((unsigned long)(p)[2]<<16) | ((unsigned long)(p)[3]<<24))
11
12 uchar   buf[6001];
13 short   cfreq[140];
14 short   wfreq[50];
15 int     nbuf;
16 Dir*    mbuf;
17 int     fd;
18 char    *fname;
19 char    *slash;
20
/*
 * word classes (the class field of dict[], used as wfreq[] indices)
 * and character classes (cfreq[] indices above the ASCII range).
 */
enum
{
	/* dictionary word classes */
	Cword	= 0,
	Fword	= 1,
	Aword	= 2,
	Alword	= 3,
	Lword	= 4,
	I1	= 5,
	I2	= 6,
	I3	= 7,

	/* character classes; values below 128 are the ASCII codes themselves */
	Clatin	= 128,
	Cbinary	= 129,
	Cnull	= 130,
	Ceascii	= 131,
	Cutf	= 132,
};
37 struct
38 {
39         char*   word;
40         int     class;
41 } dict[] =
42 {
43         "PATH",         Lword,
44         "TEXT",         Aword,
45         "adt",          Alword,
46         "aggr",         Alword,
47         "alef",         Alword,
48         "array",        Lword,
49         "block",        Fword,
50         "char",         Cword,
51         "common",       Fword,
52         "con",          Lword,
53         "data",         Fword,
54         "dimension",    Fword,
55         "double",       Cword,
56         "extern",       Cword,
57         "bio",          I2,
58         "float",        Cword,
59         "fn",           Lword,
60         "function",     Fword,
61         "h",            I3,
62         "implement",    Lword,
63         "import",       Lword,
64         "include",      I1,
65         "int",          Cword,
66         "integer",      Fword,
67         "iota",         Lword,
68         "libc",         I2,
69         "long",         Cword,
70         "module",       Lword,
71         "real",         Fword,
72         "ref",          Lword,
73         "register",     Cword,
74         "self",         Lword,
75         "short",        Cword,
76         "static",       Cword,
77         "stdio",        I2,
78         "struct",       Cword,
79         "subroutine",   Fword,
80         "u",            I2,
81         "void",         Cword,
82 };
83
/* values of the 'mode' field in the language table */
enum
{
	Normal	= 0,	/* language described by a single range */
	First	= 1,	/* first entry for a language spanning several ranges */
	Multi	= 2,	/* later entries for such a language */
	Shared	= 3,	/* codes used in several languages */
};
91
92 struct
93 {
94         int     mode;           /* see enum above */
95         int     count;
96         int     low;
97         int     high;
98         char    *name;
99
100 } language[] =
101 {
102         Normal, 0,      0x0100, 0x01FF, "Extended Latin",
103         Normal, 0,      0x0370, 0x03FF, "Greek",
104         Normal, 0,      0x0400, 0x04FF, "Cyrillic",
105         Normal, 0,      0x0530, 0x058F, "Armenian",
106         Normal, 0,      0x0590, 0x05FF, "Hebrew",
107         Normal, 0,      0x0600, 0x06FF, "Arabic",
108         Normal, 0,      0x0900, 0x097F, "Devanagari",
109         Normal, 0,      0x0980, 0x09FF, "Bengali",
110         Normal, 0,      0x0A00, 0x0A7F, "Gurmukhi",
111         Normal, 0,      0x0A80, 0x0AFF, "Gujarati",
112         Normal, 0,      0x0B00, 0x0B7F, "Oriya",
113         Normal, 0,      0x0B80, 0x0BFF, "Tamil",
114         Normal, 0,      0x0C00, 0x0C7F, "Telugu",
115         Normal, 0,      0x0C80, 0x0CFF, "Kannada",
116         Normal, 0,      0x0D00, 0x0D7F, "Malayalam",
117         Normal, 0,      0x0E00, 0x0E7F, "Thai",
118         Normal, 0,      0x0E80, 0x0EFF, "Lao",
119         Normal, 0,      0x1000, 0x105F, "Tibetan",
120         Normal, 0,      0x10A0, 0x10FF, "Georgian",
121         Normal, 0,      0x3040, 0x30FF, "Japanese",
122         Normal, 0,      0x3100, 0x312F, "Chinese",
123         First,  0,      0x3130, 0x318F, "Korean",
124         Multi,  0,      0x3400, 0x3D2F, "Korean",
125         Shared, 0,      0x4e00, 0x9fff, "CJK",
126         Normal, 0,      0,      0,      0,              /* terminal entry */
127 };
128
129
/* gross classification of the buffer, chosen by filetype() */
enum
{
	Fascii	= 0,	/* printable ascii */
	Flatin	= 1,	/* latin 1 */
	Futf	= 2,	/* UTF character set */
	Fbinary	= 3,	/* binary */
	Feascii	= 4,	/* ASCII with control chars */
	Fnull	= 5,	/* NULL in file */
} guess;
139
140 void    bump_utf_count(Rune);
141 int     cistrncmp(char*, char*, int);
142 void    filetype(int);
143 int     getfontnum(uchar*, uchar**);
144 int     isas(void);
145 int     isc(void);
146 int     iscint(void);
147 int     isenglish(void);
148 int     ishp(void);
149 int     ishtml(void);
150 int     isrfc822(void);
151 int     ismbox(void);
152 int     islimbo(void);
153 int     istga(void);
154 int     ismp3(void);
155 int     ismung(void);
156 int     isp9bit(void);
157 int     isp9font(void);
158 int     isrtf(void);
159 int     ismsdos(void);
160 int     isicocur(void);
161 int     iself(void);
162 int     istring(void);
163 int     isoffstr(void);
164 int     iff(void);
165 int     long0(void);
166 int     longoff(void);
167 int     istar(void);
168 int     isface(void);
169 int     isexec(void);
170 int     p9bitnum(char*, int*);
171 int     p9subfont(uchar*);
172 void    print_utf(void);
173 void    type(char*, int);
174 int     utf_count(void);
175 void    wordfreq(void);
176
177 int     (*call[])(void) =
178 {
179         long0,          /* recognizable by first 4 bytes */
180         istring,        /* recognizable by first string */
181         iself,          /* ELF (foreign) executable */
182         isexec,         /* native executables */
183         iff,            /* interchange file format (strings) */
184         longoff,        /* recognizable by 4 bytes at some offset */
185         isoffstr,       /* recognizable by string at some offset */
186         isrfc822,       /* email file */
187         ismbox,         /* mail box */
188         istar,          /* recognizable by tar checksum */
189         iscint,         /* compiler/assembler intermediate */
190         ishtml,         /* html keywords */
191         islimbo,        /* limbo source */
192         isc,            /* c & alef compiler key words */
193         isas,           /* assembler key words */
194         isp9font,       /* plan 9 font */
195         isp9bit,        /* plan 9 image (as from /dev/window) */
196         isrtf,          /* rich text format */
197         ismsdos,        /* msdos exe (virus file attachement) */
198         isicocur,               /* windows icon or cursor file */
199         isface,         /* ascii face file */
200         istga,
201         ismp3,
202
203         /* last resorts */
204         ismung,         /* entropy compressed/encrypted */
205         isenglish,      /* char frequency English */
206         0
207 };
208
/* set by the -m flag: print mime types instead of descriptions */
int mime;

/* mime types used as the generic binary and text answers */
char OCTET[] = "application/octet-stream",
     PLAIN[] = "text/plain";
213
214 void
215 main(int argc, char *argv[])
216 {
217         int i, j, maxlen;
218         char *cp;
219         Rune r;
220
221         ARGBEGIN{
222         case 'm':
223                 mime = 1;
224                 break;
225         default:
226                 fprint(2, "usage: file [-m] [file...]\n");
227                 exits("usage");
228         }ARGEND;
229
230         maxlen = 0;
231         if(mime == 0 || argc > 1){
232                 for(i = 0; i < argc; i++) {
233                         for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
234                                         ;
235                         if(j > maxlen)
236                                 maxlen = j;
237                 }
238         }
239         if (argc <= 0) {
240                 if(!mime)
241                         print ("stdin: ");
242                 filetype(0);
243         }
244         else {
245                 for(i = 0; i < argc; i++)
246                         type(argv[i], maxlen);
247         }
248         exits(0);
249 }
250
251 void
252 type(char *file, int nlen)
253 {
254         Rune r;
255         int i;
256         char *p;
257
258         if(nlen > 0){
259                 slash = 0;
260                 for (i = 0, p = file; *p; i++) {
261                         if (*p == '/')                  /* find rightmost slash */
262                                 slash = p;
263                         p += chartorune(&r, p);         /* count runes */
264                 }
265                 print("%s:%*s",file, nlen-i+1, "");
266         }
267         fname = file;
268         if ((fd = open(file, OREAD)) < 0) {
269                 fprint(2, "cannot open: %r\n");
270                 return;
271         }
272         filetype(fd);
273         close(fd);
274 }
275
276 void
277 utfconv(void)
278 {
279         Rune r;
280         uchar *rb;
281         char *p, *e;
282         int i;
283
284         if(nbuf < 4)
285                 return;
286
287         if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
288                 if(!mime)
289                         print("utf-32be ");
290                 return;
291         } else
292         if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
293                 if(!mime)
294                         print("utf-32le ");
295                 return;
296         } else
297         if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
298                 memmove(buf, buf+3, nbuf-3);
299                 nbuf -= 3;
300                 return;
301         } else
302         if(memcmp(buf, "\xFE\xFF", 2) == 0){
303                 if(!mime)
304                         print("utf-16be ");
305
306                 nbuf -= 2;
307                 rb = malloc(nbuf+1);
308                 memmove(rb, buf+2, nbuf);
309                 p = (char*)buf;
310                 e = p+sizeof(buf)-UTFmax-1;
311                 for(i=0; i<nbuf && p < e; i+=2){
312                         r = rb[i+1] | rb[i]<<8;
313                         p += runetochar(p, &r);
314                 }
315                 *p = 0;
316                 free(rb);
317                 nbuf = p - (char*)buf;
318         } else
319         if(memcmp(buf, "\xFF\xFE", 2) == 0){
320                 if(!mime)
321                         print("utf-16le ");
322
323                 nbuf -= 2;
324                 rb = malloc(nbuf+1);
325                 memmove(rb, buf+2, nbuf);
326                 p = (char*)buf;
327                 e = p+sizeof(buf)-UTFmax-1;
328                 for(i=0; i<nbuf && p < e; i+=2){
329                         r = rb[i] | rb[i+1]<<8;
330                         p += runetochar(p, &r);
331                 }
332                 *p = 0;
333                 free(rb);
334                 nbuf = p - (char*)buf;
335         }
336 }
337
338 void
339 filetype(int fd)
340 {
341         Rune r;
342         int i, f, n;
343         char *p, *eob;
344
345         free(mbuf);
346         mbuf = dirfstat(fd);
347         if(mbuf == nil){
348                 fprint(2, "cannot stat: %r\n");
349                 return;
350         }
351         if(mbuf->mode & DMDIR) {
352                 print("%s\n", mime ? OCTET : "directory");
353                 return;
354         }
355         if(mbuf->type != 'M' && mbuf->type != '|') {
356                 if(mime)
357                         print("%s\n", OCTET);
358                 else
359                         print("special file #%C/%s\n", mbuf->type, mbuf->name);
360                 return;
361         }
362         /* may be reading a pipe on standard input */
363         nbuf = readn(fd, buf, sizeof(buf)-1);
364         if(nbuf < 0) {
365                 fprint(2, "cannot read: %r\n");
366                 return;
367         }
368         if(nbuf == 0) {
369                 print("%s\n", mime ? PLAIN : "empty file");
370                 return;
371         }
372         buf[nbuf] = 0;
373
374         utfconv();
375
376         /*
377          * build histogram table
378          */
379         memset(cfreq, 0, sizeof(cfreq));
380         for (i = 0; language[i].name; i++)
381                 language[i].count = 0;
382         eob = (char *)buf+nbuf;
383         for(n = 0, p = (char *)buf; p < eob; n++) {
384                 if (!fullrune(p, eob-p) && eob-p < UTFmax)
385                         break;
386                 p += chartorune(&r, p);
387                 if (r == 0)
388                         f = Cnull;
389                 else if (r <= 0x7f) {
390                         if (!isprint(r) && !isspace(r))
391                                 f = Ceascii;    /* ASCII control char */
392                         else f = r;
393                 } else if (r == 0x80) {
394                         bump_utf_count(r);
395                         f = Cutf;
396                 } else if (r < 0xA0)
397                         f = Cbinary;    /* Invalid Runes */
398                 else if (r <= 0xff)
399                         f = Clatin;     /* Latin 1 */
400                 else {
401                         bump_utf_count(r);
402                         f = Cutf;               /* UTF extension */
403                 }
404                 cfreq[f]++;                     /* ASCII chars peg directly */
405         }
406         /*
407          * gross classify
408          */
409         if (cfreq[Cbinary])
410                 guess = Fbinary;
411         else if (cfreq[Cutf])
412                 guess = Futf;
413         else if (cfreq[Clatin])
414                 guess = Flatin;
415         else if (cfreq[Ceascii])
416                 guess = Feascii;
417         else if (cfreq[Cnull])
418                 guess = Fbinary;
419         else
420                 guess = Fascii;
421         /*
422          * lookup dictionary words
423          */
424         memset(wfreq, 0, sizeof(wfreq));
425         if(guess == Fascii || guess == Flatin || guess == Futf)
426                 wordfreq();
427         /*
428          * call individual classify routines
429          */
430         for(i=0; call[i]; i++)
431                 if((*call[i])())
432                         return;
433
434         /*
435          * if all else fails,
436          * print out gross classification
437          */
438         if (nbuf < 100 && !mime)
439                 print(mime ? PLAIN : "short ");
440         if (guess == Fascii)
441                 print("%s\n", mime ? PLAIN : "Ascii");
442         else if (guess == Feascii)
443                 print("%s\n", mime ? PLAIN : "extended ascii");
444         else if (guess == Flatin)
445                 print("%s\n", mime ? PLAIN : "latin ascii");
446         else if (guess == Futf && utf_count() < 4)
447                 print_utf();
448         else print("%s\n", mime ? OCTET : "binary");
449 }
450
451 void
452 bump_utf_count(Rune r)
453 {
454         int low, high, mid;
455
456         high = sizeof(language)/sizeof(language[0])-1;
457         for (low = 0; low < high;) {
458                 mid = (low+high)/2;
459                 if (r >= language[mid].low) {
460                         if (r <= language[mid].high) {
461                                 language[mid].count++;
462                                 break;
463                         } else low = mid+1;
464                 } else high = mid;
465         }
466 }
467
468 int
469 utf_count(void)
470 {
471         int i, count;
472
473         count = 0;
474         for (i = 0; language[i].name; i++)
475                 if (language[i].count > 0)
476                         switch (language[i].mode) {
477                         case Normal:
478                         case First:
479                                 count++;
480                                 break;
481                         default:
482                                 break;
483                         }
484         return count;
485 }
486
487 int
488 chkascii(void)
489 {
490         int i;
491
492         for (i = 'a'; i < 'z'; i++)
493                 if (cfreq[i])
494                         return 1;
495         for (i = 'A'; i < 'Z'; i++)
496                 if (cfreq[i])
497                         return 1;
498         return 0;
499 }
500
501 int
502 find_first(char *name)
503 {
504         int i;
505
506         for (i = 0; language[i].name != 0; i++)
507                 if (language[i].mode == First
508                         && strcmp(language[i].name, name) == 0)
509                         return i;
510         return -1;
511 }
512
513 void
514 print_utf(void)
515 {
516         int i, printed, j;
517
518         if(mime){
519                 print("%s\n", PLAIN);
520                 return;
521         }
522         if (chkascii()) {
523                 printed = 1;
524                 print("Ascii");
525         } else
526                 printed = 0;
527         for (i = 0; language[i].name; i++)
528                 if (language[i].count) {
529                         switch(language[i].mode) {
530                         case Multi:
531                                 j = find_first(language[i].name);
532                                 if (j < 0)
533                                         break;
534                                 if (language[j].count > 0)
535                                         break;
536                                 /* Fall through */
537                         case Normal:
538                         case First:
539                                 if (printed)
540                                         print(" & ");
541                                 else printed = 1;
542                                 print("%s", language[i].name);
543                                 break;
544                         case Shared:
545                         default:
546                                 break;
547                         }
548                 }
549         if(!printed)
550                 print("UTF");
551         print(" text\n");
552 }
553
554 void
555 wordfreq(void)
556 {
557         int low, high, mid, r;
558         uchar *p, *p2, c;
559
560         p = buf;
561         for(;;) {
562                 while (p < buf+nbuf && !isalpha(*p))
563                         p++;
564                 if (p >= buf+nbuf)
565                         return;
566                 p2 = p;
567                 while(p < buf+nbuf && isalpha(*p))
568                         p++;
569                 c = *p;
570                 *p = 0;
571                 high = sizeof(dict)/sizeof(dict[0]);
572                 for(low = 0;low < high;) {
573                         mid = (low+high)/2;
574                         r = strcmp(dict[mid].word, (char*)p2);
575                         if(r == 0) {
576                                 wfreq[dict[mid].class]++;
577                                 break;
578                         }
579                         if(r < 0)
580                                 low = mid+1;
581                         else
582                                 high = mid;
583                 }
584                 *p++ = c;
585         }
586 }
587
588 typedef struct Filemagic Filemagic;
589 struct Filemagic {
590         ulong x;
591         ulong mask;
592         char *desc;
593         char *mime;
594 };
595
596 /*
597  * integers in this table must be as seen on a little-endian machine
598  * when read from a file.
599  */
600 Filemagic long0tab[] = {
601         0xF16DF16D,     0xFFFFFFFF,     "pac1 audio file",      OCTET,
602         /* "pac1" */
603         0x31636170,     0xFFFFFFFF,     "pac3 audio file",      OCTET,
604         /* "pXc2 */
605         0x32630070,     0xFFFF00FF,     "pac4 audio file",      OCTET,
606         0xBA010000,     0xFFFFFFFF,     "mpeg system stream",   OCTET,
607         0x43614c66,     0xFFFFFFFF,     "FLAC audio file",      "audio/flac",
608         0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable", OCTET,
609         0x04034B50,     0xFFFFFFFF,     "zip archive", "application/zip",
610         070707,         0xFFFF,         "cpio archive", "application/x-cpio",
611         0x2F7,          0xFFFF,         "tex dvi", "application/dvi",
612         0xfaff,         0xfeff,         "mp3 audio",    "audio/mpeg",
613         0xf0ff,         0xf6ff,         "aac audio",    "audio/mpeg",
614         /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
615         0xcefaedfe,     0xFFFFFFFF,     "32-bit power Mach-O executable", OCTET,
616         /* 0xfeedfacf */
617         0xcffaedfe,     0xFFFFFFFF,     "64-bit power Mach-O executable", OCTET,
618         /* 0xcefaedfe */
619         0xfeedface,     0xFFFFFFFF,     "386 Mach-O executable", OCTET,
620         /* 0xcffaedfe */
621         0xfeedfacf,     0xFFFFFFFF,     "amd64 Mach-O executable", OCTET,
622         /* 0xcafebabe */
623         0xbebafeca,     0xFFFFFFFF,     "Mach-O universal executable", OCTET,
624         /*
625          * venti & fossil magic numbers are stored big-endian on disk,
626          * thus the numbers appear reversed in this table.
627          */
628         0xad4e5cd1,     0xFFFFFFFF,     "venti arena", OCTET,
629         0x2bb19a52,     0xFFFFFFFF,     "paq archive", OCTET,
630         0x1a53454e,     0xFFFFFFFF,     "NES ROM", OCTET,
631         /* tcpdump pcap file */
632         0xa1b2c3d4,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
633         0xd4c3b2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
634         0xa1b23c4d,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
635         0x4d3cb2a1,     0xFFFFFFFF,     "pcap file",    "application/vnd.tcpdump.pcap",
636 };
637
638 int
639 filemagic(Filemagic *tab, int ntab, ulong x)
640 {
641         int i;
642
643         for(i=0; i<ntab; i++)
644                 if((x&tab[i].mask) == tab[i].x){
645                         print("%s\n", mime ? tab[i].mime : tab[i].desc);
646                         return 1;
647                 }
648         return 0;
649 }
650
651 int
652 long0(void)
653 {
654         return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
655 }
656
657 typedef struct Fileoffmag Fileoffmag;
658 struct Fileoffmag {
659         ulong   off;
660         Filemagic;
661 };
662
663 /*
664  * integers in this table must be as seen on a little-endian machine
665  * when read from a file.
666  */
667 Fileoffmag longofftab[] = {
668         /*
669          * venti & fossil magic numbers are stored big-endian on disk,
670          * thus the numbers appear reversed in this table.
671          */
672         256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
673         256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
674         128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
675         4,        0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
676         0x100,    0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
677         0x1fc,    0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
678 };
679
680 int
681 fileoffmagic(Fileoffmag *tab, int ntab)
682 {
683         int i;
684         ulong x;
685         Fileoffmag *tp;
686         uchar buf[sizeof(long)];
687
688         for(i=0; i<ntab; i++) {
689                 tp = tab + i;
690                 seek(fd, tp->off, 0);
691                 if (readn(fd, buf, sizeof buf) != sizeof buf)
692                         continue;
693                 x = LENDIAN(buf);
694                 if((x&tp->mask) == tp->x){
695                         print("%s\n", mime ? tp->mime : tp->desc);
696                         return 1;
697                 }
698         }
699         return 0;
700 }
701
702 int
703 longoff(void)
704 {
705         return fileoffmagic(longofftab, nelem(longofftab));
706 }
707
708 int
709 isexec(void)
710 {
711         Fhdr f;
712
713         seek(fd, 0, 0);         /* reposition to start of file */
714         if(crackhdr(fd, &f)) {
715                 print("%s\n", mime ? OCTET : f.name);
716                 return 1;
717         }
718         return 0;
719 }
720
721
/* from tar.c */
enum
{
	NAMSIZ	= 100,		/* size of the name fields */
	TBLOCK	= 512,		/* size of a tar block */
};

/* layout of a tar header; every field is a character array */
struct	header
{
	char	name[NAMSIZ];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	chksum[8];
	char	linkflag;
	char	linkname[NAMSIZ];
	/* rest are defined by POSIX's ustar format; see p1003.2b */
	char	magic[6];	/* "ustar" */
	char	version[2];
	char	uname[32];
	char	gname[32];
	char	devmajor[8];
	char	devminor[8];
	char	prefix[155];	/* if non-null, path = prefix "/" name */
};

/* a whole tar block, viewed as raw bytes or as a header */
union	hblock
{
	char	dummy[TBLOCK];
	struct	header	dbuf;
};
749
750 int
751 checksum(union hblock *hp)
752 {
753         int i;
754         char *cp;
755         struct header *hdr = &hp->dbuf;
756
757         for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
758                 *cp = ' ';
759         i = 0;
760         for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
761                 i += *cp & 0xff;
762         return i;
763 }
764
765 int
766 istar(void)
767 {
768         int chksum;
769         char tblock[TBLOCK];
770         union hblock *hp = (union hblock *)tblock;
771         struct header *hdr = &hp->dbuf;
772
773         seek(fd, 0, 0);         /* reposition to start of file */
774         if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
775                 return 0;
776         chksum = strtol(hdr->chksum, 0, 8);
777         if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
778                 if (strcmp(hdr->magic, "ustar") == 0)
779                         print(mime? "application/x-ustar\n": "posix tar archive\n");
780                 else
781                         print(mime? "application/x-tar\n": "tar archive\n");
782                 return 1;
783         }
784         return 0;
785 }
786
787 /*
788  * initial words to classify file
789  */
790 struct  FILE_STRING
791 {
792         char    *key;
793         char    *filetype;
794         int     length;
795         char    *mime;
796 } file_string[] =
797 {
798         "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
799         "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
800         "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
801         "!<arch>\n__.SYMDEF",   "archive random library",       16,     OCTET,
802         "!<arch>\n",            "archive",                      8,      OCTET,
803         "070707",               "cpio archive - ascii header",  6,      OCTET,
804         "#!/bin/rc",            "rc executable file",           9,      PLAIN,
805         "#!/bin/sh",            "sh executable file",           9,      PLAIN,
806         "%!",                   "postscript",                   2,      "application/postscript",
807         "\004%!",               "postscript",                   3,      "application/postscript",
808         "x T post",             "troff output for post",        8,      "application/troff",
809         "x T Latin1",           "troff output for Latin1",      10,     "application/troff",
810         "x T utf",              "troff output for UTF",         7,      "application/troff",
811         "x T 202",              "troff output for 202",         7,      "application/troff",
812         "x T aps",              "troff output for aps",         7,      "application/troff",
813         "x T ",                 "troff output",                 4,      "application/troff",
814         "GIF",                  "GIF image",                    3,      "image/gif",
815         "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
816         "%PDF",                 "PDF",                          4,      "application/pdf",
817         "<!DOCTYPE",            "HTML file",                    9,      "text/html",
818         "<!doctype",            "HTML file",                    9,      "text/html",
819         "<!--",                 "HTML file",                    4,      "text/html",
820         "<html>",               "HTML file",                    6,      "text/html",
821         "<HTML>",               "HTML file",                    6,      "text/html",
822         "<?xml",                "HTML file",                    5,      "text/html",
823         "\111\111\052\000",     "tiff",                         4,      "image/tiff",
824         "\115\115\000\052",     "tiff",                         4,      "image/tiff",
825         "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
826         "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
827         "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
828         "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
829         "BM",                   "bmp",                          2,      "image/bmp", 
830         "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
831         "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
832         "\033E\033",            "HP PCL printer data",          3,      OCTET,
833         "\033&",                "HP PCL printer data",          2,      OCTET,
834         "\033%-12345X",         "HPJCL file",           9,      "application/hpjcl",
835         "\033Lua",              "Lua bytecode",         4,      OCTET,
836         "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
837         "OggS",                 "ogg audio",            4,      "audio/ogg",
838         ".snd",                 "sun audio",            4,      "audio/basic",
839         "\211PNG",              "PNG image",            4,      "image/png",
840         "P1\n",                 "ppm",                  3,      "image/ppm",
841         "P2\n",                 "ppm",                  3,      "image/ppm",
842         "P3\n",                 "ppm",                  3,      "image/ppm",
843         "P4\n",                 "ppm",                  3,      "image/ppm",
844         "P5\n",                 "ppm",                  3,      "image/ppm",
845         "P6\n",                 "ppm",                  3,      "image/ppm",
846         "/* XPM */\n",  "xbm",                          10,     "image/xbm",
847         ".HTML ",               "troff -ms input",      6,      "text/troff",
848         ".LP",                  "troff -ms input",      3,      "text/troff",
849         ".ND",                  "troff -ms input",      3,      "text/troff",
850         ".PP",                  "troff -ms input",      3,      "text/troff",
851         ".TL",                  "troff -ms input",      3,      "text/troff",
852         ".TR",                  "troff -ms input",      3,      "text/troff",
853         ".TH",                  "manual page",          3,      "text/troff",
854         ".\\\"",                "troff input",          3,      "text/troff",
855         ".de",                  "troff input",          3,      "text/troff",
856         ".if",                  "troff input",          3,      "text/troff",
857         ".nr",                  "troff input",          3,      "text/troff",
858         ".tr",                  "troff input",          3,      "text/troff",
859         "vac:",                 "venti score",          4,      PLAIN,
860         "-----BEGIN CERTIFICATE-----\n",
861                                 "pem certificate",      -1,     PLAIN,
862         "-----BEGIN TRUSTED CERTIFICATE-----\n",
863                                 "pem trusted certificate", -1,  PLAIN,
864         "-----BEGIN X509 CERTIFICATE-----\n",
865                                 "pem x.509 certificate", -1,    PLAIN,
866         "subject=/C=",          "pem certificate with header", -1, PLAIN,
867         "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
868         "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
869         "[playlist]",           "playlist",             10,     "application/x-scpls",
870         "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
871         "BEGIN:VCARD\r\n",      "vCard",                13,     "text/directory;profile=vcard",
872         "BEGIN:VCARD\n",        "vCard",                12,     "text/directory;profile=vcard",
873         "AT&T",                 "DjVu document",        4,      "image/vnd.djvu",
874         "Extended module: ",    "XM audio",             17,     "audio/xm",
875         "MThd",                 "midi audio",           4,      "audio/midi",
876         "MUS\x1a",              "mus audio",            4,      "audio/mus",
877         0,0,0,0
878 };
879
880 int
881 istring(void)
882 {
883         int i, l;
884         struct FILE_STRING *p;
885
886         for(p = file_string; p->key; p++) {
887                 l = p->length;
888                 if(l == -1)
889                         l = strlen(p->key);
890                 if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
891                         print("%s\n", mime ? p->mime : p->filetype);
892                         return 1;
893                 }
894         }
895         if(strncmp((char*)buf, "TYPE=", 5) == 0) {      /* td */
896                 for(i = 5; i < nbuf; i++)
897                         if(buf[i] == '\n')
898                                 break;
899                 if(mime)
900                         print("%s\n", OCTET);
901                 else
902                         print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
903                 return 1;
904         }
905         return 0;
906 }
907
908 struct offstr
909 {
910         ulong   off;
911         struct FILE_STRING;
912 } offstrs[] = {
913         32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
914         32*4, "DICM",   "DICOM medical imaging data",   4,      "application/dicom",
915         0, 0, 0, 0, 0
916 };
917
918 int
919 isoffstr(void)
920 {
921         int n;
922         char buf[256];
923         struct offstr *p;
924
925         for(p = offstrs; p->key; p++) {
926                 seek(fd, p->off, 0);
927                 n = p->length;
928                 if (n > sizeof buf)
929                         n = sizeof buf;
930                 if (readn(fd, buf, n) != n)
931                         continue;
932                 if(memcmp(buf, p->key, n) == 0) {
933                         print("%s\n", mime ? p->mime : p->filetype);
934                         return 1;
935                 }
936         }
937         return 0;
938 }
939
940 int
941 iff(void)
942 {
943         if (strncmp((char*)buf, "FORM", 4) == 0 &&
944             strncmp((char*)buf+8, "AIFF", 4) == 0) {
945                 print("%s\n", mime? "audio/x-aiff": "aiff audio");
946                 return 1;
947         }
948         if (strncmp((char*)buf, "RIFF", 4) == 0) {
949                 if (strncmp((char*)buf+8, "WAVE", 4) == 0)
950                         print("%s\n", mime? "audio/wave": "wave audio");
951                 else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
952                         print("%s\n", mime? "video/avi": "avi video");
953                 else
954                         print("%s\n", mime? OCTET : "riff file");
955                 return 1;
956         }
957         return 0;
958 }
959
/*
 * html tag names, without the leading '<'.  ishtml stops at the
 * first entry that matches, so the table runs from the longest
 * names to the shortest and a name always precedes its prefixes.
 */
char*	html_string[] = {
	/* ten characters */
	"blockquote",
	/* eight */
	"!DOCTYPE",
	"![CDATA[",
	"basefont",
	"frameset",
	"noframes",
	"textarea",
	/* seven */
	"caption",
	/* six */
	"button",
	"center",
	"iframe",
	"object",
	"option",
	"script",
	"select",
	"strong",
	/* five */
	"blink",
	"embed",
	"frame",
	"input",
	"label",
	"param",
	"small",
	"style",
	"table",
	"tbody",
	"tfoot",
	"thead",
	"title",
	/* four */
	"?xml",
	"body",
	"code",
	"font",
	"form",
	"head",
	"html",
	"link",
	"menu",
	"meta",
	"span",
	/* three */
	"!--",
	"big",
	"dir",
	"div",
	"img",
	"pre",
	"sub",
	"sup",
	/* two */
	"br",
	"dd",
	"dl",
	"dt",
	"em",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"hr",
	"li",
	"ol",
	"td",
	"th",
	"tr",
	"tt",
	"ul",
	/* one */
	"a",
	"b",
	"i",
	"p",
	"q",
	"u",
	0,
};
976
977 int
978 ishtml(void)
979 {
980         int i, n, count;
981         uchar *p;
982
983         count = 0;
984         p = buf;
985         for(;;) {
986                 while(p < buf+nbuf && *p != '<')
987                         p++;
988                 p++;
989                 if (p >= buf+nbuf)
990                         break;
991                 if(*p == '/')
992                         p++;
993                 if(p >= buf+nbuf)
994                         break;
995                 for(i = 0; html_string[i]; i++){
996                         n = strlen(html_string[i]);
997                         if(p + n > buf+nbuf)
998                                 continue;
999                         if(cistrncmp(html_string[i], (char*)p, n) == 0) {
1000                                 p += n;
1001                                 if(p < buf+nbuf && strchr("\t\r\n />", *p)){
1002                                         if(++count > 2) {
1003                                                 print("%s\n", mime ? "text/html" : "HTML file");
1004                                                 return 1;
1005                                         }
1006                                 }
1007                                 break;
1008                         }
1009                 }
1010         }
1011         return 0;
1012 }
1013
/*
 * header field names counted by isrfc822, which compares them,
 * ignoring case, with the start of each header line.
 */
char*	rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",	/* hyphenated; "reply to:" never matched a real header */
	"sender:",
	0,
};
1025
1026 int
1027 isrfc822(void)
1028 {
1029
1030         char *p, *q, *r;
1031         int i, count;
1032
1033         count = 0;
1034         p = (char*)buf;
1035         for(;;) {
1036                 q = strchr(p, '\n');
1037                 if(q == nil)
1038                         break;
1039                 *q = 0;
1040                 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
1041                         count++;
1042                         *q = '\n';
1043                         p = q+1;
1044                         continue;
1045                 }
1046                 *q = '\n';
1047                 if(*p != '\t' && *p != ' '){
1048                         r = strchr(p, ':');
1049                         if(r == 0 || r > q)
1050                                 break;
1051                         for(i = 0; rfc822_string[i]; i++) {
1052                                 if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
1053                                         count++;
1054                                         break;
1055                                 }
1056                         }
1057                 }
1058                 p = q+1;
1059         }
1060         if(count >= 3){
1061                 print("%s\n", mime ? "message/rfc822" : "email file");
1062                 return 1;
1063         }
1064         return 0;
1065 }
1066
1067 int
1068 ismbox(void)
1069 {
1070         char *p, *q;
1071
1072         p = (char*)buf;
1073         q = strchr(p, '\n');
1074         if(q == nil)
1075                 return 0;
1076         *q = 0;
1077         if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
1078                 print("%s\n", mime ? PLAIN : "mail box");
1079                 return 1;
1080         }
1081         *q = '\n';
1082         return 0;
1083 }
1084
1085 int
1086 iscint(void)
1087 {
1088         int type;
1089         char *name;
1090         Biobuf b;
1091
1092         if(Binit(&b, fd, OREAD) == Beof)
1093                 return 0;
1094         seek(fd, 0, 0);
1095         type = objtype(&b, &name);
1096         if(type < 0)
1097                 return 0;
1098         if(mime)
1099                 print("%s\n", OCTET);
1100         else
1101                 print("%s intermediate\n", name);
1102         return 1;
1103 }
1104
1105 int
1106 isc(void)
1107 {
1108         int n;
1109
1110         n = wfreq[I1];
1111         /*
1112          * includes
1113          */
1114         if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1115                 goto yes;
1116         if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
1117                 goto yes;
1118         /*
1119          * declarations
1120          */
1121         if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
1122                 goto yes;
1123         /*
1124          * assignments
1125          */
1126         if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
1127                 goto yes;
1128         return 0;
1129
1130 yes:
1131         if(mime){
1132                 print("%s\n", PLAIN);
1133                 return 1;
1134         }
1135         if(wfreq[Alword] > 0)
1136                 print("alef program\n");
1137         else
1138                 print("c program\n");
1139         return 1;
1140 }
1141
1142 int
1143 islimbo(void)
1144 {
1145         /*
1146          * includes
1147          */
1148         if(wfreq[Lword] < 4)
1149                 return 0;
1150         print("%s\n", mime ? PLAIN : "limbo program");
1151         return 1;
1152 }
1153
1154 int
1155 isas(void)
1156 {
1157         /*
1158          * includes
1159          */
1160         if(wfreq[Aword] < 2)
1161                 return 0;
1162         print("%s\n", mime ? PLAIN : "as program");
1163         return 1;
1164 }
1165
1166 int
1167 istga(void)
1168 {
1169         uchar *p;
1170
1171         p = buf;
1172         if(nbuf < 18)
1173                 return 0;
1174         if((p[12] | p[13]<<8) == 0)     /* width */
1175                 return 0;
1176         if((p[14] | p[15]<<8) == 0)     /* height */
1177                 return 0;
1178         if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32)      /* bpp */
1179                 return 0;
1180         if(((p[2]|(1<<3)) & (~3)) != (1<<3))    /* rle flag */
1181                 return 0;
1182         if(p[1] == 0){  /* non color-mapped */
1183                 if((p[2]&3) != 2 && (p[2]&3) != 3)      
1184                         return 0;
1185                 if((p[5] | p[6]<<8) != 0)       /* palette length */
1186                         return 0;
1187         } else
1188         if(p[1] == 1){  /* color-mapped */
1189                 if((p[2]&3) != 1 || p[7] == 0)  
1190                         return 0;
1191                 if((p[5] | p[6]<<8) == 0)       /* palette length */
1192                         return 0;
1193         } else
1194                 return 0;
1195         print("%s\n", mime ? "image/tga" : "targa image");
1196         return 1;
1197 }
1198
1199 int
1200 ismp3(void)
1201 {
1202         uchar *p, *e;
1203
1204         p = buf;
1205         e = p + nbuf-1;
1206         while((p < e) && (p = memchr(p, 0xFF, e - p))){
1207                 if((p[1] & 0xFE) == 0xFA){
1208                         print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
1209                         return 1;
1210                 }
1211                 p++;
1212         }
1213         return 0;
1214 }
1215
1216 /*
1217  * low entropy means encrypted
1218  */
1219 int
1220 ismung(void)
1221 {
1222         int i, bucket[8];
1223         float cs;
1224
1225         if(nbuf < 64)
1226                 return 0;
1227         memset(bucket, 0, sizeof(bucket));
1228         for(i=nbuf-64; i<nbuf; i++)
1229                 bucket[(buf[i]>>5)&07] += 1;
1230
1231         cs = 0.;
1232         for(i=0; i<8; i++)
1233                 cs += (bucket[i]-8)*(bucket[i]-8);
1234         cs /= 8.;
1235         if(cs <= 24.322) {
1236                 if(buf[0]==0x1f && buf[1]==0x9d)
1237                         print("%s\n", mime ? "application/x-compress" : "compressed");
1238                 else
1239                 if(buf[0]==0x1f && buf[1]==0x8b)
1240                         print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
1241                 else
1242                 if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
1243                         print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
1244                 else
1245                 if(buf[0]==0x78 && buf[1]==0x9c)
1246                         print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
1247                 else
1248                         print("%s\n", mime ? OCTET : "encrypted");
1249                 return 1;
1250         }
1251         return 0;
1252 }
1253
1254 /*
1255  * english by punctuation and frequencies
1256  */
1257 int
1258 isenglish(void)
1259 {
1260         int vow, comm, rare, badpun, punct;
1261         char *p;
1262
1263         if(guess != Fascii && guess != Feascii)
1264                 return 0;
1265         badpun = 0;
1266         punct = 0;
1267         for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
1268                 switch(*p) {
1269                 case '.':
1270                 case ',':
1271                 case ')':
1272                 case '%':
1273                 case ';':
1274                 case ':':
1275                 case '?':
1276                         punct++;
1277                         if(p[1] != ' ' && p[1] != '\n')
1278                                 badpun++;
1279                 }
1280         if(badpun*5 > punct)
1281                 return 0;
1282         if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])       /* shell file test */
1283                 return 0;
1284         if(2*cfreq[';'] > cfreq['e'])
1285                 return 0;
1286
1287         vow = 0;
1288         for(p="AEIOU"; *p; p++) {
1289                 vow += cfreq[*p];
1290                 vow += cfreq[tolower(*p)];
1291         }
1292         comm = 0;
1293         for(p="ETAION"; *p; p++) {
1294                 comm += cfreq[*p];
1295                 comm += cfreq[tolower(*p)];
1296         }
1297         rare = 0;
1298         for(p="VJKQXZ"; *p; p++) {
1299                 rare += cfreq[*p];
1300                 rare += cfreq[tolower(*p)];
1301         }
1302         if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
1303                 print("%s\n", mime ? PLAIN : "English text");
1304                 return 1;
1305         }
1306         return 0;
1307 }
1308
1309 /*
1310  * pick up a number with
1311  * syntax _*[0-9]+_
1312  */
#define	P9BITLEN	12
/*
 * p9bitnum: read the decimal number held in the P9BITLEN-byte field
 * at s.  The last byte of the field must be a space and the number
 * must run right up to it.  Stores the value in *v and returns 0 on
 * success; returns -1 otherwise.  The field is modified while it is
 * parsed and restored before returning.
 */
int
p9bitnum(char *s, int *v)
{
	char *end, *sep;

	sep = s + P9BITLEN-1;
	if(*sep != ' ')
		return -1;
	/* cut the field off at its trailing space for strtol */
	*sep = '\0';
	*v = strtol(s, &end, 10);
	*sep = ' ';
	return end == sep ? 0 : -1;
}
1328
/*
 * depthof: parse the depth field (12 bytes at s) of an image header.
 *
 * A field that starts with a digit is the old format, holding the
 * log2 of the depth; *newp is set to 0.  Otherwise the field is a
 * channel descriptor, pairs of a letter from "rgbkamx" and a bit
 * count, whose counts are summed; *newp is set to 1.
 *
 * Returns the depth in bits, always greater than zero, or -1 if the
 * field is blank, malformed, or gives a depth that is neither a
 * multiple nor a divisor of 8.
 */
int
depthof(char *s, int *newp)
{
	char *es, *end;
	long ld;
	int d;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		/* an unchecked shift count would be undefined for large values */
		ld = strtol(s, &end, 0);
		if(ld < 0 || ld > 30)
			return -1;
		return 1<<ld;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		/* strchr also matches the terminating NUL, so rule it out */
		if(*s == '\0' || strchr("rgbkamx", *s) == 0)
			return -1;
		s++;
		if('0'<=*s && *s<='9')
			d += strtoul(s, &s, 10);
		else
			return -1;
	}

	/* a zero depth (e.g. "r0") is no image and callers divide by the depth */
	if(d <= 0)
		return -1;
	if(d % 8 == 0 || 8 % d == 0)
		return d;
	return -1;
}
1361
1362 int
1363 isp9bit(void)
1364 {
1365         int dep, lox, loy, hix, hiy, px, new, cmpr;
1366         long len;
1367         char *newlabel;
1368         uchar *cp;
1369
1370         cp = buf;
1371         cmpr = 0;
1372         if(memcmp(cp, "compressed\n", 11) == 0) {
1373                 cmpr = 1;
1374                 cp = buf + 11;
1375         }
1376
1377         if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
1378                 return 0;
1379         newlabel = new ? "" : "old ";
1380         if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
1381                 return 0;
1382         if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
1383                 return 0;
1384         if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
1385                 return 0;
1386         if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
1387                 return 0;
1388
1389         hix -= lox;
1390         hiy -= loy;
1391         if(hix <= 0 || hiy <= 0)
1392                 return 0;
1393
1394         if(dep < 8){
1395                 px = 8/dep;             /* pixels per byte */
1396                 /* set l to number of bytes of data per scan line */
1397                 len = (hix+px-1)/px;
1398         }else
1399                 len = hix*dep/8;
1400         len *= hiy;                     /* col length */
1401         len += 5 * P9BITLEN;            /* size of initial ascii */
1402
1403         /*
1404          * for compressed images, don't look any further. otherwise:
1405          * for image file, length is non-zero and must match calculation above.
1406          * for /dev/window and /dev/screen the length is always zero.
1407          * for subfont, the subfont header should follow immediately.
1408          */
1409         if (cmpr) {
1410                 print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
1411                         newlabel, dep, hix, hiy);
1412                 return 1;
1413         }
1414         /*
1415          * mbuf->length == 0 probably indicates reading a pipe.
1416          * Ghostscript sometimes produces a little extra on the end.
1417          */
1418         if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
1419             mbuf->length > len && mbuf->length < len+P9BITLEN)) {
1420                 print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
1421                         newlabel, dep, hix, hiy);
1422                 return 1;
1423         }
1424         if (p9subfont(buf+len)) {
1425                 print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
1426                         newlabel, dep, hix, hiy);
1427                 return 1;
1428         }
1429         return 0;
1430 }
1431
1432 int
1433 p9subfont(uchar *p)
1434 {
1435         int n, h, a;
1436
1437         /* if image too big, assume it's a subfont */
1438         if (p+3*P9BITLEN > buf+sizeof(buf))
1439                 return 1;
1440
1441         if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0)    /* char count */
1442                 return 0;
1443         if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0)    /* height */
1444                 return 0;
1445         if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0)    /* ascent */
1446                 return 0;
1447         if(n > 0 && h > 0 && a >= 0)
1448                 return 1;
1449         return 0;
1450 }
1451
1452 #define WHITESPACE(c)           ((c) == ' ' || (c) == '\t' || (c) == '\n')
1453
1454 int
1455 isp9font(void)
1456 {
1457         uchar *cp, *p;
1458         int i, n;
1459         char pathname[1024];
1460
1461         cp = buf;
1462         if (!getfontnum(cp, &cp))       /* height */
1463                 return 0;
1464         if (!getfontnum(cp, &cp))       /* ascent */
1465                 return 0;
1466         for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
1467                 if (!getfontnum(cp, &cp))       /* min */
1468                         break;
1469                 if (!getfontnum(cp, &cp))       /* max */
1470                         return 0;
1471                 getfontnum(cp, &cp);    /* optional offset */
1472                 while (WHITESPACE(*cp))
1473                         cp++;
1474                 for (p = cp; *cp && !WHITESPACE(*cp); cp++)
1475                                 ;
1476                         /* construct a path name, if needed */
1477                 n = 0;
1478                 if (*p != '/' && slash) {
1479                         n = slash-fname+1;
1480                         if (n < sizeof(pathname))
1481                                 memcpy(pathname, fname, n);
1482                         else n = 0;
1483                 }
1484                 if (n+cp-p+4 < sizeof(pathname)) {
1485                         memcpy(pathname+n, p, cp-p);
1486                         n += cp-p;
1487                         pathname[n] = 0;
1488                         if (access(pathname, AEXIST) < 0) {
1489                                 strcpy(pathname+n, ".0");
1490                                 if (access(pathname, AEXIST) < 0)
1491                                         return 0;
1492                         }
1493                 }
1494         }
1495         if (i) {
1496                 print("%s\n", mime ? PLAIN : "font file");
1497                 return 1;
1498         }
1499         return 0;
1500 }
1501
1502 int
1503 getfontnum(uchar *cp, uchar **rp)
1504 {
1505         while (WHITESPACE(*cp))         /* extract ulong delimited by whitespace */
1506                 cp++;
1507         if (*cp < '0' || *cp > '9')
1508                 return 0;
1509         strtoul((char *)cp, (char **)rp, 0);
1510         if (!WHITESPACE(**rp)) {
1511                 *rp = cp;
1512                 return 0;
1513         }
1514         return 1;
1515 }
1516
1517 int
1518 isrtf(void)
1519 {
1520         if(strstr((char *)buf, "\\rtf1")){
1521                 print(mime ? "application/rtf\n" : "rich text format\n");
1522                 return 1;
1523         }
1524         return 0;
1525 }
1526
1527 int
1528 ismsdos(void)
1529 {
1530         if (buf[0] == 0x4d && buf[1] == 0x5a){
1531                 print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
1532                 return 1;
1533         }
1534         return 0;
1535 }
1536
1537 int
1538 isicocur(void)
1539 {
1540         if(buf[0] || buf[1] || buf[3] || buf[9])
1541                 return 0;
1542         if(buf[4] == 0x00 && buf[5] == 0x00)
1543                 return 0;
1544         switch(buf[2]){
1545         case 1:
1546                 print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
1547                 return 1;
1548         case 2:
1549                 print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
1550                 return 1;
1551         }
1552         return 0;
1553 }
1554
1555 int
1556 iself(void)
1557 {
1558         static char *cpu[] = {          /* NB: incomplete and arbitary list */
1559         [1]     "WE32100",
1560         [2]     "SPARC",
1561         [3]     "i386",
1562         [4]     "M68000",
1563         [5]     "M88000",
1564         [6]     "i486",
1565         [7]     "i860",
1566         [8]     "R3000",
1567         [9]     "S370",
1568         [10]    "R4000",
1569         [15]    "HP-PA",
1570         [18]    "sparc v8+",
1571         [19]    "i960",
1572         [20]    "PPC-32",
1573         [21]    "PPC-64",
1574         [40]    "ARM",
1575         [41]    "Alpha",
1576         [43]    "sparc v9",
1577         [50]    "IA-64",
1578         [62]    "AMD64",
1579         [75]    "VAX",
1580         };
1581         static char *type[] = {
1582         [1]     "relocatable object",
1583         [2]     "executable",
1584         [3]     "shared library",
1585         [4]     "core dump",
1586         };
1587
1588         if (memcmp(buf, "\x7fELF", 4) == 0){
1589                 if (!mime){
1590                         int isdifend = 0;
1591                         int n = (buf[19] << 8) | buf[18];
1592                         char *p = "unknown";
1593                         char *t = "unknown";
1594
1595                         if (n > 0 && n < nelem(cpu) && cpu[n])
1596                                 p = cpu[n];
1597                         else {
1598                                 /* try the other byte order */
1599                                 isdifend = 1;
1600                                 n = (buf[18] << 8) | buf[19];
1601                                 if (n > 0 && n < nelem(cpu) && cpu[n])
1602                                         p = cpu[n];
1603                         }
1604                         if(isdifend)
1605                                 n = (buf[16]<< 8) | buf[17];
1606                         else
1607                                 n = (buf[17]<< 8) | buf[16];
1608
1609                         if(n>0 && n < nelem(type) && type[n])
1610                                 t = type[n];
1611                         print("%s ELF %s\n", p, t);
1612                 }
1613                 else
1614                         print("application/x-elf-executable\n");
1615                 return 1;
1616         }
1617
1618         return 0;
1619 }
1620
1621 int
1622 isface(void)
1623 {
1624         int i, j, ldepth, l;
1625         char *p;
1626
1627         ldepth = -1;
1628         for(j = 0; j < 3; j++){
1629                 for(p = (char*)buf, i=0; i<3; i++){
1630                         if(p[0] != '0' || p[1] != 'x')
1631                                 return 0;
1632                         if(buf[2+8] == ',')
1633                                 l = 2;
1634                         else if(buf[2+4] == ',')
1635                                 l = 1;
1636                         else
1637                                 return 0;
1638                         if(ldepth == -1)
1639                                 ldepth = l;
1640                         if(l != ldepth)
1641                                 return 0;
1642                         strtoul(p, &p, 16);
1643                         if(*p++ != ',')
1644                                 return 0;
1645                         while(*p == ' ' || *p == '\t')
1646                                 p++;
1647                 }
1648                 if (*p++ != '\n')
1649                         return 0;
1650         }
1651
1652         if(mime)
1653                 print("application/x-face\n");
1654         else
1655                 print("face image depth %d\n", ldepth);
1656         return 1;
1657 }