]> git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/cmd/file.c
mothra: never snarf the "Go:" box
[plan9front.git] / sys / src / cmd / file.c
index 78e202eb920cca299909afd90b5d6e8ebb3c15cc..7da5abdaaf6540421d8d2c185eac40e181fb79b0 100644 (file)
@@ -150,11 +150,13 @@ int       ishtml(void);
 int    isrfc822(void);
 int    ismbox(void);
 int    islimbo(void);
+int    ismp3(void);
 int    ismung(void);
 int    isp9bit(void);
 int    isp9font(void);
 int    isrtf(void);
 int    ismsdos(void);
+int    isicocur(void);
 int    iself(void);
 int    istring(void);
 int    isoffstr(void);
@@ -192,7 +194,9 @@ int (*call[])(void) =
        isp9bit,        /* plan 9 image (as from /dev/window) */
        isrtf,          /* rich text format */
        ismsdos,        /* msdos exe (virus file attachement) */
+       isicocur,               /* windows icon or cursor file */
        isface,         /* ascii face file */
+       ismp3,
 
        /* last resorts */
        ismung,         /* entropy compressed/encrypted */
@@ -586,10 +590,10 @@ Filemagic long0tab[] = {
        /* "pXc2 */
        0x32630070,     0xFFFF00FF,     "pac4 audio file\n",    OCTET,
        0xBA010000,     0xFFFFFFFF,     "mpeg system stream\n", OCTET,
-       0x43614c66,     0xFFFFFFFF,     "FLAC audio file\n",    OCTET,
+       0x43614c66,     0xFFFFFFFF,     "FLAC audio file\n",    "audio/flac",
        0x30800CC0,     0xFFFFFFFF,     "inferno .dis executable\n", OCTET,
        0x04034B50,     0xFFFFFFFF,     "zip archive\n", "application/zip",
-       070707,         0xFFFF,         "cpio archive\n", OCTET,
+       070707,         0xFFFF,         "cpio archive\n", "application/x-cpio",
        0x2F7,          0xFFFF,         "tex dvi\n", "application/dvi",
        0xfaff,         0xfeff,         "mp3 audio\n",  "audio/mpeg",
        0xfeff0000,     0xffffffff,     "utf-32be\n",   "text/plain charset=utf-32be",
@@ -752,8 +756,7 @@ istar(void)
        chksum = strtol(hdr->chksum, 0, 8);
        if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
                if (strcmp(hdr->magic, "ustar") == 0)
-                       print(mime? "application/x-ustar\n":
-                               "posix tar archive\n");
+                       print(mime? "application/x-ustar\n": "posix tar archive\n");
                else
                        print(mime? "application/x-tar\n": "tar archive\n");
                return 1;
@@ -772,6 +775,9 @@ struct      FILE_STRING
        char    *mime;
 } file_string[] =
 {
+       "\x1f\x9d",             "compressed",                   2,      "application/x-compress",
+       "\x1f\x8b",             "gzip compressed",              2,      "application/x-gzip",
+       "BZh",                  "bzip2 compressed",             3,      "application/x-bzip2",
        "!<arch>\n__.SYMDEF",   "archive random library",       16,     "application/octet-stream",
        "!<arch>\n",            "archive",                      8,      "application/octet-stream",
        "070707",               "cpio archive - ascii header",  6,      "application/octet-stream",
@@ -787,19 +793,25 @@ struct    FILE_STRING
        "GIF",                  "GIF image",                    3,      "image/gif",
        "\0PC Research, Inc\0", "ghostscript fax file",         18,     "application/ghostscript",
        "%PDF",                 "PDF",                          4,      "application/pdf",
-       "<html>\n",             "HTML file",                    7,      "text/html",
-       "<HTML>\n",             "HTML file",                    7,      "text/html",
+       "<!DOCTYPE",            "HTML file",                    9,      "text/html",
+       "<!doctype",            "HTML file",                    9,      "text/html",
+       "<!--",                 "HTML file",                    4,      "text/html",
+       "<html>",               "HTML file",                    6,      "text/html",
+       "<HTML>",               "HTML file",                    6,      "text/html",
+       "<?xml",                "HTML file",                    5,      "text/html",
        "\111\111\052\000",     "tiff",                         4,      "image/tiff",
        "\115\115\000\052",     "tiff",                         4,      "image/tiff",
        "\377\330\377\340",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\341",     "jpeg",                         4,      "image/jpeg",
        "\377\330\377\333",     "jpeg",                         4,      "image/jpeg",
-       "BM",                   "bmp",                          2,      "image/bmp",
-       "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/octet-stream",
+       "\xff\xd8",             "jpeg",                         2,      "image/jpeg",
+       "BM",                   "bmp",                          2,      "image/bmp", 
+       "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",     "microsoft office document",    8,      "application/doc",
        "<MakerFile ",          "FrameMaker file",              11,     "application/framemaker",
        "\033E\033",    "HP PCL printer data",          3,      OCTET,
        "\033%-12345X", "HPJCL file",           9,      "application/hpjcl",
        "ID3",                  "mp3 audio with id3",   3,      "audio/mpeg",
+       "OggS",                 "ogg audio",            4,      "audio/ogg",
        "\211PNG",              "PNG image",            4,      "image/png",
        "P3\n",                 "ppm",                          3,      "image/ppm",
        "P6\n",                 "ppm",                          3,      "image/ppm",
@@ -825,6 +837,9 @@ struct      FILE_STRING
                                "pem x.509 certificate", -1,    "text/plain",
        "subject=/C=",          "pem certificate with header", -1, "text/plain",
        "process snapshot ",    "process snapshot",     -1,     "application/snapfs",
+       "d8:announce",          "torrent file",         11,     "application/x-bittorrent",
+       "[playlist]",           "playlist",             10,     "application/x-scpls",
+       "#EXTM3U",              "playlist",             7,      "audio/x-mpegurl",
        0,0,0,0
 };
 
@@ -864,7 +879,7 @@ struct offstr
        ulong   off;
        struct FILE_STRING;
 } offstrs[] = {
-       32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      OCTET,
+       32*1024, "\001CD001\001",       "ISO9660 CD image",     7,      "application/x-iso9660-image",
        0, 0, 0, 0, 0
 };
 
@@ -914,58 +929,56 @@ iff(void)
        return 0;
 }
 
-char*  html_string[] =
-{
-       "title",
-       "body",
-       "head",
-       "strong",
-       "h1",
-       "h2",
-       "h3",
-       "h4",
-       "h5",
-       "h6",
-       "ul",
-       "li",
-       "dl",
-       "br",
-       "em",
+char*  html_string[] = {       /* longest names first: ishtml takes the first prefix match */
+       "blockquote",
+       "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
+       "caption",
+       "button", "center", "iframe", "object", "option", "script",
+       "select", "strong",
+       "blink", "embed", "frame", "input", "label", "param", "small",
+       "style", "table", "tbody", "tfoot", "thead", "title",
+       "?xml", "body", "code", "font", "form", "head", "html",
+       "link", "menu", "meta", "span",
+       "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
+       "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
+       "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
+       "a", "b", "i", "p", "q", "u",
        0,
 };
 
 int
 ishtml(void)
 {
-       uchar *p, *q;
-       int i, count;
+       int i, n, count;        /* report HTML once 3 known names follow '<' or '</' */
+       uchar *p;               /* scan position in buf */
 
-               /* compare strings between '<' and '>' to html table */
        count = 0;
        p = buf;
        for(;;) {
-               while (p < buf+nbuf && *p != '<')
+               while(p < buf+nbuf && *p != '<')        /* find the next '<' */
                        p++;
                p++;
                if (p >= buf+nbuf)
                        break;
                if(*p == '/')
                        p++;
-               q = p;
-               while(p < buf+nbuf && *p != '>')
-                       p++;
-               if (p >= buf+nbuf)
+               if(p >= buf+nbuf)       /* nothing left after '<' or '</' */
                        break;
-               for(i = 0; html_string[i]; i++) {
-                       if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
-                               if(count++ > 4) {
-                                       print(mime ? "text/html\n" : "HTML file\n");
-                                       return 1;
+               for(i = 0; html_string[i]; i++){        /* try each name at p, in table order */
+                       n = strlen(html_string[i]);
+                       if(p + n > buf+nbuf)    /* name would run past the data in buf */
+                               continue;
+                       if(cistrncmp(html_string[i], (char*)p, n) == 0) {       /* first n bytes match */
+                               p += n;
+                               if(p < buf+nbuf && strchr("\t\r\n />", *p)){    /* name must end at whitespace, '/' or '>' */
+                                       if(++count > 2) {       /* third accepted name decides */
+                                               print(mime ? "text/html\n" : "HTML file\n");
+                                               return 1;
+                                       }
                                }
                                break;
                        }
                }
-               p++;
        }
        return 0;
 }
@@ -1124,6 +1137,23 @@ isas(void)
        return 1;
 }
 
+/* mp3 audio if any 0xFF byte in buf is followed by 0xFA or 0xFB */
+int
+ismp3(void)
+{
+       uchar *s, *last;
+
+       last = buf + nbuf-1;    /* s < last keeps s[1] inside the data */
+       for(s = buf; s < last && (s = memchr(s, 0xFF, last - s)); s++){
+               if((s[1] & 0xFE) != 0xFA)
+                       continue;
+               print(mime ? "audio/mpeg\n" : "mp3 audio\n");
+               return 1;
+       }
+       /* ran out of 0xFF bytes without a matching second byte */
+       return 0;
+}
+
 /*
  * low entropy means encrypted
  */
@@ -1145,13 +1175,16 @@ ismung(void)
        cs /= 8.;
        if(cs <= 24.322) {
                if(buf[0]==0x1f && buf[1]==0x9d)
-                       print(mime ? OCTET : "compressed\n");
+                       print(mime ? "application/x-compress" : "compressed\n");
                else
                if(buf[0]==0x1f && buf[1]==0x8b)
-                       print(mime ? OCTET : "gzip compressed\n");
+                       print(mime ? "application/x-gzip" : "gzip compressed\n");
                else
                if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
-                       print(mime ? OCTET : "bzip2 compressed\n");
+                       print(mime ? "application/x-bzip2" : "bzip2 compressed\n");
+               else
+               if(buf[0]==0x78 && buf[1]==0x9c)
+                       print(mime ? "application/x-deflate" : "zlib compressed\n");
                else
                        print(mime ? OCTET : "encrypted\n");
                return 1;
@@ -1322,7 +1355,7 @@ isp9bit(void)
         * for subfont, the subfont header should follow immediately.
         */
        if (cmpr) {
-               print(mime ? OCTET : "Compressed %splan 9 image or subfont, depth %d\n",
+               print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
                        newlabel, dep);
                return 1;
        }
@@ -1332,11 +1365,11 @@ isp9bit(void)
         */
        if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
            mbuf->length > len && mbuf->length < len+P9BITLEN)) {
-               print(mime ? OCTET : "%splan 9 image, depth %d\n", newlabel, dep);
+               print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
                return 1;
        }
        if (p9subfont(buf+len)) {
-               print(mime ? OCTET : "%ssubfont file, depth %d\n", newlabel, dep);
+               print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
                return 1;
        }
        return 0;
@@ -1448,6 +1481,24 @@ ismsdos(void)
        return 0;
 }
 
+/* windows icon or cursor file, told apart by byte 2 */
+int
+isicocur(void)
+{
+       /* all 10 bytes examined must have been read; 0, 1, 3 and 9 are zero */
+       if(nbuf < 10 || buf[0] || buf[1] || buf[3] || buf[9])
+               return 0;
+       /* bytes 4 and 5 must not both be zero */
+       if(buf[4] == 0x00 && buf[5] == 0x00)
+               return 0;
+       if(buf[2] != 1 && buf[2] != 2)
+               return 0;
+       /* 1 is an icon, 2 a cursor; both report the same mime type */
+       print(mime ? "image/x-icon\n" : buf[2] == 1 ?
+               "Microsoft icon file\n" : "Microsoft cursor file\n");
+       return 1;
+}
+
 int
 iself(void)
 {