int isrfc822(void);
int ismbox(void);
int islimbo(void);
+int ismp3(void);
int ismung(void);
int isp9bit(void);
int isp9font(void);
int isrtf(void);
int ismsdos(void);
+int isicocur(void);
int iself(void);
int istring(void);
int isoffstr(void);
isp9bit, /* plan 9 image (as from /dev/window) */
isrtf, /* rich text format */
ismsdos, /* msdos exe (virus file attachment) */
+ isicocur, /* windows icon or cursor file */
isface, /* ascii face file */
+ ismp3,
/* last resorts */
ismung, /* entropy compressed/encrypted */
/* "pXc2 */
0x32630070, 0xFFFF00FF, "pac4 audio file\n", OCTET,
0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET,
- 0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", OCTET,
+ 0x43614c66, 0xFFFFFFFF, "FLAC audio file\n", "audio/flac",
0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET,
0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip",
- 070707, 0xFFFF, "cpio archive\n", OCTET,
+ 070707, 0xFFFF, "cpio archive\n", "application/x-cpio",
0x2F7, 0xFFFF, "tex dvi\n", "application/dvi",
0xfaff, 0xfeff, "mp3 audio\n", "audio/mpeg",
0xfeff0000, 0xffffffff, "utf-32be\n", "text/plain charset=utf-32be",
chksum = strtol(hdr->chksum, 0, 8);
if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
if (strcmp(hdr->magic, "ustar") == 0)
- print(mime? "application/x-ustar\n":
- "posix tar archive\n");
+ print(mime? "application/x-ustar\n": "posix tar archive\n");
else
print(mime? "application/x-tar\n": "tar archive\n");
return 1;
char *mime;
} file_string[] =
{
+ "\x1f\x9d", "compressed", 2, "application/x-compress",
+ "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
+ "BZh", "bzip2 compressed", 3, "application/x-bzip2",
"!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream",
"!<arch>\n", "archive", 8, "application/octet-stream",
"070707", "cpio archive - ascii header", 6, "application/octet-stream",
"GIF", "GIF image", 3, "image/gif",
"\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
"%PDF", "PDF", 4, "application/pdf",
- "<html>\n", "HTML file", 7, "text/html",
- "<HTML>\n", "HTML file", 7, "text/html",
+ "<!DOCTYPE", "HTML file", 9, "text/html",
+ "<!doctype", "HTML file", 9, "text/html",
+ "<!--", "HTML file", 4, "text/html",
+ "<html>", "HTML file", 6, "text/html",
+ "<HTML>", "HTML file", 6, "text/html",
+ "<?xml", "HTML file", 5, "text/html",
"\111\111\052\000", "tiff", 4, "image/tiff",
"\115\115\000\052", "tiff", 4, "image/tiff",
"\377\330\377\340", "jpeg", 4, "image/jpeg",
"\377\330\377\341", "jpeg", 4, "image/jpeg",
"\377\330\377\333", "jpeg", 4, "image/jpeg",
- "BM", "bmp", 2, "image/bmp",
- "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream",
+ "\xff\xd8", "jpeg", 2, "image/jpeg",
+ "BM", "bmp", 2, "image/bmp",
+ "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
"<MakerFile ", "FrameMaker file", 11, "application/framemaker",
"\033E\033", "HP PCL printer data", 3, OCTET,
"\033%-12345X", "HPJCL file", 9, "application/hpjcl",
"ID3", "mp3 audio with id3", 3, "audio/mpeg",
+ "OggS", "ogg audio", 4, "audio/ogg",
"\211PNG", "PNG image", 4, "image/png",
"P3\n", "ppm", 3, "image/ppm",
"P6\n", "ppm", 3, "image/ppm",
"pem x.509 certificate", -1, "text/plain",
"subject=/C=", "pem certificate with header", -1, "text/plain",
"process snapshot ", "process snapshot", -1, "application/snapfs",
+ "d8:announce", "torrent file", 11, "application/x-bittorrent",
+ "[playlist]", "playlist", 10, "application/x-scpls",
+ "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
0,0,0,0
};
ulong off;
struct FILE_STRING;
} offstrs[] = {
- 32*1024, "\001CD001\001", "ISO9660 CD image", 7, OCTET,
+ 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
0, 0, 0, 0, 0
};
return 0;
}
-char* html_string[] =
-{
- "title",
- "body",
- "head",
- "strong",
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6",
- "ul",
- "li",
- "dl",
- "br",
- "em",
+/*
+ * tag names (and a few markup openers) that ishtml looks for right
+ * after a '<'.  ishtml compares them case-insensitively and stops at
+ * the first entry that is a prefix of the input, so the list is kept
+ * longest-first: a name must appear before any shorter name that is a
+ * prefix of it (e.g. "body" before "b").  the list ends with 0.
+ */
+char* html_string[] = {
+ "blockquote",
+ "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
+ "caption",
+ "button", "center", "iframe", "object", "option", "script",
+ "select", "strong",
+ "blink", "embed", "frame", "input", "label", "param", "small",
+ "style", "table", "tbody", "tfoot", "thead", "title",
+ "?xml", "body", "code", "font", "form", "head", "html",
+ "link", "menu", "meta", "span",
+ "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
+ "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
+ "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
+ "a", "b", "i", "p", "q", "u",
0,
};
int
ishtml(void)
{
- uchar *p, *q;
- int i, count;
+ int i, n, count;
+ uchar *p;
- /* compare strings between '<' and '>' to html table */
count = 0;
p = buf;
for(;;) {
- while (p < buf+nbuf && *p != '<')
+ while(p < buf+nbuf && *p != '<')
p++;
p++;
if (p >= buf+nbuf)
break;
if(*p == '/')
p++;
- q = p;
- while(p < buf+nbuf && *p != '>')
- p++;
- if (p >= buf+nbuf)
+ if(p >= buf+nbuf)
break;
- for(i = 0; html_string[i]; i++) {
- if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
- if(count++ > 4) {
- print(mime ? "text/html\n" : "HTML file\n");
- return 1;
+ for(i = 0; html_string[i]; i++){
+ n = strlen(html_string[i]);
+ if(p + n > buf+nbuf)
+ continue;
+ if(cistrncmp(html_string[i], (char*)p, n) == 0) {
+ p += n;
+ if(p < buf+nbuf && strchr("\t\r\n />", *p)){
+ if(++count > 2) {
+ print(mime ? "text/html\n" : "HTML file\n");
+ return 1;
+ }
}
break;
}
}
- p++;
}
return 0;
}
return 1;
}
+/*
+ * scan the buffer for a 0xFF byte immediately followed by 0xFA or
+ * 0xFB, the same two-byte pattern the magic table entry for
+ * "mp3 audio" tests at the start of the file.  prints the type and
+ * returns 1 on the first hit, 0 if there is none.
+ */
+int
+ismp3(void)
+{
+	uchar *s, *last;
+
+	/* stop one byte short so that s[1] stays inside the nbuf bytes */
+	last = buf + nbuf-1;
+	for(s = buf; s < last; s++){
+		s = memchr(s, 0xFF, last - s);
+		if(s == 0)
+			break;
+		if((s[1] & 0xFE) != 0xFA)
+			continue;
+		print(mime ? "audio/mpeg\n" : "mp3 audio\n");
+		return 1;
+	}
+	return 0;
+}
+
/*
* low entropy means encrypted
*/
cs /= 8.;
if(cs <= 24.322) {
if(buf[0]==0x1f && buf[1]==0x9d)
- print(mime ? OCTET : "compressed\n");
+ print(mime ? "application/x-compress" : "compressed\n");
else
if(buf[0]==0x1f && buf[1]==0x8b)
- print(mime ? OCTET : "gzip compressed\n");
+ print(mime ? "application/x-gzip" : "gzip compressed\n");
else
if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
- print(mime ? OCTET : "bzip2 compressed\n");
+ print(mime ? "application/x-bzip2" : "bzip2 compressed\n");
+ else
+ if(buf[0]==0x78 && buf[1]==0x9c)
+ print(mime ? "application/x-deflate" : "zlib compressed\n");
else
print(mime ? OCTET : "encrypted\n");
return 1;
* for subfont, the subfont header should follow immediately.
*/
if (cmpr) {
- print(mime ? OCTET : "Compressed %splan 9 image or subfont, depth %d\n",
+ print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d\n",
newlabel, dep);
return 1;
}
*/
if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
mbuf->length > len && mbuf->length < len+P9BITLEN)) {
- print(mime ? OCTET : "%splan 9 image, depth %d\n", newlabel, dep);
+ print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d\n", newlabel, dep);
return 1;
}
if (p9subfont(buf+len)) {
- print(mime ? OCTET : "%ssubfont file, depth %d\n", newlabel, dep);
+ print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d\n", newlabel, dep);
return 1;
}
return 0;
return 0;
}
+/*
+ * windows icon (.ico) or cursor (.cur) file.
+ * checks performed on the first ten bytes of buf:
+ *	buf[0], buf[1]	must be 0
+ *	buf[2], buf[3]	little-endian type: 1 is an icon, 2 is a cursor
+ *	buf[4], buf[5]	must not both be 0 (presumably the image count)
+ *	buf[9]		must be 0 (presumably the reserved byte of the
+ *			first directory entry)
+ * prints the type and returns 1 on a match, otherwise returns 0.
+ */
+int
+isicocur(void)
+{
+	/* every byte examined below must really have been read */
+	if(nbuf < 10)
+		return 0;
+	if(buf[0] || buf[1] || buf[3] || buf[9])
+		return 0;
+	if(buf[4] == 0x00 && buf[5] == 0x00)
+		return 0;
+	switch(buf[2]){
+	case 1:
+		print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
+		return 1;
+	case 2:
+		print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
+		return 1;
+	}
+	return 0;
+}
+
int
iself(void)
{