git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/cmd/uhtml.c
devproc: can't wait for ourselves to stop (thanks Shamar)
[plan9front.git] / sys / src / cmd / uhtml.c
index 2eb52b16daa17875fa9f4114ac21100e9c9b8942..bef236f43e2e94c40f945fce461f9b77946026a1 100644 (file)
@@ -5,6 +5,7 @@
 int nbuf;
 char buf[64*1024+1];
 char *cset = nil;
+char *whitespace = " \t\r\n";
 
 void
 usage(void)
@@ -21,11 +22,11 @@ attr(char *s, char *a)
        if((s = cistrstr(s, a)) == nil)
                return nil;
        s += strlen(a);
-       while(strchr("\r\n\t ", *s))
+       while(*s && strchr(whitespace, *s))
                s++;
        if(*s++ != '=')
                return nil;
-       while(strchr("\r\n\t ", *s))
+       while(*s && strchr(whitespace, *s))
                s++;
        q = 0;
        if(*s == '"' || *s == '\'')
@@ -39,16 +40,16 @@ attr(char *s, char *a)
                        continue;
                break;
        }
-       if(e - s > 1)
-               return smprint("%.*s", (int)(e-s), s);
+       if((e - s) > 1)
+               return smprint("%.*s", (int)(e - s), s);
        return nil;
 }
 
 void
 main(int argc, char *argv[])
 {
-       int n, pfd[2], pflag = 0;
-       char *arg[4], *s, *e, *p, *g, t;
+       int n, q, pfd[2], pflag = 0;
+       char *arg[4], *s, *g, *e, *p, *a, t;
        Rune r;
 
        ARGBEGIN {
@@ -64,65 +65,85 @@ main(int argc, char *argv[])
 
        if(*argv){
                close(0);
-               if(open(*argv, OREAD) != 1)
+               if(open(*argv, OREAD) != 0)
                        sysfatal("open: %r");
        }
        nbuf = 0;
-       p = buf;
-       g = buf;
        while(nbuf < sizeof(buf)-1){
                if((n = read(0, buf + nbuf, sizeof(buf)-1-nbuf)) <= 0)
                        break;
                nbuf += n;
                buf[nbuf] = 0;
-               if(nbuf == n){
-                       if(memcmp(p, "\xEF\xBB\xBF", 3)==0){
-                               p += 3;
-                               cset = "utf";
-                               break;
-                       }
-                       if(memcmp(p, "\xFE\xFF", 2) == 0){
-                               p += 2;
-                               cset = "unicode-be";
-                               break;
+       }
+
+       p = buf;
+       if(nbuf >= 3 && memcmp(p, "\xEF\xBB\xBF", 3)==0){
+               p += 3;
+               nbuf -= 3;
+               cset = "utf";
+               goto Found;
+       }
+       if(nbuf >= 2 && memcmp(p, "\xFE\xFF", 2) == 0){
+               p += 2;
+               nbuf -= 2;
+               cset = "unicode-be";
+               goto Found;
+       }
+       if(nbuf >= 2 && memcmp(p, "\xFF\xFE", 2) == 0){
+               p += 2;
+               nbuf -= 2;
+               cset = "unicode-le";
+               goto Found;
+       }
+
+       s = p;
+       do {
+               if((s = strchr(s, '<')) == nil)
+                       break;
+               q = 0;
+               g = ++s;
+               e = buf+nbuf;
+               while(s < e){
+                       if(*s == '=' && q == 0)
+                               q = '=';
+                       else if(*s == '\'' || *s == '"'){
+                               if(q == '=')
+                                       q = *s;
+                               else if(q == *s)
+                                       q = 0;
                        }
-                       if(memcmp(p, "\xFF\xFE", 2) == 0){
-                               p += 2;
-                               cset = "unicode-le";
+                       else if(*s == '>' && q != '\'' && q != '"'){
+                               e = s;
                                break;
                        }
+                       else if(q == '=' && strchr(whitespace, *s) == nil)
+                               q = 0;
+                       s++;
                }
-               s = g;
-               do {
-                       if((s = strchr(s, '<')) == nil)
-                               break;
-                       g = s;
-                       if((e = strchr(++s, '>')) == nil)
-                               e = buf+nbuf;
-                       t = *e;
-                       *e = 0;
-                       if((cset = attr(s, "encoding")) || (cset = attr(s, "charset"))){
-                               *e = t;
-                               break;
-                       }
+               t = *e;
+               *e = 0;
+               if((a = attr(g, "encoding")) != nil || (a = attr(g, "charset")) != nil)
+               if(cistrcmp(a, "utf") != 0 && cistrcmp(a, "utf-8") != 0){
+                       cset = a;
                        *e = t;
-                       s = ++e;
-               } while(t);
-       }
-       nbuf -= p - buf;
+                       break;
+               }
+               *e = t;
+               s = ++e;
+       } while(t);
 
-       if(cset == nil){
-               cset = "utf";
-               s = p;
-               while(s+UTFmax < p+nbuf){
-                       s += chartorune(&r, s);
-                       if(r == Runeerror){
+       s = p;
+       while(s+UTFmax < p+nbuf){
+               s += chartorune(&r, s);
+               if(r == Runeerror){
+                       if(cset == nil)
                                cset = "latin1";
-                               break;
-                       }
+                       goto Found;
                }
        }
+       cset = "utf";
 
+Found:
        if(pflag){
                print("%s\n", cset);
                exits(0);