git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/uhtml.c
uhtml: use first match
[plan9front.git] / sys / src / cmd / uhtml.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4
/*
 * Input buffer shared with main.  main reads at most sizeof(buf)-1
 * bytes into it while guessing the charset and keeps a NUL after
 * the data, hence the extra byte.
 */
char buf[64*1024+1];

/* Number of input bytes in buf (not counting a skipped byte order mark). */
int nbuf;

/* Name of the input charset: set by the -c option and by the detection in main. */
char *cset;
8
9 void
10 usage(void)
11 {
12         fprint(2, "%s [ -p ] [ -c charset ] [ file ]\n", argv0);
13         exits("usage");
14 }
15
16 char*
17 attr(char *s, char *a)
18 {
19         char *e, q;
20
21         if((s = cistrstr(s, a)) == nil)
22                 return nil;
23         s += strlen(a);
24         while(strchr("\r\n\t ", *s))
25                 s++;
26         if(*s++ != '=')
27                 return nil;
28         while(strchr("\r\n\t ", *s))
29                 s++;
30         q = 0;
31         if(*s == '"' || *s == '\'')
32                 q = *s++;
33         for(e = s; *e; e++){
34                 if(*e == q)
35                         break;
36                 if(isalnum(*e))
37                         continue;
38                 if(*e == '-' || *e == '_')
39                         continue;
40                 break;
41         }
42         if((e - s) > 1)
43                 return smprint("%.*s", (int)(e - s), s);
44         return nil;
45 }
46
47 void
48 main(int argc, char *argv[])
49 {
50         int n, q, pfd[2], pflag = 0;
51         char *arg[4], *s, *e, *p, *g, *a, t;
52         Rune r;
53
54         ARGBEGIN {
55         case 'c':
56                 cset = EARGF(usage());
57                 break;
58         case 'p':
59                 pflag = 1;
60                 break;
61         default:
62                 usage();
63         } ARGEND;
64
65         if(*argv){
66                 close(0);
67                 if(open(*argv, OREAD) != 1)
68                         sysfatal("open: %r");
69         }
70         nbuf = 0;
71         p = buf;
72         g = buf;
73         while(nbuf < sizeof(buf)-1){
74                 if((n = read(0, buf + nbuf, sizeof(buf)-1-nbuf)) <= 0)
75                         break;
76                 nbuf += n;
77                 buf[nbuf] = 0;
78                 if(nbuf == n){
79                         if(memcmp(p, "\xEF\xBB\xBF", 3)==0){
80                                 p += 3;
81                                 nbuf -= 3;
82                                 cset = "utf";
83                                 goto Found;
84                         }
85                         if(memcmp(p, "\xFE\xFF", 2) == 0){
86                                 p += 2;
87                                 nbuf -= 2;
88                                 cset = "unicode-be";
89                                 goto Found;
90                         }
91                         if(memcmp(p, "\xFF\xFE", 2) == 0){
92                                 p += 2;
93                                 nbuf -= 2;
94                                 cset = "unicode-le";
95                                 goto Found;
96                         }
97                 }
98                 s = g;
99                 do {
100                         if((s = strchr(s, '<')) == nil)
101                                 break;
102                         q = 0;
103                         g = ++s;
104                         e = buf+nbuf;
105                         while(s < e){
106                                 if(*s == '\'' || *s == '"'){
107                                         if(q == 0)
108                                                 q = *s;
109                                         else if(q == *s)
110                                                 q = 0;
111                                 } else if(*s == '>' && q == 0){
112                                         e = s;
113                                         break;
114                                 }
115                                 s++;
116                         }
117                         t = *e;
118                         *e = 0;
119                         if((a = attr(g, "encoding")) || (a = attr(g, "charset"))){
120                                 *e = t;
121                                 cset = a;
122                                 goto Found;
123                         }
124                         *e = t;
125                         s = ++e;
126                 } while(t);
127         }
128         s = p;
129         while(s+UTFmax < p+nbuf){
130                 s += chartorune(&r, s);
131                 if(r == Runeerror){
132                         cset = "latin1";
133                         goto Found;
134                 }
135         }
136         cset = "utf";
137 Found:
138         if(pflag){
139                 print("%s\n", cset);
140                 exits(0);
141         }
142
143         if(nbuf == 0){
144                 write(1, p, 0);
145                 exits(0);
146         }
147
148         if(pipe(pfd) < 0)
149                 sysfatal("pipe: %r");
150
151         switch(rfork(RFFDG|RFREND|RFPROC)){
152         case -1:
153                 sysfatal("fork: %r");
154         case 0:
155                 dup(pfd[0], 0);
156                 close(pfd[0]);
157                 close(pfd[1]);
158
159                 arg[0] = "rc";
160                 arg[1] = "-c";
161                 arg[2] = smprint("{tcs -f %s || cat} | tcs -f html", cset);
162                 arg[3] = nil;
163                 exec("/bin/rc", arg);
164         }
165
166         dup(pfd[1], 1);
167         close(pfd[0]);
168         close(pfd[1]);
169
170         while(nbuf > 0){
171                 if(write(1, p, nbuf) != nbuf)
172                         sysfatal("write: %r");
173                 p = buf;
174                 if((nbuf = read(0, p, sizeof(buf))) < 0)
175                         sysfatal("read: %r");
176         }
177         close(1);
178         waitpid();
179         exits(0);
180 }