]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/uhtml.c
/sys/src/cmd/ndb/dns.h:
[plan9front.git] / sys / src / cmd / uhtml.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4
5 int nbuf;
6 char buf[64*1024+1];
7 char *cset = nil;
8 char *whitespace = " \t\r\n";
9
10 void
11 usage(void)
12 {
13         fprint(2, "%s [ -p ] [ -c charset ] [ file ]\n", argv0);
14         exits("usage");
15 }
16
17 char*
18 attr(char *s, char *a)
19 {
20         char *e, q;
21
22         if((s = cistrstr(s, a)) == nil)
23                 return nil;
24         s += strlen(a);
25         while(*s && strchr(whitespace, *s))
26                 s++;
27         if(*s++ != '=')
28                 return nil;
29         while(*s && strchr(whitespace, *s))
30                 s++;
31         q = 0;
32         if(*s == '"' || *s == '\'')
33                 q = *s++;
34         for(e = s; *e; e++){
35                 if(*e == q)
36                         break;
37                 if(isalnum(*e))
38                         continue;
39                 if(*e == '-' || *e == '_')
40                         continue;
41                 break;
42         }
43         if((e - s) > 1)
44                 return smprint("%.*s", (int)(e - s), s);
45         return nil;
46 }
47
48 void
49 main(int argc, char *argv[])
50 {
51         int n, q, pfd[2], pflag = 0;
52         char *arg[4], *s, *g, *e, *p, *a, t;
53         Rune r;
54
55         ARGBEGIN {
56         case 'c':
57                 cset = EARGF(usage());
58                 break;
59         case 'p':
60                 pflag = 1;
61                 break;
62         default:
63                 usage();
64         } ARGEND;
65
66         if(*argv){
67                 close(0);
68                 if(open(*argv, OREAD) != 0)
69                         sysfatal("open: %r");
70         }
71         nbuf = 0;
72         while(nbuf < sizeof(buf)-1){
73                 if((n = read(0, buf + nbuf, sizeof(buf)-1-nbuf)) <= 0)
74                         break;
75                 nbuf += n;
76                 buf[nbuf] = 0;
77         }
78
79         p = buf;
80         if(nbuf >= 3 && memcmp(p, "\xEF\xBB\xBF", 3)==0){
81                 p += 3;
82                 nbuf -= 3;
83                 cset = "utf";
84                 goto Found;
85         }
86         if(nbuf >= 2 && memcmp(p, "\xFE\xFF", 2) == 0){
87                 p += 2;
88                 nbuf -= 2;
89                 cset = "unicode-be";
90                 goto Found;
91         }
92         if(nbuf >= 2 && memcmp(p, "\xFF\xFE", 2) == 0){
93                 p += 2;
94                 nbuf -= 2;
95                 cset = "unicode-le";
96                 goto Found;
97         }
98
99         s = p;
100         do {
101                 if((s = strchr(s, '<')) == nil)
102                         break;
103                 q = 0;
104                 g = ++s;
105                 e = buf+nbuf;
106                 while(s < e){
107                         if(*s == '=' && q == 0)
108                                 q = '=';
109                         else if(*s == '\'' || *s == '"'){
110                                 if(q == '=')
111                                         q = *s;
112                                 else if(q == *s)
113                                         q = 0;
114                         }
115                         else if(*s == '>' && q != '\'' && q != '"'){
116                                 e = s;
117                                 break;
118                         }
119                         else if(q == '=' && strchr(whitespace, *s) == nil)
120                                 q = 0;
121                         s++;
122                 }
123                 t = *e;
124                 *e = 0;
125                 if((a = attr(g, "encoding")) != nil || (a = attr(g, "charset")) != nil)
126                 if(cistrcmp(a, "utf") != 0 && cistrcmp(a, "utf-8") != 0){
127                         cset = a;
128                         *e = t;
129                         break;
130                 }
131                 *e = t;
132                 s = ++e;
133         } while(t);
134
135         s = p;
136         while(s+UTFmax < p+nbuf){
137                 s += chartorune(&r, s);
138                 if(r == Runeerror){
139                         if(cset == nil)
140                                 cset = "latin1";
141                         goto Found;
142                 }
143         }
144         cset = "utf";
145
146 Found:
147         if(pflag){
148                 print("%s\n", cset);
149                 exits(0);
150         }
151
152         if(nbuf == 0){
153                 write(1, p, 0);
154                 exits(0);
155         }
156
157         if(pipe(pfd) < 0)
158                 sysfatal("pipe: %r");
159
160         switch(rfork(RFFDG|RFREND|RFPROC)){
161         case -1:
162                 sysfatal("fork: %r");
163         case 0:
164                 dup(pfd[0], 0);
165                 close(pfd[0]);
166                 close(pfd[1]);
167
168                 arg[0] = "rc";
169                 arg[1] = "-c";
170                 arg[2] = smprint("{tcs -f %s || cat} | tcs -f html", cset);
171                 arg[3] = nil;
172                 exec("/bin/rc", arg);
173         }
174
175         dup(pfd[1], 1);
176         close(pfd[0]);
177         close(pfd[1]);
178
179         while(nbuf > 0){
180                 if(write(1, p, nbuf) != nbuf)
181                         sysfatal("write: %r");
182                 p = buf;
183                 if((nbuf = read(0, p, sizeof(buf))) < 0)
184                         sysfatal("read: %r");
185         }
186         close(1);
187         waitpid();
188         exits(0);
189 }