git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/uhtml.c
uhtml: assume latin1 if not valid utf8
[plan9front.git] / sys / src / cmd / uhtml.c
1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4
/* number of input bytes currently buffered in buf */
5 int nbuf;
/* input buffer; the extra byte leaves room for the NUL main stores after the data */
6 char buf[64*1024+1];
/* name of the source charset; nil until -c, a byte order mark or a declaration in the input sets it */
7 char *cset = nil;
8
9 void
10 usage(void)
11 {
12         fprint(2, "%s [ -h ] [ -c charset ] [ file ]\n", argv0);
13         exits("usage");
14 }
15
16 char*
17 strval(char *s)
18 {
19         char *e, q;
20
21         while(strchr("\t ", *s))
22                 s++;
23         q = 0;
24         if(*s == '"' || *s == '\'')
25                 q = *s++;
26         for(e = s; *e; e++){
27                 if(*e == q)
28                         break;
29                 if(isalnum(*e))
30                         continue;
31                 if(*e == '-' || *e == '_')
32                         continue;
33                 break;
34         }
35         if(e - s > 1)
36                 return smprint("%.*s", (int)(e-s), s);
37         return nil;
38 }
39
40 void
41 main(int argc, char *argv[])
42 {
43         int n, pfd[2], pflag = 0;
44         char *arg[4], *s, *p;
45         Rune r;
46
47         ARGBEGIN {
48         case 'h':
49                 usage();
50         case 'c':
51                 cset = EARGF(usage());
52                 break;
53         case 'p':
54                 pflag = 1;
55                 break;
56         } ARGEND;
57
58         if(*argv){
59                 close(0);
60                 if(open(*argv, OREAD) != 1)
61                         sysfatal("open: %r");
62         }
63         nbuf = 0;
64         p = buf;
65         while(nbuf < sizeof(buf)-1){
66                 if((n = read(0, buf + nbuf, sizeof(buf)-1-nbuf)) <= 0)
67                         break;
68                 nbuf += n;
69                 buf[nbuf] = 0;
70                 if(nbuf == n){
71                         if(memcmp(p, "\xEF\xBB\xBF", 3)==0){
72                                 p += 3;
73                                 cset = "utf";
74                                 break;
75                         }
76                         if(memcmp(p, "\xFE\xFF", 2) == 0){
77                                 p += 2;
78                                 cset = "unicode-be";
79                                 break;
80                         }
81                         if(memcmp(p, "\xFF\xFE", 2) == 0){
82                                 p += 2;
83                                 cset = "unicode-le";
84                                 break;
85                         }
86                 }
87                 if(s = cistrstr(p, "encoding="))
88                         if(s = strval(s+9)){
89                                 cset = s;
90                                 break;
91                         }
92                 if(s = cistrstr(p, "charset="))
93                         if(s = strval(s+8)){
94                                 cset = s;
95                                 break;
96                         }
97         }
98         nbuf -= p - buf;
99
100         if(cset == nil){
101                 cset = "utf";
102                 s = p;
103                 while(s+UTFmax < p+nbuf){
104                         s += chartorune(&r, s);
105                         if(r == Runeerror){
106                                 cset = "latin1";
107                                 break;
108                         }
109                 }
110         }
111
112         if(pflag){
113                 print("%s\n", cset);
114                 exits(0);
115         }
116
117         if(nbuf == 0){
118                 write(1, p, 0);
119                 exits(0);
120         }
121
122         if(pipe(pfd) < 0)
123                 sysfatal("pipe: %r");
124
125         switch(rfork(RFFDG|RFREND|RFPROC)){
126         case -1:
127                 sysfatal("fork: %r");
128         case 0:
129                 dup(pfd[0], 0);
130                 close(pfd[0]);
131                 close(pfd[1]);
132
133                 arg[0] = "rc";
134                 arg[1] = "-c";
135                 arg[2] = smprint("{tcs -f %s | tcs -f html} || cat", cset);
136                 arg[3] = nil;
137                 exec("/bin/rc", arg);
138         }
139
140         dup(pfd[1], 1);
141         close(pfd[0]);
142         close(pfd[1]);
143
144         while(nbuf > 0){
145                 if(write(1, p, nbuf) != nbuf)
146                         sysfatal("write: %r");
147                 p = buf;
148                 if((nbuf = read(0, p, sizeof(buf))) < 0)
149                         sysfatal("read: %r");
150         }
151         close(1);
152         waitpid();
153         exits(0);
154 }