]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/aux/wikifmt.c
a28e5e2d6444a83d44f994c82b1e837df3df2526
[plan9front.git] / sys / src / cmd / aux / wikifmt.c
1 /*
2  * google code wiki to html converter.
3  * https://code.google.com/p/support/wiki/WikiSyntax
4  */
5 #include <u.h>
6 #include <libc.h>
7
enum {
	HUNK = 8*1024,	/* size of the output buffer and of each read from input */
};

/* output buffer: HUNK bytes starting at buf, filled up to opos */
char	*buf;
char	*opos;

/* input text: unread portion is [pos, epos) */
char	*pos;
char	*epos;

/*
 * parser state shared between (recursive) calls of body();
 * file-scope objects start out zero unless initialized.
 */
int	inquote;	/* indent of the enclosing blockquote, 0 if none */
int	intable;	/* 0: no table, 1: table open, 2: table row open */
int	inlist;		/* indent of the enclosing list, 0 if none */
int	indent = -1;	/* indent of current line, -1 at start of a line */

void	body(void);
23
24 int
25 match(char *s)
26 {
27         int n;
28
29         n = strlen(s);
30         if(pos+n > epos)
31                 return 0;
32         return cistrncmp(pos, s, n) == 0;
33 }
34
35 int
36 got(char *s)
37 {
38         if(!match(s))
39                 return 0;
40         pos += strlen(s);
41         return 1;
42 }
43
44 char*
45 look(char *s, char *e)
46 {
47         char *p;
48         int n;
49
50         if(e == nil)
51                 e = epos;
52         n = strlen(s);
53         e -= n;
54         for(p = pos; p <= e; p++)
55                 if(cistrncmp(p, s, n) == 0)
56                         return p;
57         return nil;
58 }
59
60 void
61 eatspace(void)
62 {
63         while(pos < epos && (*pos == ' ' || *pos == '\t'))
64                 pos++;
65 }
66
67 char*
68 trimback(char *s)
69 {
70         while(s > pos && strchr("\t ", s[-1]) != nil)
71                 s--;
72         return s;
73 }
74
75 void
76 flush(void)
77 {
78         int n;
79
80         n = opos - buf;
81         if(n <= 0)
82                 return;
83         if(write(1, buf, n) != n)
84                 sysfatal("write: %r");
85         opos = buf;
86 }
87
88 void
89 output(char *s, int n)
90 {
91         int r;
92
93         if(n <= 0)
94                 return;
95         r = HUNK - (opos - buf);
96         if(n > r){
97                 output(s, r);
98                 output(s+r, n-r);
99         } else {
100                 memmove(opos, s, n);
101                 opos += n;
102                 if(r == n)
103                         flush();
104         }
105 }
106
/*
 * Append the NUL-terminated string s to the output buffer.
 */
void
string(char *s)
{
	int len;

	len = strlen(s);
	output(s, len);
}
112
113 void
114 escape(char *e)
115 {
116         char *p;
117
118         for(p = pos; p < e; p++)
119                 if(*p == '<'){
120                         output(pos, p - pos);
121                         pos = p+1;
122                         string("&lt;");
123                 } else if(*p == '>'){
124                         output(pos, p - pos);
125                         pos = p+1;
126                         string("&gt;");
127                 } else if(*p == '&'){
128                         output(pos, p - pos);
129                         pos = p+1;
130                         string("&amp;");
131                 }
132         output(pos, p - pos);
133         pos = p;
134 }
135
136 void
137 ebody(char *e)
138 {
139         char *t;
140
141         t = epos;
142         epos = trimback(e);
143         body();
144         pos = e;
145         epos = t;
146 }
147
148 int
149 tag(char *term, char *tag)
150 {
151         char *e;
152
153         if(!got(term))
154                 return 0;
155         if(e = look(term, nil)){
156                 eatspace();
157                 string("<"); string(tag); string(">");
158                 ebody(e);
159                 string("</"); string(tag); string(">");
160                 pos += strlen(term);
161         } else
162                 string(term);
163         return 1;
164 }
165
166 int
167 heading(void)
168 {
169         char *o, *s, *e;
170         int n;
171
172         for(s = "======"; *s; s++)
173                 if(got(s))
174                         break;
175         if(*s == 0)
176                 return 0;
177         n = strlen(s);
178         e = look("=", look("\n", nil));
179         if(e == nil)
180                 e = look("\n", nil);
181         if(e == nil)
182                 e = epos;
183         eatspace();
184         string("<h");
185         output("0123456"+n, 1);
186         string("><a name=\"");
187         o = pos;
188         s = trimback(e);
189         while(pos < s){
190                 if((*pos >= 'a' && *pos <= 'z')
191                 || (*pos >= 'A' && *pos <= 'Z')
192                 || (*pos >= '0' && *pos <= '9')
193                 || (*pos == '-'))
194                         output(pos, 1);
195                 else
196                         output("_", 1);
197                 pos++;
198         }
199         string("\"></a>");
200         pos = o;
201         ebody(e);
202         while(got("="))
203                 ;
204         string("</h");
205         output("0123456"+n, 1);
206         string(">");
207         return 1;
208 }
209
210 void
211 link(char *e)
212 {
213         char *s, *o;
214
215         s = o = pos;
216         while(s < epos){
217                 if(e != nil && s >= e)
218                         break;
219                 if(*s == 0 || strchr("<>[] \t\r\n", *s) != nil)
220                         break;
221                 s++;
222         }
223         if(s-4 >= o)
224         if(cistrncmp(s-4, ".png", 4)
225         && cistrncmp(s-4, ".jpg", 4)
226         && cistrncmp(s-4, ".gif", 4)){
227                 string("<a href=\"");
228                 escape(s);
229                 string("\">");
230                 eatspace();
231                 if(e != nil && pos < e)
232                         ebody(e);
233                 else {
234                         pos = o;
235                         escape(s);
236                 }
237                 string("</a>");
238         } else {
239                 string("<img src=\"");
240                 escape(s);
241                 string("\">");
242         }
243 }
244
245 void
246 body(void)
247 {
248         char *s;
249         int t;
250
251 Next:
252         if(pos >= epos)
253                 return;
254
255         if(got("\n") || got("\r\n"))
256                 indent = -1;
257         if(got("\n") || got("\r\n")){
258                 string("<br>");
259                 while(got("\n") || got("\r\n"))
260                         ;
261         }
262
263         if(indent == -1){
264                 indent = 0;
265                 for(;;){
266                         if(got(" "))
267                                 indent++;
268                         else if(got("\t")){
269                                 indent += 8;
270                                 indent %= 8;
271                         }
272                         else break;
273                 }
274
275                 if(intable && look("||", look("\n", nil)) == nil){
276                         string("</table>");
277                         intable = 0;
278                 }
279
280                 string("\n");
281                 if((indent < inlist) || (indent < inquote))
282                         return;
283
284                 while(indent > 0){
285                         if(pos >= epos)
286                                 return;
287                         if(got("*") || got("#")){
288                                 s = pos-1;
289                                 eatspace();
290                                 if(indent > inlist){
291                                         if(*s == '*')
292                                                 string("<ul><li>");
293                                         else
294                                                 string("<ol><li>");
295                                         t = inlist;
296                                         inlist = indent;
297                                         body();
298                                         inlist = t;
299                                         if(*s == '*')
300                                                 string("</li></ul>");
301                                         else
302                                                 string("</li></ol>");
303                                 } else {
304                                         string("</li><li>");
305                                         break;
306                                 }
307                         } else if(indent > inquote){
308                                 string("<blockquote>");
309                                 t = inquote;
310                                 inquote = indent;
311                                 body();
312                                 inquote = t;
313                                 string("</blockquote>");
314                         } else
315                                 break;
316                 }
317
318                 if(indent == 0){
319                         if(got("#")){
320                                 if((pos = look("\n", nil)) == nil)
321                                         pos = epos;
322                                 goto Next;
323                         }
324                         if(heading())
325                                 goto Next;
326                         if(got("----")){
327                                 while(got("-"))
328                                         ;
329                                 string("<hr>");
330                                 goto Next;
331                         }
332                 }
333         }
334
335         if(got("`")){
336                 if(s = look("`", nil)){
337                         escape(s);
338                         pos = s+1;
339                 } else
340                         string("`");
341         }
342         else if(got("<")){
343                 string("<");
344                 if(s = look(">", nil)){
345                         s++;
346                         output(pos, s - pos);
347                         pos = s;
348                 }
349         }
350         else if(got("[")){
351                 if(s = look("]", nil)){
352                         link(s);
353                         pos = s+1;
354                 } else
355                         string("[");
356         }
357         else if(tag("*", "b") ||
358                 tag("_", "i") ||
359                 tag("^", "sup") ||
360                 tag(",,", "sub") ||
361                 tag("~~", "strike")){
362         }
363         else if(got("{{{")){
364                 if(s = look("}}}", nil)){
365                         if(look("\n", s)){
366                                 string("<pre>");
367                                 escape(s);
368                                 string("</pre>");
369                         } else {
370                                 string("<tt>");
371                                 escape(s);
372                                 string("</tt>");
373                         }
374                         pos = s+3;
375                 } else
376                         string("{{{");
377         }
378         else if(got("||")){
379                 if(s = look("||", look("\n", nil))){
380                         eatspace();
381                         switch(intable){
382                         case 0: string("<table>");
383                                 intable++;
384                         case 1: string("<tr>");
385                                 intable++;
386                         }
387                         string("<td>");
388                         ebody(s);
389                         string("</td>");
390                 } else if(intable){
391                         string("</tr>");
392                         intable = 1;
393                 }
394         }
395         else if(match("http://"))
396                 link(nil);
397         else if(match("https://"))
398                 link(nil);
399         else if(match("ftp://"))
400                 link(nil);
401         else{
402                 output(pos, 1);
403                 pos++;
404         }
405         goto Next;
406 }
407
408 void
409 usage(void)
410 {
411         fprint(2, "usage: %s [ file ]\n", argv0);
412         exits("usage");
413 }
414
415 void
416 main(int argc, char *argv[])
417 {
418         int n;
419
420         ARGBEGIN{
421         default:
422                 usage();
423         }ARGEND;
424
425         if(argc != 0 && argc != 1)
426                 usage();
427
428         if(*argv){
429                 if((n = open(*argv, OREAD)) < 0)
430                         sysfatal("open %s: %r", *argv);
431                 if(dup(n, 0) < 0)
432                         sysfatal("dup: %r");
433         }
434
435         buf = opos = sbrk(HUNK);
436         pos = epos = buf + HUNK;
437         for(;;){
438                 if(brk(epos + HUNK + 8) < 0)
439                         sysfatal("brk: %r");
440                 if((n = read(0, epos, HUNK)) < 0)
441                         sysfatal("read: %r");
442                 if(n == 0)
443                         break;
444                 epos += n;
445         }
446         if(epos > pos && epos[-1] != '\n')
447                 *epos++ = '\n';
448
449         body();
450         flush();
451         exits(0);
452 }