]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/aux/wikifmt.c
wikifmt: more complete title to name= anchor translation
[plan9front.git] / sys / src / cmd / aux / wikifmt.c
1 /*
2  * google code wiki to html converter.
3  * https://code.google.com/p/support/wiki/WikiSyntax
4  */
5 #include <u.h>
6 #include <libc.h>
7
enum {
        HUNK = 8*1024,  /* size of the output buffer and of each read request */
};

/*
 * The first HUNK bytes at buf collect pending output; main() stores
 * the input text directly behind them.
 */
char    *buf, *opos;    /* output buffer and the next free byte in it */
char    *pos, *epos;    /* parse cursor and end of the region being parsed */

int     inquote = 0, inlist = 0;        /* indent of the enclosing blockquote / list */
int     intable = 0;    /* 0: no table, 1: table open, 2: table row open */
int     indent = -1;    /* indent of the current line, -1: not measured yet */

void    body(void);
23
24 int
25 match(char *s)
26 {
27         int n;
28
29         n = strlen(s);
30         if(pos+n > epos)
31                 return 0;
32         return cistrncmp(pos, s, n) == 0;
33 }
34
35 int
36 got(char *s)
37 {
38         if(!match(s))
39                 return 0;
40         pos += strlen(s);
41         return 1;
42 }
43
44 char*
45 look(char *s, char *e)
46 {
47         char *p;
48         int n;
49
50         if(e == nil)
51                 e = epos;
52         n = strlen(s);
53         e -= n;
54         for(p = pos; p <= e; p++)
55                 if(cistrncmp(p, s, n) == 0)
56                         return p;
57         return nil;
58 }
59
60 void
61 eatspace(void)
62 {
63         while(pos < epos && (*pos == ' ' || *pos == '\t'))
64                 pos++;
65 }
66
67 char*
68 trimback(char *s)
69 {
70         while(s > pos && strchr("\t ", s[-1]) != nil)
71                 s--;
72         return s;
73 }
74
75 void
76 flush(void)
77 {
78         int n;
79
80         n = opos - buf;
81         if(n <= 0)
82                 return;
83         if(write(1, buf, n) != n)
84                 sysfatal("write: %r");
85         opos = buf;
86 }
87
88 void
89 output(char *s, int n)
90 {
91         int r;
92
93         if(n <= 0)
94                 return;
95         r = HUNK - (opos - buf);
96         if(n > r){
97                 output(s, r);
98                 output(s+r, n-r);
99         } else {
100                 memmove(opos, s, n);
101                 opos += n;
102                 if(r == n)
103                         flush();
104         }
105 }
106
/*
 * Append the NUL terminated string s to the output.
 */
void
string(char *s)
{
        int len;

        len = strlen(s);
        output(s, len);
}
112
113 void
114 escape(char *e)
115 {
116         char *p;
117
118         for(p = pos; p < e; p++)
119                 if(*p == '<'){
120                         output(pos, p - pos);
121                         pos = p+1;
122                         string("&lt;");
123                 } else if(*p == '>'){
124                         output(pos, p - pos);
125                         pos = p+1;
126                         string("&gt;");
127                 } else if(*p == '&'){
128                         output(pos, p - pos);
129                         pos = p+1;
130                         string("&amp;");
131                 }
132         output(pos, p - pos);
133         pos = p;
134 }
135
136 void
137 ebody(char *e)
138 {
139         char *t;
140
141         t = epos;
142         epos = trimback(e);
143         body();
144         pos = e;
145         epos = t;
146 }
147
148 int
149 tag(char *term, char *tag)
150 {
151         char *e;
152
153         if(!got(term))
154                 return 0;
155         if(e = look(term, nil)){
156                 eatspace();
157                 string("<"); string(tag); string(">");
158                 ebody(e);
159                 string("</"); string(tag); string(">");
160                 pos += strlen(term);
161         } else
162                 string(term);
163         return 1;
164 }
165
166 int
167 heading(void)
168 {
169         char *o, *s, *e;
170         int n;
171
172         for(s = "======"; *s; s++)
173                 if(got(s))
174                         break;
175         if(*s == 0)
176                 return 0;
177         n = strlen(s);
178         e = look("=", look("\n", nil));
179         if(e == nil)
180                 e = look("\n", nil);
181         if(e == nil)
182                 e = epos;
183         eatspace();
184         string("<h");
185         output("0123456"+n, 1);
186         string("><a name=\"");
187         o = pos;
188         s = trimback(e);
189         while(pos < s){
190                 if((*pos >= 'a' && *pos <= 'z')
191                 || (*pos >= 'A' && *pos <= 'Z')
192                 || (*pos >= '0' && *pos <= '9')
193                 || (strchr("!#$%&()_+,-./{|}~:;=?@[\\]^_`", *pos) != 0))
194                         output(pos, 1);
195                 else if(*pos == ' ' || *pos == '\t')
196                         output("_", 1);
197                 else if(*pos == '<')
198                         output("&lt;", 4);
199                 else if(*pos == '>')
200                         output("&gt;", 4);
201                 else if(*pos == '&')
202                         output("&amp;", 5);
203                 else if(*pos == '"')
204                         output("&quot;", 6);
205                 else if(*pos == '\'')
206                         output("&#39;", 5);
207                 pos++;
208         }
209         string("\"></a>");
210         pos = o;
211         ebody(e);
212         while(got("="))
213                 ;
214         string("</h");
215         output("0123456"+n, 1);
216         string(">");
217         return 1;
218 }
219
220 void
221 link(char *e)
222 {
223         char *s, *o;
224
225         s = o = pos;
226         while(s < epos){
227                 if(e != nil && s >= e)
228                         break;
229                 if(*s == 0 || strchr("<>[] \t\r\n", *s) != nil)
230                         break;
231                 s++;
232         }
233         if(s-4 >= o)
234         if(cistrncmp(s-4, ".png", 4)
235         && cistrncmp(s-4, ".jpg", 4)
236         && cistrncmp(s-4, ".gif", 4)){
237                 string("<a href=\"");
238                 escape(s);
239                 string("\">");
240                 eatspace();
241                 if(e != nil && pos < e)
242                         ebody(e);
243                 else {
244                         pos = o;
245                         escape(s);
246                 }
247                 string("</a>");
248         } else {
249                 string("<img src=\"");
250                 escape(s);
251                 string("\">");
252         }
253 }
254
255 void
256 body(void)
257 {
258         char *s;
259         int t;
260
261 Next:
262         if(pos >= epos)
263                 return;
264
265         if(got("\n") || got("\r\n"))
266                 indent = -1;
267         if(got("\n") || got("\r\n")){
268                 string("<br>");
269                 while(got("\n") || got("\r\n"))
270                         ;
271         }
272
273         if(indent == -1){
274                 indent = 0;
275                 for(;;){
276                         if(got(" "))
277                                 indent++;
278                         else if(got("\t")){
279                                 indent += 8;
280                                 indent %= 8;
281                         }
282                         else break;
283                 }
284
285                 if(intable && look("||", look("\n", nil)) == nil){
286                         string("</table>");
287                         intable = 0;
288                 }
289
290                 string("\n");
291                 if((indent < inlist) || (indent < inquote))
292                         return;
293
294                 while(indent > 0){
295                         if(pos >= epos)
296                                 return;
297                         if(got("*") || got("#")){
298                                 s = pos-1;
299                                 eatspace();
300                                 if(indent > inlist){
301                                         if(*s == '*')
302                                                 string("<ul><li>");
303                                         else
304                                                 string("<ol><li>");
305                                         t = inlist;
306                                         inlist = indent;
307                                         body();
308                                         inlist = t;
309                                         if(*s == '*')
310                                                 string("</li></ul>");
311                                         else
312                                                 string("</li></ol>");
313                                 } else {
314                                         string("</li><li>");
315                                         break;
316                                 }
317                         } else if(indent > inquote){
318                                 string("<blockquote>");
319                                 t = inquote;
320                                 inquote = indent;
321                                 body();
322                                 inquote = t;
323                                 string("</blockquote>");
324                         } else
325                                 break;
326                 }
327
328                 if(indent == 0){
329                         if(got("#")){
330                                 if((pos = look("\n", nil)) == nil)
331                                         pos = epos;
332                                 goto Next;
333                         }
334                         if(heading())
335                                 goto Next;
336                         if(got("----")){
337                                 while(got("-"))
338                                         ;
339                                 string("<hr>");
340                                 goto Next;
341                         }
342                 }
343         }
344
345         if(got("`")){
346                 if(s = look("`", nil)){
347                         escape(s);
348                         pos = s+1;
349                 } else
350                         string("`");
351         }
352         else if(got("<")){
353                 string("<");
354                 if(s = look(">", nil)){
355                         s++;
356                         output(pos, s - pos);
357                         pos = s;
358                 }
359         }
360         else if(got("[")){
361                 if(s = look("]", nil)){
362                         link(s);
363                         pos = s+1;
364                 } else
365                         string("[");
366         }
367         else if(tag("*", "b") ||
368                 tag("_", "i") ||
369                 tag("^", "sup") ||
370                 tag(",,", "sub") ||
371                 tag("~~", "strike")){
372         }
373         else if(got("{{{")){
374                 if(s = look("}}}", nil)){
375                         if(look("\n", s)){
376                                 string("<pre>");
377                                 escape(s);
378                                 string("</pre>");
379                         } else {
380                                 string("<tt>");
381                                 escape(s);
382                                 string("</tt>");
383                         }
384                         pos = s+3;
385                 } else
386                         string("{{{");
387         }
388         else if(got("||")){
389                 if(s = look("||", look("\n", nil))){
390                         eatspace();
391                         switch(intable){
392                         case 0: string("<table>");
393                                 intable++;
394                         case 1: string("<tr>");
395                                 intable++;
396                         }
397                         string("<td>");
398                         ebody(s);
399                         string("</td>");
400                 } else if(intable){
401                         string("</tr>");
402                         intable = 1;
403                 }
404         }
405         else if(match("http://"))
406                 link(nil);
407         else if(match("https://"))
408                 link(nil);
409         else if(match("ftp://"))
410                 link(nil);
411         else{
412                 output(pos, 1);
413                 pos++;
414         }
415         goto Next;
416 }
417
418 void
419 usage(void)
420 {
421         fprint(2, "usage: %s [ file ]\n", argv0);
422         exits("usage");
423 }
424
425 void
426 main(int argc, char *argv[])
427 {
428         int n;
429
430         ARGBEGIN{
431         default:
432                 usage();
433         }ARGEND;
434
435         if(argc != 0 && argc != 1)
436                 usage();
437
438         if(*argv){
439                 if((n = open(*argv, OREAD)) < 0)
440                         sysfatal("open %s: %r", *argv);
441                 if(dup(n, 0) < 0)
442                         sysfatal("dup: %r");
443         }
444
445         buf = opos = sbrk(HUNK);
446         pos = epos = buf + HUNK;
447         for(;;){
448                 if(brk(epos + HUNK + 8) < 0)
449                         sysfatal("brk: %r");
450                 if((n = read(0, epos, HUNK)) < 0)
451                         sysfatal("read: %r");
452                 if(n == 0)
453                         break;
454                 epos += n;
455         }
456         if(epos > pos && epos[-1] != '\n')
457                 *epos++ = '\n';
458
459         body();
460         flush();
461         exits(0);
462 }