]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/wikifs/parse.c
merge
[plan9front.git] / sys / src / cmd / wikifs / parse.c
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <String.h>
5 #include <ctype.h>
6 #include <thread.h>
7 #include "wiki.h"
8
9 static Wpage*
10 mkwtxt(int type, char *text)
11 {
12         Wpage *w;
13
14         w = emalloc(sizeof(*w));
15         w->type = type;
16         w->text = text;
17         setmalloctag(w, getcallerpc(&type));
18         return w;
19 }
20
/*
 * Collapse every run of whitespace in s into a single space,
 * rewriting s in place.  Trailing whitespace is always removed.
 * Leading whitespace is removed only when cutbegin is nonzero;
 * otherwise a leading run is reduced to one space like any other.
 * Returns s.
 */
char*
strcondense(char *s, int cutbegin)
{
	char *r, *w;
	int inspace;

	/* starting out "inside a run" makes the loop swallow leading space */
	inspace = cutbegin;
	for(r=w=s; *r; r++){
		/*
		 * The cast keeps non-ASCII (UTF-8) bytes from reaching
		 * isspace as negative values, which ISO C leaves undefined.
		 */
		if(isspace((unsigned char)*r)){
			if(!inspace){
				inspace = 1;
				*w++ = ' ';
			}
		}else{
			inspace = 0;
			*w++ = *r;
		}
	}
	/* the write cursor never passes the read cursor */
	assert(w <= r);

	/* ending inside a run means the last byte written was its space */
	if(inspace && w > s)
		--w;
	*w = '\0';
	return s;
}
53
54 /*
55  * turn runs of Wplain into single Wplain.
56  */
57 static Wpage*
58 wcondense(Wpage *wtxt)
59 {
60         Wpage *ow, *w;
61
62         for(w=wtxt; w; ){
63                 if(w->type == Wplain)
64                         strcondense(w->text, 1);
65
66                 if(w->type != Wplain || w->next==nil
67                 || w->next->type != Wplain){
68                         w=w->next;
69                         continue;
70                 }
71
72                 w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
73                 strcat(w->text, " ");
74                 strcat(w->text, w->next->text);
75                 
76                 ow = w->next;
77                 w->next = ow->next;
78                 ow->next = nil;
79                 freepage(ow);
80         }
81         return wtxt;
82 }
83
84 /*
85  * Parse a link, without the brackets.
86  */
87 static Wpage*
88 mklink(char *s)
89 {
90         char *q;
91         Wpage *w;
92
93         for(q=s; *q && *q != '|'; q++)
94                 ;
95
96         if(*q == '\0'){
97                 w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
98                 w->url = nil;
99         }else{
100                 *q = '\0';
101                 w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
102                 w->url = estrdup(strcondense(q+1, 1));
103         }
104         setmalloctag(w, getcallerpc(&s));
105         return w;
106 }
107
108 /*
109  * Parse Wplains, inserting Wlink nodes where appropriate.
110  */
111 static Wpage*
112 wlink(Wpage *wtxt)
113 {
114         char *p, *q, *r, *s;
115         Wpage *w, *nw;
116
117         for(w=wtxt; w; w=nw){
118                 nw = w->next;
119                 if(w->type != Wplain)
120                         continue;
121                 while(w->text[0]){
122                         p = w->text;
123                         for(q=p; *q && *q != '['; q++)
124                                 ;
125                         if(*q == '\0')
126                                 break;
127                         for(r=q; *r && *r != ']'; r++)
128                                 ;
129                         if(*r == '\0')
130                                 break;
131                         *q = '\0';
132                         *r = '\0';
133                         s = w->text;
134                         w->text = estrdup(w->text);
135                         w->next = mklink(q+1);
136                         w = w->next;
137                         w->next = mkwtxt(Wplain, estrdup(r+1));
138                         free(s);
139                         w = w->next;
140                         w->next = nw;
141                 }
142                 assert(w->next == nw);
143         }
144         return wtxt;    
145 }
146
/*
 * Report whether c may appear in a manual page name: ASCII letters,
 * digits, '_', '-', '.', '/', or any byte with the high bit set
 * (part of a multibyte UTF-8 sequence).  High-bit bytes are accepted
 * both as negative values (char is signed) and as 128 and above
 * (char is unsigned), so the result does not depend on the
 * platform's char signedness.
 */
static int
ismanchar(int c)
{
	if(c < 0 || c >= 0x80)	/* UTF */
		return 1;
	if('a' <= c && c <= 'z')
		return 1;
	if('A' <= c && c <= 'Z')
		return 1;
	if('0' <= c && c <= '9')
		return 1;
	return c=='_' || c=='-' || c=='.' || c=='/';
}
156
157 static Wpage*
158 findmanref(char *p, char **beginp, char **endp)
159 {
160         char *q, *r;
161         Wpage *w;
162
163         q=p;
164         for(;;){
165                 for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
166                         ;
167                 if(*q == '\0')
168                         break;
169                 for(r=q; r>p && ismanchar(r[-1]); r--)
170                         ;
171                 if(r==q){
172                         q += 3;
173                         continue;
174                 }
175                 *q = '\0';
176                 w = mkwtxt(Wman, estrdup(r));
177                 *beginp = r;
178                 *q = '(';
179                 w->section = q[1]-'0';
180                 *endp = q+3;
181                 setmalloctag(w, getcallerpc(&p));
182                 return w;
183         }
184         return nil;
185 }
186
187 /*
188  * Parse Wplains, looking for man page references.
189  * This should be done by using a plumb(6)-style 
190  * control file rather than hard-coding things here.
191  */
192 static Wpage*
193 wman(Wpage *wtxt)
194 {
195         char *q, *r;
196         Wpage *w, *mw, *nw;
197
198         for(w=wtxt; w; w=nw){
199                 nw = w->next;
200                 if(w->type != Wplain)
201                         continue;
202                 while(w->text[0]){
203                         if((mw = findmanref(w->text, &q, &r)) == nil)
204                                 break;
205                         *q = '\0';
206                         w->next = mw;
207                         w = w->next;
208                         w->next = mkwtxt(Wplain, estrdup(r));
209                         w = w->next;
210                         w->next = nw;
211                 }
212                 assert(w->next == nw);
213         }
214         return wtxt;    
215 }
216
217 static int isheading(char *p) {
218         Rune r;
219         int hasupper=0;
220         while(*p) {
221                 p+=chartorune(&r,p);
222                 if(isupperrune(r))
223                         hasupper=1;
224                 else if(islowerrune(r))
225                         return 0;
226         }
227         return hasupper;
228 }
229
230 Wpage*
231 Brdpage(char *(*rdline)(void*,int), void *b)
232 {
233         char *p, *c;
234         int waspara;
235         Wpage *w, **pw;
236
237         w = nil;
238         pw = &w;
239         waspara = 1;
240         while((p = rdline(b, '\n')) != nil){
241                 if(p[0] != '!')
242                         p = strcondense(p, 1);
243                 if(p[0] == '\0'){
244                         if(waspara==0){
245                                 waspara=1;
246                                 *pw = mkwtxt(Wpara, nil);
247                                 pw = &(*pw)->next;
248                         }
249                         continue;
250                 }
251                 waspara = 0;
252                 switch(p[0]){
253                 case '*':
254                         *pw = mkwtxt(Wbullet, nil);
255                         pw = &(*pw)->next;
256                         *pw = mkwtxt(Wplain, estrdup(p+1));
257                         pw = &(*pw)->next;
258                         break;
259                 case '!':
260                         *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
261                         pw = &(*pw)->next;
262                         break;
263                 case '-':
264                         for(c = p; *c != '\0'; c++) {
265                                 if(*c != '-') {
266                                         c = p;
267                                         break;
268                                 }
269                         }
270
271                         if( (c-p) > 4) {
272                                 *pw = mkwtxt(Whr, nil);
273                                 pw = &(*pw)->next;
274                                 break;
275                         }
276                         /* else fall thru */
277                 default:
278                         if(isheading(p)){
279                                 *pw = mkwtxt(Wheading, estrdup(p));
280                                 pw = &(*pw)->next;
281                                 continue;
282                         }
283                         *pw = mkwtxt(Wplain, estrdup(p));
284                         pw = &(*pw)->next;
285                         break;
286                 }
287         }
288         if(w == nil)
289                 werrstr("empty page");
290         
291         *pw = nil;
292         w = wcondense(w);
293         w = wlink(w);
294         w = wman(w);
295         setmalloctag(w, getcallerpc(&rdline));
296
297         return w;               
298 }
299
300 void
301 printpage(Wpage *w)
302 {
303         for(; w; w=w->next){
304                 switch(w->type){
305                 case Wpara:
306                         print("para\n");
307                         break;
308                 case Wheading:
309                         print("heading '%s'\n", w->text);
310                         break;
311                 case Wbullet:
312                         print("bullet\n");
313                         break;
314                 case Wlink:
315                         print("link '%s' '%s'\n", w->text, w->url);
316                         break;
317                 case Wman:
318                         print("man %d %s\n", w->section, w->text);
319                         break;
320                 case Wplain:
321                         print("plain '%s'\n", w->text);
322                         break;
323                 case Whr:
324                         print("hr\n");
325                         break;
326                 case Wpre:
327                         print("pre '%s'\n", w->text);
328                         break;
329                 }
330         }
331 }