]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/htmlroff/roff.c
9bootfat: rename open() to fileinit and make it static as its really a internal funct...
[plan9front.git] / sys / src / cmd / htmlroff / roff.c
1 #include "a.h"
2
/*
 * Fixed capacities used by the request machinery in this file.
 */
enum
{
	MAXREQ	= 100,	/* slots in req[] */
	MAXRAW	= 40,	/* slots in raw[] */
	MAXESC	= 60,	/* slots in esc[] */
	MAXLINE	= 1024,	/* NOTE(review): the line buffers below are sized with MaxLine, not this */
	MAXIF	= 20,	/* slots in iftrue[] */
	MAXARG	= 10,	/* most arguments parseargs will split out of one line */
};
12
13 typedef struct Esc Esc;
14 typedef struct Req Req;
15 typedef struct Raw Raw;
16
17 /* escape sequence handler, like for \c */
18 struct Esc
19 {
20         Rune r;
21         int (*f)(void);
22         int mode;
23 };
24
25 /* raw request handler, like for .ie */
26 struct Raw
27 {
28         Rune *name;
29         void (*f)(Rune*);
30 };
31
32 /* regular request handler, like for .ft */
33 struct Req
34 {
35         int argc;
36         Rune *name;
37         void (*f)(int, Rune**);
38 };
39
40 int             dot = '.';
41 int             tick = '\'';
42 int             backslash = '\\';
43
44 int             inputmode;
45 Req             req[MAXREQ];
46 int             nreq;
47 Raw             raw[MAXRAW];
48 int             nraw;
49 Esc             esc[MAXESC];
50 int             nesc;
51 int             iftrue[MAXIF];
52 int             niftrue;
53
54 int isoutput;
55 int linepos;
56
57
58 void
59 addraw(Rune *name, void (*f)(Rune*))
60 {
61         Raw *r;
62         
63         if(nraw >= nelem(raw)){
64                 fprint(2, "too many raw requets\n");
65                 return;
66         }
67         r = &raw[nraw++];
68         r->name = erunestrdup(name);
69         r->f = f;
70 }
71
72 void
73 delraw(Rune *name)
74 {
75         int i;
76         
77         for(i=0; i<nraw; i++){
78                 if(runestrcmp(raw[i].name, name) == 0){
79                         if(i != --nraw){
80                                 free(raw[i].name);
81                                 raw[i] = raw[nraw];
82                         }
83                         return;
84                 }
85         }
86 }
87
88 void
89 renraw(Rune *from, Rune *to)
90 {
91         int i;
92         
93         delraw(to);
94         for(i=0; i<nraw; i++)
95                 if(runestrcmp(raw[i].name, from) == 0){
96                         free(raw[i].name);
97                         raw[i].name = erunestrdup(to);
98                         return;
99                 }
100 }
101
102
103 void
104 addreq(Rune *s, void (*f)(int, Rune**), int argc)
105 {
106         Req *r;
107
108         if(nreq >= nelem(req)){
109                 fprint(2, "too many requests\n");
110                 return;
111         }
112         r = &req[nreq++];
113         r->name = erunestrdup(s);
114         r->f = f;
115         r->argc = argc;
116 }
117
118 void
119 delreq(Rune *name)
120 {
121         int i;
122
123         for(i=0; i<nreq; i++){
124                 if(runestrcmp(req[i].name, name) == 0){
125                         if(i != --nreq){
126                                 free(req[i].name);
127                                 req[i] = req[nreq];
128                         }
129                         return;
130                 }
131         }
132 }
133
134 void
135 renreq(Rune *from, Rune *to)
136 {
137         int i;
138         
139         delreq(to);
140         for(i=0; i<nreq; i++)
141                 if(runestrcmp(req[i].name, from) == 0){
142                         free(req[i].name);
143                         req[i].name = erunestrdup(to);
144                         return;
145                 }
146 }
147
148 void
149 addesc(Rune r, int (*f)(void), int mode)
150 {
151         Esc *e;
152         
153         if(nesc >= nelem(esc)){
154                 fprint(2, "too many escapes\n");
155                 return;
156         }
157         e = &esc[nesc++];
158         e->r = r;
159         e->f = f;
160         e->mode = mode;
161 }
162
163 /*
164  * Get the next logical character in the input stream.
165  */
166 int
167 getnext(void)
168 {
169         int i, r;
170
171 next:
172         r = getrune();
173         if(r < 0)
174                 return -1;
175         if(r == Uformatted){
176                 br();
177                 assert(!isoutput);
178                 while((r = getrune()) >= 0 && r != Uunformatted){
179                         if(r == Uformatted)
180                                 continue;
181                         outrune(r);
182                 }
183                 goto next;
184         }
185         if(r == Uunformatted)
186                 goto next;
187         if(r == backslash){
188                 r = getrune();
189                 if(r < 0)
190                         return -1;
191                 for(i=0; i<nesc; i++){
192                         if(r == esc[i].r && (inputmode&esc[i].mode)==inputmode){
193                                 if(esc[i].f == e_warn)
194                                         warn("ignoring %C%C", backslash, r);
195                                 r = esc[i].f();
196                                 if(r <= 0)
197                                         goto next;
198                                 return r;
199                         }
200                 }
201                 if(inputmode&(ArgMode|CopyMode)){
202                         ungetrune(r);
203                         r = backslash;
204                 }
205         }
206         return r;
207 }
208
209 void
210 ungetnext(Rune r)
211 {
212         /*
213          * really we want to undo the getrunes that led us here,
214          * since the call after ungetnext might be getrune!
215          */
216         ungetrune(r);
217 }
218
219 int
220 _readx(Rune *p, int n, int nmode, int line)
221 {
222         int c, omode;
223         Rune *e;
224
225         while((c = getrune()) == ' ' || c == '\t')
226                 ;
227         ungetrune(c);
228         omode = inputmode;
229         inputmode = nmode;
230         e = p+n-1;
231         for(c=getnext(); p<e; c=getnext()){
232                 if(c < 0)
233                         break;
234                 if(!line && (c == ' ' || c == '\t'))
235                         break;
236                 if(c == '\n'){
237                         if(!line)
238                                 ungetnext(c);
239                         break;
240                 }
241                 *p++ = c;
242         }
243         inputmode = omode;
244         *p = 0;
245         if(c < 0)
246                 return -1;
247         return 0;
248 }
249
250 /*
251  * Get the next argument from the current line.
252  */
253 Rune*
254 copyarg(void)
255 {
256         static Rune buf[MaxLine];
257         int c;
258         Rune *r;
259         
260         if(_readx(buf, sizeof buf, ArgMode, 0) < 0)
261                 return nil;
262         r = runestrstr(buf, L("\\\""));
263         if(r){
264                 *r = 0;
265                 while((c = getrune()) >= 0 && c != '\n')
266                         ;
267                 ungetrune('\n');
268         }
269         r = erunestrdup(buf);   
270         return r;
271 }
272
273 /*
274  * Read the current line in given mode.  Newline not kept.
275  * Uses different buffer from copyarg!
276  */
277 Rune*
278 readline(int m)
279 {
280         static Rune buf[MaxLine];
281         Rune *r;
282
283         if(_readx(buf, sizeof buf, m, 1) < 0)
284                 return nil;
285         r = erunestrdup(buf);
286         return r;
287 }
288
289 /*
290  * Given the argument line (already read in copy+arg mode),
291  * parse into arguments.  Note that \" has been left in place
292  * during copy+arg mode parsing, so comments still need to be stripped.
293  */
294 int
295 parseargs(Rune *p, Rune **argv)
296 {
297         int argc;
298         Rune *w;
299
300         for(argc=0; argc<MAXARG; argc++){
301                 while(*p == ' ' || *p == '\t')
302                         p++;
303                 if(*p == 0)
304                         break;
305                 argv[argc] = p;
306                 if(*p == '"'){
307                         /* quoted argument */
308                         if(*(p+1) == '"'){
309                                 /* empty argument */
310                                 *p = 0;
311                                 p += 2;
312                         }else{
313                                 /* parse quoted string */
314                                 w = p++;
315                                 for(; *p; p++){
316                                         if(*p == '"' && *(p+1) == '"')
317                                                 *w++ = '"';
318                                         else if(*p == '"'){
319                                                 p++;
320                                                 break;
321                                         }else
322                                                 *w++ = *p;
323                                 }
324                                 *w = 0;
325                         }       
326                 }else{
327                         /* unquoted argument - need to watch out for \" comment */
328                         for(; *p; p++){
329                                 if(*p == ' ' || *p == '\t'){
330                                         *p++ = 0;
331                                         break;
332                                 }
333                                 if(*p == '\\' && *(p+1) == '"'){
334                                         *p = 0;
335                                         if(p != argv[argc])
336                                                 argc++;
337                                         return argc;
338                                 }
339                         }
340                 }
341         }
342         return argc;
343 }
344
345 /*
346  * Process a dot line.  The dot has been read.
347  */
348 void
349 dotline(int dot)
350 {
351         int argc, i;
352         Rune *a, *argv[1+MAXARG];
353
354         /*
355          * Read request/macro name
356          */
357         a = copyarg();
358         if(a == nil || a[0] == 0){
359                 free(a);
360                 getrune();      /* \n */
361                 return;
362         }
363         argv[0] = a;
364         /*
365          * Check for .if, .ie, and others with special parsing.
366          */
367         for(i=0; i<nraw; i++){
368                 if(runestrcmp(raw[i].name, a) == 0){
369                         raw[i].f(raw[i].name);
370                         free(a);
371                         return;
372                 }       
373         }
374
375         /*
376          * Read rest of line in copy mode, invoke regular request.
377          */
378         a = readline(ArgMode);
379         if(a == nil){
380                 free(argv[0]);
381                 return;
382         }
383         argc = 1+parseargs(a, argv+1);
384         for(i=0; i<nreq; i++){
385                 if(runestrcmp(req[i].name, argv[0]) == 0){
386                         if(req[i].argc != -1){
387                                 if(argc < 1+req[i].argc){
388                                         warn("not enough arguments for %C%S", dot, req[i].name);
389                                         free(argv[0]);
390                                         free(a);
391                                         return;
392                                 }
393                                 if(argc > 1+req[i].argc)
394                                         warn("too many arguments for %C%S", dot, req[i].name);
395                         }
396                         req[i].f(argc, argv);
397                         free(argv[0]);
398                         free(a);
399                         return;
400                 }
401         }
402
403         /*
404          * Invoke user-defined macros.
405          */
406         runmacro(dot, argc, argv);
407         free(argv[0]);
408         free(a);
409 }
410
/*
 * Newlines are magical in various ways.
 *
 * bol is nonzero while no ordinary character has been seen on
 * the current input line; runinput clears it and this function
 * sets it again.
 */
int bol;
void
newline(void)
{
	int ce;

	/* a newline while still at the beginning of a line: space by 1v */
	if(bol)
		sp(eval(L("1v")));
	bol = 1;

	/* a positive .ce count is decremented and forces a break */
	ce = getnr(L(".ce"));
	if(ce > 0){
		nr(L(".ce"), ce-1);
		br();
	}
	/* break as well whenever .fi is zero */
	if(!getnr(L(".fi")))
		br();
	outrune('\n');
}
431
432 void
433 startoutput(void)
434 {
435         char *align;
436         double ps, vs, lm, rm, ti;
437         Rune buf[200];
438
439         if(isoutput)
440                 return;
441         isoutput = 1;
442
443         if(getnr(L(".paragraph")) == 0)
444                 return;
445
446         nr(L(".ns"), 0);
447         isoutput = 1;
448         ps = getnr(L(".s"));
449         if(ps <= 1)
450                 ps = 10;
451         ps /= 72.0;
452         USED(ps);
453
454         vs = getnr(L(".v"))*getnr(L(".ls")) * 1.0/UPI;
455         vs /= (10.0/72.0);      /* ps */
456         if(vs == 0)
457                 vs = 1.2;
458
459         lm = (getnr(L(".o"))+getnr(L(".i"))) * 1.0/UPI;
460         ti = getnr(L(".ti")) * 1.0/UPI;
461         nr(L(".ti"), 0);
462
463         rm = 8.0 - getnr(L(".l"))*1.0/UPI - getnr(L(".o"))*1.0/UPI;
464         if(rm < 0)
465                 rm = 0;
466         switch(getnr(L(".j"))){
467         default:
468         case 0:
469                 align = "left";
470                 break;
471         case 1:
472                 align = "justify";
473                 break;
474         case 3:
475                 align = "center";
476                 break;
477         case 5:
478                 align = "right";
479                 break;
480         }
481         if(getnr(L(".ce")))
482                 align = "center";
483         if(!getnr(L(".margin")))
484                 runesnprint(buf, nelem(buf), "<p style=\"line-height: %.1fem; text-indent: %.2fin; margin-top: 0; margin-bottom: 0; text-align: %s;\">\n",
485                         vs, ti, align);
486         else
487                 runesnprint(buf, nelem(buf), "<p style=\"line-height: %.1fem; margin-left: %.2fin; text-indent: %.2fin; margin-right: %.2fin; margin-top: 0; margin-bottom: 0; text-align: %s;\">\n",
488                         vs, lm, ti, rm, align);
489         outhtml(buf);
490 }
491 void
492 br(void)
493 {
494         if(!isoutput)
495                 return;
496         isoutput = 0;
497
498         nr(L(".dv"), 0);
499         dv(0);
500         hideihtml();
501         if(getnr(L(".paragraph")))
502                 outhtml(L("</p>"));
503 }
504
505 void
506 r_margin(int argc, Rune **argv)
507 {
508         USED(argc);
509
510         nr(L(".margin"), eval(argv[1]));
511 }
512
513 int inrequest;
514 void
515 runinput(void)
516 {
517         int c;
518         
519         bol = 1;
520         for(;;){
521                 c = getnext();
522                 if(c < 0)
523                         break;
524                 if((c == dot || c == tick) && bol){
525                         inrequest = 1;
526                         dotline(c);
527                         bol = 1;
528                         inrequest = 0;
529                 }else if(c == '\n'){
530                         newline();
531                         itrap();
532                         linepos = 0;
533                 }else{
534                         outtrap();
535                         startoutput();
536                         showihtml();
537                         if(c == '\t'){
538                                 /* XXX do better */
539                                 outrune(' ');
540                                 while(++linepos%4)
541                                         outrune(' ');
542                         }else{
543                                 outrune(c);
544                                 linepos++;
545                         }
546                         bol = 0;
547                 }
548         }
549 }
550
551 void
552 run(void)
553 {
554         t1init();
555         t2init();
556         t3init();
557         t4init();
558         t5init();
559         t6init();
560         t7init();
561         t8init();
562         /* t9init(); t9.c */
563         t10init();
564         t11init();
565         /* t12init(); t12.c */
566         t13init();
567         t14init();
568         t15init();
569         t16init();
570         t17init();
571         t18init();
572         t19init();
573         t20init();
574         htmlinit();
575         hideihtml();
576         
577         addreq(L("margin"), r_margin, 1);
578         nr(L(".margin"), 1);
579         nr(L(".paragraph"), 1);
580
581         runinput();
582         while(popinput())
583                 ;
584         dot = '.';
585         if(verbose)
586                 fprint(2, "eof\n");
587         runmacro1(L("eof"));
588         closehtml();
589 }
590
591 void
592 out(Rune *s)
593 {
594         if(s == nil)
595                 return;
596         for(; *s; s++)
597                 outrune(*s);
598 }
599
/* when non-nil, outrune hands each rune to this callback instead of writing it to bout */
void (*outcb)(Rune);
601
602 void
603 inroman(Rune r)
604 {
605         int f;
606         
607         f = getnr(L(".f"));
608         nr(L(".f"), 1);
609         runmacro1(L("font"));
610         outrune(r);
611         nr(L(".f"), f);
612         runmacro1(L("font"));
613 }
614
615 void
616 Brune(Rune r)
617 {
618         if(r == '&')
619                 Bprint(&bout, "&amp;");
620         else if(r == '<')
621                 Bprint(&bout, "&lt;");
622         else if(r == '>')
623                 Bprint(&bout, "&gt;");
624         else if(r < Runeself || utf8)
625                 Bprint(&bout, "%C", r);
626         else
627                 Bprint(&bout, "%S", rune2html(r));
628 }
629
630 void
631 outhtml(Rune *s)
632 {
633         Rune r;
634         
635         for(; *s; s++){
636                 switch(r = *s){
637                 case '<':
638                         r = Ult;
639                         break;
640                 case '>':
641                         r = Ugt;
642                         break;
643                 case '&':
644                         r = Uamp;
645                         break;
646                 case ' ':
647                         r = Uspace;
648                         break;
649                 }
650                 outrune(r);
651         }
652 }
653
654 void
655 outrune(Rune r)
656 {
657         switch(r){
658         case ' ':
659                 if(getnr(L(".fi")) == 0)
660                         r = Unbsp;
661                 break;
662         case Uformatted:
663         case Uunformatted:
664                 abort();
665         }
666         if(outcb){
667                 if(r == ' ')
668                         r = Uspace;
669                 outcb(r);
670                 return;
671         }
672         /* writing to bout */
673         switch(r){
674         case Uempty:
675                 return;
676         case Upl:
677                 inroman('+');
678                 return;
679         case Ueq:
680                 inroman('=');
681                 return;
682         case Umi:
683                 inroman(0x2212);
684                 return;
685         case Utick:
686                 r = '\'';
687                 break;
688         case Ubtick:
689                 r = '`';
690                 break;
691         case Uminus:
692                 r = '-';
693                 break;
694         case '\'':
695                 Bprint(&bout, "&rsquo;");
696                 return;
697         case '`':
698                 Bprint(&bout, "&lsquo;");
699                 return;
700         case Uamp:
701                 Bputrune(&bout, '&');
702                 return;
703         case Ult:
704                 Bputrune(&bout, '<');
705                 return;
706         case Ugt:
707                 Bputrune(&bout, '>');
708                 return;
709         case Uspace:
710                 Bputrune(&bout, ' ');
711                 return;
712         case 0x2032:
713                 /*
714                  * In Firefox, at least, the prime is not
715                  * a superscript by default.
716                  */
717                 Bprint(&bout, "<sup>");
718                 Brune(r);
719                 Bprint(&bout, "</sup>");
720                 return;
721         }
722         Brune(r);
723 }
724
725 void
726 r_nop(int argc, Rune **argv)
727 {
728         USED(argc);
729         USED(argv);
730 }
731
732 void
733 r_warn(int argc, Rune **argv)
734 {
735         USED(argc);
736         warn("ignoring %C%S", dot, argv[0]);
737 }
738
/*
 * Escape handler for escapes that are recognized but ignored.
 * It produces no character; getnext compares handlers against
 * this function and prints the "ignoring" warning itself.
 */
int
e_warn(void)
{
	return 0;
}
745
/*
 * Escape handler that produces no character and, unlike
 * e_warn, triggers no warning in getnext.
 */
int
e_nop(void)
{
	return 0;
}