]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/aux/msexceltables.c
abaco: cleanup, handle image/x-icon, don't use backspace as a hotkey, and remove...
[plan9front.git] / sys / src / cmd / aux / msexceltables.c
1 /* msexceltables.c    Steve Simon    5-Jan-2005 */
2 #include <u.h>
3 #include <libc.h>
4 #include <bio.h>
5 #include <ctype.h>
6
7 enum {
8         Tillegal = 0,
9         Tnumber,                // cell types
10         Tlabel,
11         Tindex,
12         Tbool,
13         Terror,
14
15         Ver8 = 0x600,           // only BIFF8 and BIFF8x files support unicode
16
17         Nwidths = 4096,
18 };
19         
20         
21 typedef struct Biff Biff;
22 typedef struct Col Col;
23 typedef struct Row Row;
24
25 struct Row {
26         Row *next;              // next row
27         int r;                  // row number
28         Col *col;               // list of cols in row
29 };
30
31 struct Col {
32         Col *next;              // next col in row
33         int c;                  // col number
34         int f;                  // index into formating table (Xf)
35         int type;               // type of value for union below
36         union {                 // value
37                 int index;      // index into string table (Strtab)
38                 int error;
39                 int bool;
40                 char *label;
41                 double number;
42         };
43 };
44
45 struct  Biff {
46         Biobuf *bp;             // input file
47         int op;                 // current record type
48         int len;                // length of current record
49 };
50
51 // options
52 static int Nopad = 0;           // disable padding cells to colum width
53 static int Trunc = 0;           // truncate cells to colum width
54 static int All = 0;             // dump all sheet types, Worksheets only by default
55 static char *Delim = " ";       // field delimiter
56 static char *Sheetrange = nil;  // range of sheets wanted
57 static char *Columnrange = nil; // range of collums wanted
58 static int Debug = 0;
59
60 // file scope
61 static int Defwidth = 10;       // default colum width if non given
62 static int Biffver;             // file vesion
63 static int Datemode;            // date ref: 1899-Dec-31 or 1904-jan-1
64 static char **Strtab = nil;     // label contents heap
65 static int Nstrtab = 0;         // # of above
66 static int *Xf;                 // array of extended format indices
67 static int Nxf = 0;             // # of above
68 static Biobuf *bo;              // stdout (sic)
69 static int Doquote = 1;         // quote text fields if they are rc(1) unfriendly
70
71 // table scope
72 static int Width[Nwidths];      // array of colum widths
73 static int Ncols = -1;          // max colums in table used
74 static int Content = 0;         // type code for contents of sheet
75 static Row *Root = nil;         // one worksheet's worth of cells
76                                 
77 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
78         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
79
80 static char *Errmsgs[] = {
81         [0x0]   "#NULL!",       // intersection of two cell ranges is empty
82         [0x7]   "#DIV/0!",      // division by zero     
83         [0xf]   "#VALUE!",      // wrong type of operand
84         [0x17]  "#REF!",        // illegal or deleted cell reference
85         [0x1d]  "#NAME?",       // wrong function or range name
86         [0x24]  "#NUM!",        // value range overflow
87         [0x2a]  "#N/A!",        // argument of function not available
88 };
89
90 int
91 wanted(char *range, int here)
92 {
93         int n, s;
94         char *p;
95
96         if (! range)
97                 return 1;
98
99         s = -1;
100         p = range;
101         while(1){
102                 n = strtol(p, &p, 10);
103                 switch(*p){
104                 case 0:
105                         if(n == here)
106                                 return 1;
107                         if(s != -1 && here > s && here < n)
108                                 return 1;
109                         return 0;
110                 case ',':
111                         if(n == here)
112                                 return 1;
113                         if(s != -1 && here > s && here < n)
114                                 return 1;
115                         s = -1;
116                         p++;
117                         break;
118                 case '-':
119                         if(n == here)
120                                 return 1;
121                         s = n;
122                         p++;
123                         break;
124                 default:
125                         sysfatal("%s malformed range spec", range);
126                         break;
127                 }
128         }
129 }
130
131         
132 void
133 cell(int r, int c, int f, int type, void *val)
134 {
135         Row *row, *nrow;
136         Col *col, *ncol;
137
138         if(c > Ncols)
139                 Ncols = c;
140
141         if((ncol = malloc(sizeof(Col))) == nil)
142                 sysfatal("no memory");
143         ncol->c = c;
144         ncol->f = f;
145         ncol->type = type;
146         ncol->next = nil;
147
148         switch(type){
149         case Tnumber:   ncol->number = *(double *)val;  break;
150         case Tlabel:    ncol->label = (char *)val;      break;
151         case Tindex:    ncol->index = *(int *)val;      break;
152         case Tbool:     ncol->bool = *(int *)val;       break;
153         case Terror:    ncol->error = *(int *)val;      break;
154         default:        sysfatal("can't happen error");
155         }
156
157         if(Root == nil || Root->r > r){
158                 if((nrow = malloc(sizeof(Row))) == nil)
159                         sysfatal("no memory");
160                 nrow->col = ncol;
161                 ncol->next = nil;
162                 nrow->r = r;
163                 nrow->next = Root;
164                 Root = nrow;
165                 return;
166         }
167
168         for(row = Root; row; row = row->next){
169                 if(row->r == r){
170                         if(row->col->c > c){
171                                 ncol->next = row->col;
172                                 row->col = ncol;
173                                 return;
174                         }
175                         else{
176                                 for(col = row->col; col; col = col->next)
177                                         if(col->next == nil || col->next->c > c){
178                                                 ncol->next = col->next;
179                                                 col->next = ncol;
180                                                 return;
181                                         }
182                         }
183                 }
184
185                 if(row->next == nil || row->next->r > r){
186                         if((nrow = malloc(sizeof(Row))) == nil)
187                                 sysfatal("no memory");
188                         nrow->col = ncol;
189                         nrow->r = r;
190                         nrow->next = row->next;
191                         row->next = nrow;
192                         return;
193                 }
194         }
195         sysfatal("cannot happen error");
196 }
197
198 struct Tm *
199 bifftime(double num)
200 {
201         long long t = num;
202
203         /* Beware - These epochs are wrong, this
204          * is due to Excel still remaining compatible
205          * with Lotus-123, which incorrectly believed 1900
206          * was a leap year
207          */
208         if(Datemode)
209                 t -= 24107;             // epoch = 1/1/1904
210         else
211                 t -= 25569;             // epoch = 31/12/1899
212         t *= 60*60*24;
213
214         return localtime((long)t);
215 }
216
217 void
218 numfmt(int fmt, int min, int max, double num)
219 {
220         char buf[1024];
221         struct Tm *tm;
222
223         if(fmt == 9)
224                 snprint(buf, sizeof(buf),"%.0f%%", num);
225         else
226         if(fmt == 10)
227                 snprint(buf, sizeof(buf),"%f%%", num);
228         else
229         if(fmt == 11 || fmt == 48)
230                 snprint(buf, sizeof(buf),"%e", num);
231         else
232         if(fmt >= 14 && fmt <= 17){
233                 tm = bifftime(num);
234                 snprint(buf, sizeof(buf),"%d-%s-%d",
235                         tm->mday, Months[tm->mon], tm->year+1900);
236         }
237         else
238         if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
239                 tm = bifftime(num);
240                 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
241
242         }
243         else
244         if(fmt == 22){
245                 tm = bifftime(num);
246                 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
247                         tm->hour, tm->min, tm->sec,
248                         tm->mday, Months[tm->mon], tm->year+1900);
249
250         }else
251                 snprint(buf, sizeof(buf),"%g", num);
252
253         Bprint(bo, "%-*.*q", min, max, buf);
254 }
255
256 void
257 dump(void)
258 {
259         Row *r;
260         Col *c, *c1;
261         char *strfmt;
262         int i, n, last, min, max;
263
264         if(Doquote)
265                 strfmt = "%-*.*q";
266         else
267                 strfmt = "%-*.*s";
268
269         for(r = Root; r; r = r->next){
270                 n = 1;
271                 for(c = r->col; c; c = c->next){
272                         n++;
273                         if(! wanted(Columnrange, n))
274                                 continue;
275
276                         if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
277                                 min = Defwidth;
278                         if((c->next && c->c == c->next->c) || Nopad)
279                                 min = 0;
280                         max = -1;
281                         if(Trunc && min > 2)
282                                 max = min -2;   // FIXME: -2 because of bug %q format ?
283
284                         switch(c->type){
285                         case Tnumber:
286                                 if(Xf == nil || Xf[c->f] == 0)
287                                         Bprint(bo, "%-*.*g", min, max, c->number);
288                                 else
289                                         numfmt(Xf[c->f], min, max, c->number);
290                                 break;
291                         case Tlabel:
292                                 Bprint(bo, strfmt, min, max, c->label);
293                                 break;
294                         case Tbool:
295                                 Bprint(bo, strfmt, min, max, (c->bool)? "True": "False");
296                                 break;
297                         case Tindex:
298                                 if(c->index < 0 || c->index >= Nstrtab)
299                                         sysfatal("SST string out of range - corrupt file?");
300                                 Bprint(bo, strfmt, min, max, Strtab[c->index]);
301                                 break;
302                         case Terror:
303                                 if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
304                                         Bprint(bo, "#ERR=%d", c->index);
305                                 else
306                                         Bprint(bo, strfmt, min, max, Errmsgs[c->error]);
307                                 break;
308                         default:
309                                 sysfatal("cannot happen error");
310                                 break;
311                         }
312
313                         last = 1;
314                         for(i = n+1, c1 = c->next; c1; c1 = c1->next, i++)
315                                 if(wanted(Columnrange, i)){
316                                         last = 0;
317                                         break;
318                                 }
319
320                         if(! last){
321                                 if(c->next->c == c->c)          // bar charts
322                                         Bprint(bo, "=");
323                                 else{
324                                         Bprint(bo, "%s", Delim);
325                                         for(i = c->c; c->next && i < c->next->c -1; i++)
326                                                 Bprint(bo, "%-*.*s%s", min, max, "", Delim);
327                                 }
328                         }
329                 }
330                 if(r->next)
331                         for(i = r->r; i < r->next->r; i++)
332                                 Bprint(bo, "\n");
333
334         }
335         Bprint(bo, "\n");
336 }
337
338 void
339 release(void)
340 {
341         Row *r, *or;
342         Col *c, *oc;
343
344         r = Root;
345         while(r){
346                 c = r->col;
347                 while(c){
348                         if(c->type == Tlabel)
349                                 free(c->label);
350                         oc = c;
351                         c = c->next;
352                         free(oc);
353                 }
354                 or = r;
355                 r = r->next;
356                 free(or);
357         }
358         Root = nil;
359
360         memset(Width, 0, sizeof(Width));
361         Ncols = -1;
362 }
363
364 void
365 skip(Biff *b, int len)
366 {
367         assert(len <= b->len);
368         if(Bseek(b->bp, len, 1) == -1)
369                 sysfatal("seek failed - %r");
370         b->len -= len;
371 }
372
373 void
374 gmem(Biff *b, void *p, int n)
375 {
376         if(b->len < n)
377                 sysfatal("short record %d < %d", b->len, n);
378         if(Bread(b->bp, p, n) != n)
379                 sysfatal("unexpected EOF - %r");
380         b->len -= n;
381 }
382
383 void
384 xd(Biff *b)
385 {
386         uvlong off;
387         uchar buf[16];
388         int addr, got, n, i, j;
389
390         addr = 0;
391         off = Boffset(b->bp);
392         while(addr < b->len){
393                 n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
394                 got = Bread(b->bp, buf, n);
395
396                 Bprint(bo, "    %6d  ", addr);
397                 addr += n;
398
399                 for(i = 0; i < got; i++)
400                         Bprint(bo, "%02x ", buf[i]);
401                 for(j = i; j < 16; j++)
402                         Bprint(bo, "   ");
403                 Bprint(bo, "  ");
404                 for(i = 0; i < got; i++)
405                         Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
406                 Bprint(bo, "\n");
407         }
408         Bseek(b->bp, off, 0);
409 }
410
411 static int 
412 getrec(Biff *b)
413 {
414         int c;
415         if((c = Bgetc(b->bp)) == -1)
416                 return -1;              // real EOF
417         b->op = c;
418         if((c = Bgetc(b->bp)) == -1)
419                 sysfatal("unexpected EOF - %r");
420         b->op |= c << 8;
421         if((c = Bgetc(b->bp)) == -1)
422                 sysfatal("unexpected EOF - %r");
423         b->len = c;
424         if((c = Bgetc(b->bp)) == -1)
425                 sysfatal("unexpected EOF - %r");
426         b->len |= c << 8;
427         if(b->op == 0 && b->len == 0)
428                 return -1;
429         if(Debug){
430                 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
431                 xd(b);
432         }
433         return 0;
434 }
435
436 static uvlong
437 gint(Biff *b, int n)
438 {
439         int i, c;
440         uvlong vl, rc;
441
442         if(b->len < n)
443                 return -1;
444         rc = 0;
445         for(i = 0; i < n; i++){
446                 if((c = Bgetc(b->bp)) == -1)
447                         sysfatal("unexpected EOF - %r");
448                 b->len--;
449                 vl = c;
450                 rc |= vl << (8*i);
451         }
452         return rc;
453 }
454
455 double
456 grk(Biff *b)
457 {
458         int f;
459         uvlong n;
460         double d;
461
462         n = gint(b, 4);
463         f = n & 3;
464         n &= ~3LL;
465         if(f & 2){
466                 d = n / 4.0;
467         }
468         else{
469                 n <<= 32;
470                 memcpy(&d, &n, sizeof(d));
471         }
472
473         if(f & 1)
474                 d /= 100.0;
475         return d;
476 }
477
478 double
479 gdoub(Biff *b)
480 {
481         double d;
482         uvlong n = gint(b, 8);
483         memcpy(&d, &n, sizeof(n));
484         return d;
485 }
486
487 char *
488 gstr(Biff *b, int len_width)
489 {
490         Rune r;
491         char *buf, *p;
492         int nch, w, ap, ln, rt, opt;
493         enum {
494                 Unicode = 1,
495                 Asian_phonetic = 4,
496                 Rich_text = 8,
497         };
498
499         if(b->len < len_width){
500                 if(getrec(b) == -1)
501                         sysfatal("starting STRING expected CONTINUE, got EOF");
502                 if(b->op != 0x03c)
503                         sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op);
504         }
505
506         ln = gint(b, len_width);
507         if(Biffver != Ver8){
508                 if((buf = calloc(ln+1, sizeof(char))) == nil)
509                         sysfatal("no memory");
510                 gmem(b, buf, ln);
511                 return buf;
512         }
513
514
515         if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
516                 sysfatal("no memory");
517         p = buf;
518
519         if(ln == 0)
520                 return buf;
521         nch = 0;
522         *buf = 0;
523         opt = gint(b, 1);
524         if(opt & Rich_text)
525                 rt = gint(b, 2);
526         else
527                 rt = 0;
528         if(opt & Asian_phonetic)
529                 ap = gint(b, 4);
530         else
531                 ap = 0;
532         for(;;){
533                 w = (opt & Unicode)? sizeof(Rune): sizeof(char);
534
535                 while(b->len > 0){
536                         r = gint(b, w);
537                         p += runetochar(p, &r);
538                         if(++nch >= ln){
539                                 if(rt)
540                                         skip(b, rt*4);
541                                 if(ap)
542                                         skip(b, ap);
543                                 return buf;
544                         }
545                 }
546                 if(getrec(b) == -1)
547                         sysfatal("in STRING expected CONTINUE, got EOF");
548                 if(b->op != 0x03c)      
549                         sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op);
550                 opt = gint(b, 1);
551         }
552 }
553
554 void
555 sst(Biff *b)
556 {
557         int n;
558         
559         skip(b, 4);                     // total # strings
560         Nstrtab = gint(b, 4);           // # unique strings
561         if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
562                 sysfatal("no memory");
563         for(n = 0; n < Nstrtab; n++)
564                 Strtab[n] = gstr(b, 2);
565
566 }
567
568 void
569 boolerr(Biff *b)
570 {
571         int r = gint(b, 2);             // row
572         int c = gint(b, 2);             // col
573         int f = gint(b, 2);             // formatting ref
574         int v = gint(b, 1);             // bool value / err code
575         int t = gint(b, 1);             // type
576         cell(r, c, f, (t)? Terror: Tbool, &v);
577 }
578
579 void
580 rk(Biff *b)
581 {
582         int r = gint(b, 2);             // row
583         int c = gint(b, 2);             // col
584         int f = gint(b, 2);             // formatting ref
585         double v = grk(b);              // value
586         cell(r, c, f, Tnumber, &v);
587 }
588
589 void
590 mulrk(Biff *b)
591 {
592         int r = gint(b, 2);             // row
593         int c = gint(b, 2);             // first col
594         while(b->len >= 6){
595                 int f = gint(b, 2);     // formatting ref
596                 double v = grk(b);      // value
597                 cell(r, c++, f, Tnumber, &v);
598         }
599 }
600
601 void
602 number(Biff *b)
603 {
604         int r = gint(b, 2);             // row
605         int c = gint(b, 2);             // col
606         int f = gint(b, 2);             // formatting ref
607         double v = gdoub(b);            // double 
608         cell(r, c, f, Tnumber, &v);
609 }
610
611 void
612 label(Biff *b)
613 {
614         int r = gint(b, 2);             // row
615         int c = gint(b, 2);             // col
616         int f = gint(b, 2);             // formatting ref
617         char *s = gstr(b, 2);           // byte string
618         cell(r, c, f, Tlabel, s);
619 }
620
621
622 void
623 labelsst(Biff *b)
624 {
625         int r = gint(b, 2);             // row
626         int c = gint(b, 2);             // col
627         int f = gint(b, 2);             // formatting ref
628         int i = gint(b, 2);             // sst string ref
629         cell(r, c, f, Tindex, &i);
630 }
631
632 void
633 bof(Biff *b)
634 {
635         Biffver = gint(b, 2);
636         Content = gint(b, 2);   
637 }
638
639 void
640 defcolwidth(Biff *b)
641 {
642         Defwidth = gint(b, 2);
643 }
644
645 void
646 datemode(Biff *b)
647 {
648         Datemode = gint(b, 2);
649 }
650
651 void
652 eof(Biff *b)
653 {
654         int i;
655         struct {
656                 int n;
657                 char *s;
658         } names[] = {
659                 0x005,  "Workbook globals",
660                 0x006,  "Visual Basic module",
661                 0x010,  "Worksheet",
662                 0x020,  "Chart",
663                 0x040,  "Macro sheet",
664                 0x100,  "Workspace file",
665         };
666         static int sheet = 0;
667
668         if(! wanted(Sheetrange, ++sheet)){
669                 release();
670                 return;
671         }
672                         
673         if(Ncols != -1){
674                 if(All){
675                         for(i = 0; i < nelem(names); i++)
676                                 if(names[i].n == Content){
677                                         Bprint(bo, "\n# contents %s\n", names[i].s);
678                                         dump();
679                                 }
680                 }
681                 else 
682                 if(Content == 0x10)             // Worksheet
683                         dump();
684         }
685         release();
686         USED(b);
687 }
688
689 void
690 colinfo(Biff *b)
691 {
692         int c;
693         int c1 = gint(b, 2);
694         int c2 = gint(b, 2);
695         int w  = gint(b, 2);
696
697         if(c1 < 0)
698                 sysfatal("negative column number (%d)", c1);
699         if(c2 >= Nwidths)
700                 sysfatal("too many columns (%d > %d)", c2, Nwidths);
701         w /= 256;
702
703         if(w > 100)
704                 w = 100;
705         if(w < 0)
706                 w = 0;
707
708         for(c = c1; c <= c2; c++)
709                 Width[c] = w;
710 }
711
712 void
713 xf(Biff *b)
714 {
715         int fmt;
716         static int nalloc = 0;
717
718         skip(b, 2);
719         fmt = gint(b, 2);
720         if(nalloc >= Nxf){
721                 nalloc += 20;
722                 if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
723                         sysfatal("no memory");
724         }
725         Xf[Nxf++] = fmt;
726 }
727
728 void
729 writeaccess(Biff *b)
730 {
731         Bprint(bo, "# author %s\n", gstr(b, 2));
732 }
733
734 void
735 codepage(Biff *b)
736 {
737         int codepage = gint(b, 2);
738         if(codepage != 1200)                            // 1200 == UTF-16
739                 Bprint(bo, "# codepage %d\n", codepage);
740 }
741
742 void
743 xls2csv(Biobuf *bp)
744 {
745         int i;
746         Biff biff, *b;
747         struct {
748                 int op;
749                 void (*func)(Biff *);
750         } dispatch[] = {
751                 0x000a, eof,
752                 0x0022, datemode,
753                 0x0042, codepage,
754                 0x0055, defcolwidth,
755                 0x005c, writeaccess,
756                 0x007d, colinfo,
757                 0x00bd, mulrk,
758                 0x00fc, sst,
759                 0x00fd, labelsst,
760                 0x0203, number,
761                 0x0204, label,
762                 0x0205, boolerr,
763                 0x027e, rk,
764                 0x0809, bof,
765                 0x00e0, xf,
766         };              
767         
768         b = &biff;
769         b->bp = bp;
770         while(getrec(b) != -1){
771                 for(i = 0; i < nelem(dispatch); i++)
772                         if(b->op == dispatch[i].op)
773                                 (*dispatch[i].func)(b);
774                 skip(b, b->len);
775         }
776 }
777
778 void
779 usage(void)
780 {
781         fprint(2, "usage: %s [-Danqt] [-w worksheets] [-c columns] [-d delim] /mnt/doc/Workbook\n", argv0);
782         exits("usage");
783 }
784
785 void
786 main(int argc, char *argv[])
787 {
788         int i;
789         Biobuf bin, bout, *bp;
790         
791         ARGBEGIN{
792         case 'D':
793                 Debug = 1;
794                 break;
795         case 'a':
796                 All = 1;
797                 break;
798         case 'q':
799                 Doquote = 0;
800                 break;
801         case 'd':
802                 Delim = EARGF(usage());
803                 break;
804         case 'n':
805                 Nopad = 1;
806                 break;
807         case 't':
808                 Trunc = 1;
809                 break;
810         case 'c':
811                 Columnrange = EARGF(usage());
812                 break;
813         case 'w':
814                 Sheetrange = EARGF(usage());
815                 break;
816         default:
817                 usage();
818                 break;
819         }ARGEND;
820
821         if(argc != 1)
822                 usage();
823
824         bo = &bout;
825         quotefmtinstall();
826         Binit(bo, OWRITE, 1);
827
828         if(argc > 0) {
829                 for(i = 0; i < argc; i++){
830                         if((bp = Bopen(argv[i], OREAD)) == nil)
831                                 sysfatal("%s cannot open - %r", argv[i]);
832                         xls2csv(bp);
833                         Bterm(bp);
834                 }
835         } else {
836                 Binit(&bin, 0, OREAD);
837                 xls2csv(&bin);
838         }
839         exits(0);
840 }
841