1 /* msexceltables.c Steve Simon 5-Jan-2005 */
15 Ver8 = 0x600, // only BIFF8 and BIFF8x files support unicode
21 typedef struct Biff Biff; // input state: the file being read plus the current record's op and len
22 typedef struct Col Col; // one cell; cells of a row are chained through next
23 typedef struct Row Row; // one sheet row; holds a list of Col cells and a link to the next row
26 Row *next; // next row
28 Col *col; // list of cols in row
32 Col *next; // next col in row
34 int f; // index into formatting table (Xf)
35 int type; // type of value for union below
37 int index; // index into string table (Strtab)
46 Biobuf *bp; // input file
47 int op; // current record type
48 int len; // length of current record
52 static int Nopad = 0; // disable padding cells to column width
53 static int Trunc = 0; // truncate cells to column width
54 static int All = 0; // dump all sheet types; only Worksheets by default
55 static char *Delim = " "; // field delimiter
56 static char *Sheetrange = nil; // range of sheets wanted
57 static char *Columnrange = nil; // range of columns wanted
61 static int Defwidth = 10; // default column width if none given
62 static int Biffver; // file version
63 static int Datemode; // date ref: 1899-Dec-31 or 1904-Jan-1
64 static char **Strtab = nil; // label contents heap
65 static int Nstrtab = 0; // number of entries in Strtab
66 static int *Xf; // array of extended format indices
67 static int Nxf = 0; // number of entries in Xf
68 static Biobuf *bo; // stdout (sic)
69 static int Doquote = 1; // quote text fields if they are rc(1) unfriendly
72 static int Width[Nwidths]; // array of column widths
73 static int Ncols = -1; // max columns in table used
74 static int Content = 0; // type code for contents of sheet
75 static Row *Root = nil; // one worksheet's worth of cells
77 static char *Months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
78 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; // month abbreviations, indexed by tm->mon when formatting dates
80 static char *Errmsgs[] = {
81 [0x0] "#NULL!", // intersection of two cell ranges is empty
82 [0x7] "#DIV/0!", // division by zero
83 [0xf] "#VALUE!", // wrong type of operand
84 [0x17] "#REF!", // illegal or deleted cell reference
85 [0x1d] "#NAME?", // wrong function or range name
86 [0x24] "#NUM!", // value range overflow
87 [0x2a] "#N/A!", // argument of function not available
91 wanted(char *range, int here)
102 n = strtol(p, &p, 10);
107 if(s != -1 && here > s && here < n)
113 if(s != -1 && here > s && here < n)
125 sysfatal("%s malformed range spec", range);
133 cell(int r, int c, int f, int type, void *val)
141 if((ncol = malloc(sizeof(Col))) == nil)
142 sysfatal("no memory");
149 case Tnumber: ncol->number = *(double *)val; break;
150 case Tlabel: ncol->label = (char *)val; break;
151 case Tindex: ncol->index = *(int *)val; break;
152 case Tbool: ncol->bool = *(int *)val; break;
153 case Terror: ncol->error = *(int *)val; break;
154 default: sysfatal("can't happen error");
157 if(Root == nil || Root->r > r){
158 if((nrow = malloc(sizeof(Row))) == nil)
159 sysfatal("no memory");
168 for(row = Root; row; row = row->next){
171 ncol->next = row->col;
176 for(col = row->col; col; col = col->next)
177 if(col->next == nil || col->next->c > c){
178 ncol->next = col->next;
185 if(row->next == nil || row->next->r > r){
186 if((nrow = malloc(sizeof(Row))) == nil)
187 sysfatal("no memory");
190 nrow->next = row->next;
195 sysfatal("cannot happen error");
203 /* Beware - These epochs are wrong, this
204 * is due to Excel still remaining compatible
205 * with Lotus-123, which incorrectly believed 1900
209 t -= 24107; // epoch = 1/1/1904
211 t -= 25569; // epoch = 31/12/1899
214 return localtime((long)t);
218 numfmt(int fmt, int min, int max, double num)
224 snprint(buf, sizeof(buf),"%.0f%%", num);
227 snprint(buf, sizeof(buf),"%f%%", num);
229 if(fmt == 11 || fmt == 48)
230 snprint(buf, sizeof(buf),"%e", num);
232 if(fmt >= 14 && fmt <= 17){
234 snprint(buf, sizeof(buf),"%d-%s-%d",
235 tm->mday, Months[tm->mon], tm->year+1900);
238 if((fmt >= 18 && fmt <= 21) || (fmt >= 45 && fmt <= 47)){
240 snprint(buf, sizeof(buf),"%02d:%02d:%02d", tm->hour, tm->min, tm->sec);
246 snprint(buf, sizeof(buf),"%02d:%02d:%02d %d-%s-%d",
247 tm->hour, tm->min, tm->sec,
248 tm->mday, Months[tm->mon], tm->year+1900);
251 snprint(buf, sizeof(buf),"%g", num);
253 Bprint(bo, "%-*.*q", min, max, buf);
262 int i, n, last, min, max;
269 for(r = Root; r; r = r->next){
271 for(c = r->col; c; c = c->next){
273 if(! wanted(Columnrange, n))
276 if(c->c < 0 || c->c >= Nwidths || (min = Width[c->c]) == 0)
278 if((c->next && c->c == c->next->c) || Nopad)
282 max = min -2; // FIXME: -2 because of bug %q format ?
286 if(Xf == nil || Xf[c->f] == 0)
287 Bprint(bo, "%-*.*g", min, max, c->number);
289 numfmt(Xf[c->f], min, max, c->number);
292 Bprint(bo, strfmt, min, max, c->label);
295 Bprint(bo, strfmt, min, max, (c->bool)? "True": "False");
298 if(c->index < 0 || c->index >= Nstrtab)
299 sysfatal("SST string out of range - corrupt file?");
300 Bprint(bo, strfmt, min, max, Strtab[c->index]);
303 if(c->error < 0 || c->error >= nelem(Errmsgs) || !Errmsgs[c->error])
304 Bprint(bo, "#ERR=%d", c->index);
306 Bprint(bo, strfmt, min, max, Errmsgs[c->error]);
309 sysfatal("cannot happen error");
314 for(i = n+1, c1 = c->next; c1; c1 = c1->next, i++)
315 if(wanted(Columnrange, i)){
321 if(c->next->c == c->c) // bar charts
324 Bprint(bo, "%s", Delim);
325 for(i = c->c; c->next && i < c->next->c -1; i++)
326 Bprint(bo, "%-*.*s%s", min, max, "", Delim);
331 for(i = r->r; i < r->next->r; i++)
348 if(c->type == Tlabel)
360 memset(Width, 0, sizeof(Width));
365 skip(Biff *b, int len)
367 assert(len <= b->len);
368 if(Bseek(b->bp, len, 1) == -1)
369 sysfatal("seek failed - %r");
374 gmem(Biff *b, void *p, int n)
377 sysfatal("short record %d < %d", b->len, n);
378 if(Bread(b->bp, p, n) != n)
379 sysfatal("unexpected EOF - %r");
388 int addr, got, n, i, j;
391 off = Boffset(b->bp);
392 while(addr < b->len){
393 n = (b->len >= sizeof(buf))? sizeof(buf): b->len;
394 got = Bread(b->bp, buf, n);
396 Bprint(bo, " %6d ", addr);
399 for(i = 0; i < got; i++)
400 Bprint(bo, "%02x ", buf[i]);
401 for(j = i; j < 16; j++)
404 for(i = 0; i < got; i++)
405 Bprint(bo, "%c", isprint(buf[i])? buf[i]: '.');
408 Bseek(b->bp, off, 0);
415 if((c = Bgetc(b->bp)) == -1)
416 return -1; // real EOF
418 if((c = Bgetc(b->bp)) == -1)
419 sysfatal("unexpected EOF - %r");
421 if((c = Bgetc(b->bp)) == -1)
422 sysfatal("unexpected EOF - %r");
424 if((c = Bgetc(b->bp)) == -1)
425 sysfatal("unexpected EOF - %r");
427 if(b->op == 0 && b->len == 0)
430 Bprint(bo, "op=0x%x len=%d\n", b->op, b->len);
445 for(i = 0; i < n; i++){
446 if((c = Bgetc(b->bp)) == -1)
447 sysfatal("unexpected EOF - %r");
470 memcpy(&d, &n, sizeof(d));
482 uvlong n = gint(b, 8);
483 memcpy(&d, &n, sizeof(n));
488 gstr(Biff *b, int len_width)
492 int nch, w, ap, ln, rt, opt;
499 if(b->len < len_width){
501 sysfatal("starting STRING expected CONTINUE, got EOF");
503 sysfatal("starting STRING expected CONTINUE, got op=0x%x", b->op);
506 ln = gint(b, len_width);
508 if((buf = calloc(ln+1, sizeof(char))) == nil)
509 sysfatal("no memory");
515 if((buf = calloc(ln+1, sizeof(char)*UTFmax)) == nil)
516 sysfatal("no memory");
528 if(opt & Asian_phonetic)
533 w = (opt & Unicode)? sizeof(Rune): sizeof(char);
537 p += runetochar(p, &r);
547 sysfatal("in STRING expected CONTINUE, got EOF");
549 sysfatal("in STRING expected CONTINUE, got op=0x%x", b->op);
559 skip(b, 4); // total # strings
560 Nstrtab = gint(b, 4); // # unique strings
561 if((Strtab = calloc(Nstrtab, sizeof(char *))) == nil)
562 sysfatal("no memory");
563 for(n = 0; n < Nstrtab; n++)
564 Strtab[n] = gstr(b, 2);
571 int r = gint(b, 2); // row
572 int c = gint(b, 2); // col
573 int f = gint(b, 2); // formatting ref
574 int v = gint(b, 1); // bool value / err code
575 int t = gint(b, 1); // type
576 cell(r, c, f, (t)? Terror: Tbool, &v);
582 int r = gint(b, 2); // row
583 int c = gint(b, 2); // col
584 int f = gint(b, 2); // formatting ref
585 double v = grk(b); // value
586 cell(r, c, f, Tnumber, &v);
592 int r = gint(b, 2); // row
593 int c = gint(b, 2); // first col
595 int f = gint(b, 2); // formatting ref
596 double v = grk(b); // value
597 cell(r, c++, f, Tnumber, &v);
604 int r = gint(b, 2); // row
605 int c = gint(b, 2); // col
606 int f = gint(b, 2); // formatting ref
607 double v = gdoub(b); // double
608 cell(r, c, f, Tnumber, &v);
614 int r = gint(b, 2); // row
615 int c = gint(b, 2); // col
616 int f = gint(b, 2); // formatting ref
617 char *s = gstr(b, 2); // byte string
618 cell(r, c, f, Tlabel, s);
625 int r = gint(b, 2); // row
626 int c = gint(b, 2); // col
627 int f = gint(b, 2); // formatting ref
628 int i = gint(b, 2); // sst string ref
629 cell(r, c, f, Tindex, &i);
635 Biffver = gint(b, 2);
636 Content = gint(b, 2);
642 Defwidth = gint(b, 2);
648 Datemode = gint(b, 2);
659 0x005, "Workbook globals",
660 0x006, "Visual Basic module",
663 0x040, "Macro sheet",
664 0x100, "Workspace file",
666 static int sheet = 0;
668 if(! wanted(Sheetrange, ++sheet)){
675 for(i = 0; i < nelem(names); i++)
676 if(names[i].n == Content){
677 Bprint(bo, "\n# contents %s\n", names[i].s);
682 if(Content == 0x10) // Worksheet
698 sysfatal("negative column number (%d)", c1);
700 sysfatal("too many columns (%d > %d)", c2, Nwidths);
708 for(c = c1; c <= c2; c++)
716 static int nalloc = 0;
722 if((Xf = realloc(Xf, nalloc*sizeof(int))) == nil)
723 sysfatal("no memory");
731 Bprint(bo, "# author %s\n", gstr(b, 2));
737 int codepage = gint(b, 2);
738 if(codepage != 1200) // 1200 == UTF-16
739 Bprint(bo, "# codepage %d\n", codepage);
749 void (*func)(Biff *);
770 while(getrec(b) != -1){
771 for(i = 0; i < nelem(dispatch); i++)
772 if(b->op == dispatch[i].op)
773 (*dispatch[i].func)(b);
781 fprint(2, "usage: %s [-Danqt] [-w worksheets] [-c columns] [-d delim] /mnt/doc/Workbook\n", argv0);
786 main(int argc, char *argv[])
789 Biobuf bin, bout, *bp;
802 Delim = EARGF(usage());
811 Columnrange = EARGF(usage());
814 Sheetrange = EARGF(usage());
826 Binit(bo, OWRITE, 1);
829 for(i = 0; i < argc; i++){
830 if((bp = Bopen(argv[i], OREAD)) == nil)
831 sysfatal("%s cannot open - %r", argv[i]);
836 Binit(&bin, 0, OREAD);