]> git.lizzy.rs Git - plan9front.git/blob - sys/src/cmd/aux/antiword/wordole.c
exec(2): fix prototypes
[plan9front.git] / sys / src / cmd / aux / antiword / wordole.c
1 /*
2  * wordole.c
3  * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
4  *
5  * Description:
6  * Deal with the OLE internals of a MS Word file
7  */
8
9 #include <string.h>
10 #include "antiword.h"
11
/*
 * Private type for Property Set Storage entries.
 * bGetPPS fills in one of these for every PPS record it reads.
 */
typedef struct pps_entry_tag {
        ULONG   ulNext;         /* PPS index read from record offset 0x48, or PPS_NUMBER_INVALID */
        ULONG   ulPrevious;     /* PPS index read from record offset 0x44, or PPS_NUMBER_INVALID */
        ULONG   ulDir;          /* PPS index read from record offset 0x4c, or PPS_NUMBER_INVALID */
        ULONG   ulSB;           /* value read from record offset 0x74 (presumably the start block) */
        ULONG   ulSize;         /* value read from record offset 0x78; entries with size 0 are ignored */
        int     iLevel;         /* starts at INT_MAX, then set by vComputePPSlevels (root gets 0) */
        char    szName[32];     /* name as a NUL-terminated string, made by vName2String */
        UCHAR   ucType;         /* byte at record offset 0x42; 5 marks the root, 2 the entries searched */
} pps_entry_type;
23
/*
 * Show that a PPS number or index should not be used.
 * The ulNext, ulPrevious and ulDir links of an entry are compared with this
 * value before they are used as an index into the list of PPS entries.
 */
#define PPS_NUMBER_INVALID      0xffffffffUL
26
27
/*
 * Macro to make sure all such statements will be identical.
 *
 * Destroys the small block list and hands each of the five block arrays to
 * xfree, storing xfree's result back into the pointer. The macro refers to
 * aulRootList, aulSbdList, aulBbdList, aulSBD and aulBBD by name, so it can
 * only be used where variables with exactly these names are in scope.
 * The do { } while(0) wrapper lets it be used like a single statement.
 */
#define FREE_ALL()              \
        do {\
                vDestroySmallBlockList();\
                aulRootList = xfree(aulRootList);\
                aulSbdList = xfree(aulSbdList);\
                aulBbdList = xfree(aulBbdList);\
                aulSBD = xfree(aulSBD);\
                aulBBD = xfree(aulBBD);\
        } while(0)
38
39
40 /*
41  * ulReadLong - read four bytes from the given file and offset
42  */
43 static ULONG
44 ulReadLong(FILE *pFile, ULONG ulOffset)
45 {
46         UCHAR   aucBytes[4];
47
48         fail(pFile == NULL);
49
50         if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51                 werr(1, "Read long 0x%lx not possible", ulOffset);
52         }
53         return ulGetLong(0, aucBytes);
54 } /* end of ulReadLong */
55
56 /*
57  * vName2String - turn the name into a proper string.
58  */
59 static void
60 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
61 {
62         char    *pcChar;
63         size_t  tIndex;
64
65         fail(aucBytes == NULL || szName == NULL);
66
67         if (tNameSize < 2) {
68                 szName[0] = '\0';
69                 return;
70         }
71         for (tIndex = 0, pcChar = szName;
72              tIndex < 2 * tNameSize;
73              tIndex += 2, pcChar++) {
74                 *pcChar = (char)aucBytes[tIndex];
75         }
76         szName[tNameSize - 1] = '\0';
77 } /* end of vName2String */
78
79 /*
80  * tReadBlockIndices - read the Big/Small Block Depot indices
81  *
82  * Returns the number of indices read
83  */
84 static size_t
85 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86         size_t tMaxRec, ULONG ulOffset)
87 {
88         size_t  tDone;
89         int     iIndex;
90         UCHAR   aucBytes[BIG_BLOCK_SIZE];
91
92         fail(pFile == NULL || aulBlockDepot == NULL);
93         fail(tMaxRec == 0);
94
95         /* Read a big block with BBD or SBD indices */
96         if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97                 werr(0, "Reading big block from 0x%lx is not possible",
98                         ulOffset);
99                 return 0;
100         }
101         /* Split the big block into indices, an index is four bytes */
102         tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103         for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104                 aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105                 NO_DBG_DEC(aulBlockDepot[iIndex]);
106         }
107         return tDone;
108 } /* end of tReadBlockIndices */
109
110 /*
111  * bGetBBD - get the Big Block Depot indices from the index-blocks
112  */
113 static BOOL
114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115         ULONG *aulBBD, size_t tBBDLen)
116 {
117         ULONG   ulBegin;
118         size_t  tToGo, tDone;
119         int     iIndex;
120
121         fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
122
123         DBG_MSG("bGetBBD");
124
125         tToGo = tBBDLen;
126         for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127                 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128                 NO_DBG_HEX(ulBegin);
129                 tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130                 fail(tDone > tToGo);
131                 if (tDone == 0) {
132                         return FALSE;
133                 }
134                 aulBBD += tDone;
135                 tToGo -= tDone;
136         }
137         return tToGo == 0;
138 } /* end of bGetBBD */
139
140 /*
141  * bGetSBD - get the Small Block Depot indices from the index-blocks
142  */
143 static BOOL
144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145         ULONG *aulSBD, size_t tSBDLen)
146 {
147         ULONG   ulBegin;
148         size_t  tToGo, tDone;
149         int     iIndex;
150
151         fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
152
153         DBG_MSG("bGetSBD");
154
155         tToGo = tSBDLen;
156         for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157                 fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158                 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159                 NO_DBG_HEX(ulBegin);
160                 tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161                 fail(tDone > tToGo);
162                 if (tDone == 0) {
163                         return FALSE;
164                 }
165                 aulSBD += tDone;
166                 tToGo -= tDone;
167         }
168         return tToGo == 0;
169 } /* end of bGetSBD */
170
171 /*
172  * vComputePPSlevels - compute the levels of the Property Set Storage entries
173  */
174 static void
175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176                         int iLevel, int iRecursionLevel)
177 {
178         fail(atPPSlist == NULL || pNode == NULL);
179         fail(iLevel < 0 || iRecursionLevel < 0);
180
181         if (iRecursionLevel > 25) {
182                 /* This removes the possibility of an infinite recursion */
183                 DBG_DEC(iRecursionLevel);
184                 return;
185         }
186         if (pNode->iLevel <= iLevel) {
187                 /* Avoid entering a loop */
188                 DBG_DEC(iLevel);
189                 DBG_DEC(pNode->iLevel);
190                 return;
191         }
192
193         pNode->iLevel = iLevel;
194
195         if (pNode->ulDir != PPS_NUMBER_INVALID) {
196                 vComputePPSlevels(atPPSlist,
197                                 &atPPSlist[pNode->ulDir],
198                                 iLevel + 1,
199                                 iRecursionLevel + 1);
200         }
201         if (pNode->ulNext != PPS_NUMBER_INVALID) {
202                 vComputePPSlevels(atPPSlist,
203                                 &atPPSlist[pNode->ulNext],
204                                 iLevel,
205                                 iRecursionLevel + 1);
206         }
207         if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208                 vComputePPSlevels(atPPSlist,
209                                 &atPPSlist[pNode->ulPrevious],
210                                 iLevel,
211                                 iRecursionLevel + 1);
212         }
213 } /* end of vComputePPSlevels */
214
215 /*
216  * bGetPPS - search the Property Set Storage for three sets
217  *
218  * Return TRUE if the WordDocument PPS is found
219  */
220 static BOOL
221 bGetPPS(FILE *pFile,
222         const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
223 {
224         pps_entry_type  *atPPSlist;
225         ULONG   ulBegin, ulOffset, ulTmp;
226         size_t  tNbrOfPPS, tNameSize;
227         int     iIndex, iStartBlock, iRootIndex;
228         BOOL    bWord, bExcel;
229         UCHAR   aucBytes[PROPERTY_SET_STORAGE_SIZE];
230
231         fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
232
233         DBG_MSG("bGetPPS");
234
235         NO_DBG_DEC(tRootListLen);
236
237         bWord = FALSE;
238         bExcel = FALSE;
239         (void)memset(pPPS, 0, sizeof(*pPPS));
240
241         /* Read and store all the Property Set Storage entries */
242
243         tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244         atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245         iRootIndex = 0;
246
247         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248                 ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249                 iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250                 ulOffset = ulTmp % BIG_BLOCK_SIZE;
251                 ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252                                 ulOffset;
253                 NO_DBG_HEX(ulBegin);
254                 if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255                                                         ulBegin, pFile)) {
256                         werr(0, "Reading PPS %d is not possible", iIndex);
257                         atPPSlist = xfree(atPPSlist);
258                         return FALSE;
259                 }
260                 tNameSize = (size_t)usGetWord(0x40, aucBytes);
261                 tNameSize = (tNameSize + 1) / 2;
262                 vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263                 atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264                 if (atPPSlist[iIndex].ucType == 5) {
265                         iRootIndex = iIndex;
266                 }
267                 atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268                 atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269                 atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270                 atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271                 atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272                 atPPSlist[iIndex].iLevel = INT_MAX;
273                 if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274                      atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275                     (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276                      atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277                     (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278                      atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279                         DBG_DEC(iIndex);
280                         DBG_DEC(atPPSlist[iIndex].ulPrevious);
281                         DBG_DEC(atPPSlist[iIndex].ulNext);
282                         DBG_DEC(atPPSlist[iIndex].ulDir);
283                         DBG_DEC(tNbrOfPPS);
284                         werr(0, "The Property Set Storage is damaged");
285                         atPPSlist = xfree(atPPSlist);
286                         return FALSE;
287                 }
288         }
289
290 #if 0 /* defined(DEBUG) */
291         DBG_MSG("Before");
292         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293                 DBG_MSG(atPPSlist[iIndex].szName);
294                 DBG_HEX(atPPSlist[iIndex].ulDir);
295                 DBG_HEX(atPPSlist[iIndex].ulPrevious);
296                 DBG_HEX(atPPSlist[iIndex].ulNext);
297                 DBG_DEC(atPPSlist[iIndex].ulSB);
298                 DBG_HEX(atPPSlist[iIndex].ulSize);
299                 DBG_DEC(atPPSlist[iIndex].iLevel);
300         }
301 #endif /* DEBUG */
302
303         /* Add level information to each entry */
304         vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
305
306         /* Check the entries on level 1 for the required information */
307         NO_DBG_MSG("After");
308         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309 #if 0 /* defined(DEBUG) */
310                 DBG_MSG(atPPSlist[iIndex].szName);
311                 DBG_HEX(atPPSlist[iIndex].ulDir);
312                 DBG_HEX(atPPSlist[iIndex].ulPrevious);
313                 DBG_HEX(atPPSlist[iIndex].ulNext);
314                 DBG_DEC(atPPSlist[iIndex].ulSB);
315                 DBG_HEX(atPPSlist[iIndex].ulSize);
316                 DBG_DEC(atPPSlist[iIndex].iLevel);
317 #endif /* DEBUG */
318                 if (atPPSlist[iIndex].iLevel != 1 ||
319                     atPPSlist[iIndex].ucType != 2 ||
320                     atPPSlist[iIndex].szName[0] == '\0' ||
321                     atPPSlist[iIndex].ulSize == 0) {
322                         /* This entry can be ignored */
323                         continue;
324                 }
325                 if (pPPS->tWordDocument.ulSize == 0 &&
326                     STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327                         pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328                         pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329                         bWord = TRUE;
330                 } else if (pPPS->tData.ulSize == 0 &&
331                            STREQ(atPPSlist[iIndex].szName, "Data")) {
332                         pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333                         pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334                 } else if (pPPS->t0Table.ulSize == 0 &&
335                            STREQ(atPPSlist[iIndex].szName, "0Table")) {
336                         pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337                         pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338                 } else if (pPPS->t1Table.ulSize == 0 &&
339                            STREQ(atPPSlist[iIndex].szName, "1Table")) {
340                         pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341                         pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342                 } else if (pPPS->tSummaryInfo.ulSize == 0 &&
343                            STREQ(atPPSlist[iIndex].szName,
344                                                 "\005SummaryInformation")) {
345                         pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346                         pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347                 } else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348                            STREQ(atPPSlist[iIndex].szName,
349                                         "\005DocumentSummaryInformation")) {
350                         pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351                         pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352                 } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353                            STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354                         bExcel = TRUE;
355                 }
356         }
357
358         /* Free the space for the Property Set Storage entries */
359         atPPSlist = xfree(atPPSlist);
360
361         /* Draw your conclusions */
362         if (bWord) {
363                 return TRUE;
364         }
365
366         if (bExcel) {
367                 werr(0, "Sorry, but this is an Excel spreadsheet");
368         } else {
369                 werr(0, "This OLE file does not contain a Word document");
370         }
371         return FALSE;
372 } /* end of bGetPPS */
373
374 /*
375  * vGetBbdList - make a list of the places to find big blocks
376  */
377 static void
378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
379 {
380         int     iIndex;
381
382         fail(pFile == NULL);
383         fail(iNbr > 127);
384         fail(aulBbdList == NULL);
385
386         NO_DBG_DEC(iNbr);
387         for (iIndex = 0; iIndex < iNbr; iIndex++) {
388                 aulBbdList[iIndex] =
389                         ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390                 NO_DBG_DEC(iIndex);
391                 NO_DBG_HEX(aulBbdList[iIndex]);
392         }
393 } /* end of vGetBbdList */
394
395 /*
396  * bGetDocumentText - make a list of the text blocks of a Word document
397  *
398  * Return TRUE when succesful, otherwise FALSE
399  */
400 static BOOL
401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402         const ULONG *aulBBD, size_t tBBDLen,
403         const ULONG *aulSBD, size_t tSBDLen,
404         const UCHAR *aucHeader, int iWordVersion)
405 {
406         ULONG   ulBeginOfText;
407         ULONG   ulTextLen, ulFootnoteLen, ulEndnoteLen;
408         ULONG   ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409         ULONG   ulTextBoxLen, ulHdrTextBoxLen;
410         UINT    uiQuickSaves;
411         BOOL    bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412         USHORT  usIdent, usDocStatus;
413
414         fail(pFile == NULL || pPPS == NULL);
415         fail(aulBBD == NULL);
416         fail(aulSBD == NULL);
417
418         DBG_MSG("bGetDocumentText");
419
420         /* Get the "magic number" from the header */
421         usIdent = usGetWord(0x00, aucHeader);
422         DBG_HEX(usIdent);
423         bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424                         usIdent == 0xa697 || usIdent == 0xa699;
425         /* Get the status flags from the header */
426         usDocStatus = usGetWord(0x0a, aucHeader);
427         DBG_HEX(usDocStatus);
428         bTemplate = (usDocStatus & BIT(0)) != 0;
429         DBG_MSG_C(bTemplate, "This document is a Template");
430         bFastSaved = (usDocStatus & BIT(2)) != 0;
431         uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432         DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433         DBG_DEC_C(bFastSaved, uiQuickSaves);
434         bEncrypted = (usDocStatus & BIT(8)) != 0;
435         if (bEncrypted) {
436                 werr(0, "Encrypted documents are not supported");
437                 return FALSE;
438         }
439
440         /* Get length information */
441         ulBeginOfText = ulGetLong(0x18, aucHeader);
442         DBG_HEX(ulBeginOfText);
443         switch (iWordVersion) {
444         case 6:
445         case 7:
446                 ulTextLen = ulGetLong(0x34, aucHeader);
447                 ulFootnoteLen = ulGetLong(0x38, aucHeader);
448                 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449                 ulMacroLen = ulGetLong(0x40, aucHeader);
450                 ulAnnotationLen = ulGetLong(0x44, aucHeader);
451                 ulEndnoteLen = ulGetLong(0x48, aucHeader);
452                 ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453                 ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454                 break;
455         case 8:
456                 ulTextLen = ulGetLong(0x4c, aucHeader);
457                 ulFootnoteLen = ulGetLong(0x50, aucHeader);
458                 ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459                 ulMacroLen = ulGetLong(0x58, aucHeader);
460                 ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461                 ulEndnoteLen = ulGetLong(0x60, aucHeader);
462                 ulTextBoxLen = ulGetLong(0x64, aucHeader);
463                 ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464                 break;
465         default:
466                 werr(0, "This version of Word is not supported");
467                 return FALSE;
468         }
469         DBG_DEC(ulTextLen);
470         DBG_DEC(ulFootnoteLen);
471         DBG_DEC(ulHdrFtrLen);
472         DBG_DEC(ulMacroLen);
473         DBG_DEC(ulAnnotationLen);
474         DBG_DEC(ulEndnoteLen);
475         DBG_DEC(ulTextBoxLen);
476         DBG_DEC(ulHdrTextBoxLen);
477
478         /* Make a list of the text blocks */
479         switch (iWordVersion) {
480         case 6:
481         case 7:
482                 if (bFastSaved) {
483                         bSuccess = bGet6DocumentText(pFile,
484                                         bFarEastWord,
485                                         pPPS->tWordDocument.ulSB,
486                                         aulBBD, tBBDLen,
487                                         aucHeader);
488                 } else {
489                         bSuccess = bAddTextBlocks(ulBeginOfText,
490                                 ulTextLen +
491                                 ulFootnoteLen +
492                                 ulHdrFtrLen +
493                                 ulMacroLen + ulAnnotationLen +
494                                 ulEndnoteLen +
495                                 ulTextBoxLen + ulHdrTextBoxLen,
496                                 bFarEastWord,
497                                 IGNORE_PROPMOD,
498                                 pPPS->tWordDocument.ulSB,
499                                 aulBBD, tBBDLen);
500                 }
501                 break;
502         case 8:
503                 bSuccess = bGet8DocumentText(pFile,
504                                 pPPS,
505                                 aulBBD, tBBDLen, aulSBD, tSBDLen,
506                                 aucHeader);
507                 break;
508         default:
509                 werr(0, "This version of Word is not supported");
510                 bSuccess = FALSE;
511                 break;
512         }
513
514         if (bSuccess) {
515                 vSplitBlockList(pFile,
516                                 ulTextLen,
517                                 ulFootnoteLen,
518                                 ulHdrFtrLen,
519                                 ulMacroLen,
520                                 ulAnnotationLen,
521                                 ulEndnoteLen,
522                                 ulTextBoxLen,
523                                 ulHdrTextBoxLen,
524                                 !bFastSaved && iWordVersion == 8);
525         } else {
526                 vDestroyTextBlockList();
527                 werr(0, "I can't find the text of this document");
528         }
529         return bSuccess;
530 } /* end of bGetDocumentText */
531
532 /*
533  * vGetDocumentData - make a list of the data blocks of a Word document
534  */
535 static void
536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537         const ULONG *aulBBD, size_t tBBDLen,
538         const UCHAR *aucHeader, int iWordVersion)
539 {
540         options_type    tOptions;
541         ULONG   ulBeginOfText;
542         BOOL    bFastSaved, bHasImages, bSuccess;
543         USHORT  usDocStatus;
544
545         fail(pFile == NULL);
546         fail(pPPS == NULL);
547         fail(aulBBD == NULL);
548
549         /* Get the options */
550         vGetOptions(&tOptions);
551
552         /* Get the status flags from the header */
553         usDocStatus = usGetWord(0x0a, aucHeader);
554         DBG_HEX(usDocStatus);
555         bFastSaved = (usDocStatus & BIT(2)) != 0;
556         bHasImages = (usDocStatus & BIT(3)) != 0;
557
558         if (!bHasImages ||
559             tOptions.eConversionType == conversion_text ||
560             tOptions.eConversionType == conversion_fmt_text ||
561             tOptions.eConversionType == conversion_xml ||
562             tOptions.eImageLevel == level_no_images) {
563                 /*
564                  * No images in the document or text-only output or
565                  * no images wanted, so no data blocks will be needed
566                  */
567                 vDestroyDataBlockList();
568                 return;
569         }
570
571         /* Get length information */
572         ulBeginOfText = ulGetLong(0x18, aucHeader);
573         DBG_HEX(ulBeginOfText);
574
575         /* Make a list of the data blocks */
576         switch (iWordVersion) {
577         case 6:
578         case 7:
579                 /*
580                  * The data blocks are in the text stream. The text stream
581                  * is in "fast saved" format or "normal saved" format
582                  */
583                 if (bFastSaved) {
584                         bSuccess = bGet6DocumentData(pFile,
585                                         pPPS->tWordDocument.ulSB,
586                                         aulBBD, tBBDLen,
587                                         aucHeader);
588                 } else {
589                         bSuccess = bAddDataBlocks(ulBeginOfText,
590                                         (ULONG)LONG_MAX,
591                                         pPPS->tWordDocument.ulSB,
592                                         aulBBD, tBBDLen);
593                 }
594                 break;
595         case 8:
596                 /*
597                  * The data blocks are in the data stream. The data stream
598                  * is always in "normal saved" format
599                  */
600                 bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
601                                 pPPS->tData.ulSB, aulBBD, tBBDLen);
602                 break;
603         default:
604                 werr(0, "This version of Word is not supported");
605                 bSuccess = FALSE;
606                 break;
607         }
608
609         if (!bSuccess) {
610                 vDestroyDataBlockList();
611                 werr(0, "I can't find the data of this document");
612         }
613 } /* end of vGetDocumentData */
614
/*
 * iInitDocumentOLE - initialize an OLE document
 *
 * pFile      the file to read from
 * lFilesize  the size of the file in bytes; it is used to work out the
 *            highest possible big block number
 *
 * The steps are: read the Big Block Depot, read the Small Block Depot,
 * follow the chain of the root list, find the streams with bGetPPS, create
 * the small block list, read the header of the WordDocument stream and
 * finally collect the text, the data and the other document information.
 * All the arrays allocated here are released (FREE_ALL) before returning.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocumentOLE(FILE *pFile, long lFilesize)
{
        pps_info_type   PPS_info;
        /* NB: FREE_ALL refers to the next five pointers by name */
        ULONG   *aulBBD, *aulSBD;
        ULONG   *aulRootList, *aulBbdList, *aulSbdList;
        ULONG   ulBdbListStart, ulAdditionalBBDlist;
        ULONG   ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
        ULONG   ulStart, ulTmp;
        long    lMaxBlock;
        size_t  tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
        int     iWordVersion, iIndex, iToGo;
        BOOL    bSuccess;
        USHORT  usIdent, usDocStatus;
        UCHAR   aucHeader[HEADER_SIZE];

        fail(pFile == NULL);

        /* The Big Block Depot gets one entry for each possible big block */
        lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
        DBG_DEC(lMaxBlock);
        if (lMaxBlock < 1) {
                /* The file is too small */
                return -1;
        }
        tBBDLen = (size_t)(lMaxBlock + 1);
        /* These values are read from fixed offsets at the start of the file */
        tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
        DBG_DEC(tNumBbdBlocks);
        ulRootStartblock = ulReadLong(pFile, 0x30);
        DBG_DEC(ulRootStartblock);
        ulSbdStartblock = ulReadLong(pFile, 0x3c);
        DBG_DEC(ulSbdStartblock);
        ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
        DBG_HEX(ulAdditionalBBDlist);
        /*
         * Offsets 0x74 and 0x78 of the first record in the first root block
         * are the same offsets bGetPPS reads ulSB and ulSize from.
         */
        ulSBLstartblock = ulReadLong(pFile,
                        (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
        DBG_DEC(ulSBLstartblock);
        /* The size is in bytes, the length is in small blocks */
        tSBDLen = (size_t)(ulReadLong(pFile,
                        (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
                        SMALL_BLOCK_SIZE);
        /* All to be xcalloc-ed pointers to NULL */
        aulRootList = NULL;
        aulSbdList = NULL;
        aulBbdList = NULL;
        aulSBD = NULL;
        aulBBD = NULL;
/* Big Block Depot */
        aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
        aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
        iToGo = (int)tNumBbdBlocks;
        /* The first 109 entries (at most) start at file offset 0x4c */
        vGetBbdList(pFile, min(iToGo, 109),  aulBbdList, 0x4c);
        ulStart = 109;
        iToGo -= 109;
        /*
         * The remaining entries come from a chain of additional blocks:
         * 127 entries per block, followed by the number of the next block.
         */
        while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
                ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
                vGetBbdList(pFile, min(iToGo, 127),
                                        aulBbdList + ulStart, ulBdbListStart);
                ulAdditionalBBDlist = ulReadLong(pFile,
                                        ulBdbListStart + 4 * 127);
                DBG_DEC(ulAdditionalBBDlist);
                DBG_HEX(ulAdditionalBBDlist);
                ulStart += 127;
                iToGo -= 127;
        }
        if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
                FREE_ALL();
                return -1;
        }
        aulBbdList = xfree(aulBbdList);
/* Small Block Depot */
        aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
        aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
        /* Follow the chain in the Big Block Depot, tBBDLen steps at most */
        for (iIndex = 0, ulTmp = ulSbdStartblock;
             iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
             iIndex++, ulTmp = aulBBD[ulTmp]) {
                if (ulTmp >= (ULONG)tBBDLen) {
                        DBG_DEC(ulTmp);
                        DBG_DEC(tBBDLen);
                        /*
                         * NOTE(review): the loop goes on to use ulTmp as an
                         * index, so werr with a first argument of 1 is
                         * presumably not expected to return - confirm
                         */
                        werr(1, "The Big Block Depot is damaged");
                }
                aulSbdList[iIndex] = ulTmp;
                NO_DBG_HEX(aulSbdList[iIndex]);
        }
        /* NB: the allocated length is passed, not the length of the chain */
        if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
                FREE_ALL();
                return -1;
        }
        aulSbdList = xfree(aulSbdList);
/* Root list */
        /* First pass: count the number of blocks in the chain */
        for (tRootListLen = 0, ulTmp = ulRootStartblock;
             tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
             tRootListLen++, ulTmp = aulBBD[ulTmp]) {
                if (ulTmp >= (ULONG)tBBDLen) {
                        DBG_DEC(ulTmp);
                        DBG_DEC(tBBDLen);
                        werr(1, "The Big Block Depot is damaged");
                }
        }
        if (tRootListLen == 0) {
                werr(0, "No Rootlist found");
                FREE_ALL();
                return -1;
        }
        /* Second pass: walk the same chain again and store the numbers */
        aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
        for (iIndex = 0, ulTmp = ulRootStartblock;
             iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
             iIndex++, ulTmp = aulBBD[ulTmp]) {
                if (ulTmp >= (ULONG)tBBDLen) {
                        DBG_DEC(ulTmp);
                        DBG_DEC(tBBDLen);
                        werr(1, "The Big Block Depot is damaged");
                }
                aulRootList[iIndex] = ulTmp;
                NO_DBG_DEC(aulRootList[iIndex]);
        }
        fail(tRootListLen != (size_t)iIndex);
        bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
        aulRootList = xfree(aulRootList);
        if (!bSuccess) {
                FREE_ALL();
                return -1;
        }
/* Small block list */
        if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
                FREE_ALL();
                return -1;
        }

        if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
                DBG_DEC(PPS_info.tWordDocument.ulSize);
                FREE_ALL();
                werr(0, "I'm afraid the text stream of this file "
                        "is too small to handle.");
                return -1;
        }
        /* Read the headerblock */
        if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
                        aulBBD, tBBDLen, BIG_BLOCK_SIZE,
                        aucHeader, 0, HEADER_SIZE)) {
                FREE_ALL();
                return -1;
        }
        usIdent = usGetWord(0x00, aucHeader);
        DBG_HEX(usIdent);
        fail(usIdent != 0x8098 &&       /* Word 7 for oriental languages */
             usIdent != 0x8099 &&       /* Word 7 for oriental languages */
             usIdent != 0xa5dc &&       /* Word 6 & 7 */
             usIdent != 0xa5ec &&       /* Word 7 & 97 & 98 */
             usIdent != 0xa697 &&       /* Word 7 for oriental languages */
             usIdent != 0xa699);        /* Word 7 for oriental languages */
        iWordVersion = iGetVersionNumber(aucHeader);
        if (iWordVersion < 6) {
                FREE_ALL();
                werr(0, "This file is from a version of Word before Word 6.");
                return -1;
        }

        /* Get the status flags from the header */
        usDocStatus = usGetWord(0x0a, aucHeader);
        /* Bit 9 of the status flags selects the table stream to use */
        if (usDocStatus & BIT(9)) {
                PPS_info.tTable = PPS_info.t1Table;
        } else {
                PPS_info.tTable = PPS_info.t0Table;
        }
        /* Clean the entries that should not be used */
        memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
        memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));

        bSuccess = bGetDocumentText(pFile, &PPS_info,
                        aulBBD, tBBDLen, aulSBD, tSBDLen,
                        aucHeader, iWordVersion);
        /* The other information is only collected when the text is found */
        if (bSuccess) {
                vGetDocumentData(pFile, &PPS_info,
                        aulBBD, tBBDLen, aucHeader, iWordVersion);
                vGetPropertyInfo(pFile, &PPS_info,
                        aulBBD, tBBDLen, aulSBD, tSBDLen,
                        aucHeader, iWordVersion);
                vSetDefaultTabWidth(pFile, &PPS_info,
                        aulBBD, tBBDLen, aulSBD, tSBDLen,
                        aucHeader, iWordVersion);
                vGetNotesInfo(pFile, &PPS_info,
                        aulBBD, tBBDLen, aulSBD, tSBDLen,
                        aucHeader, iWordVersion);
        }
        FREE_ALL();
        return bSuccess ? iWordVersion : -1;
} /* end of iInitDocumentOLE */