sys/lib/ghostscript/pdf_base.ps

   1 %    Copyright (C) 1994-2003 artofcode LLC.  All rights reserved.
   2 %
   3 % This software is provided AS-IS with no warranty, either express or
   4 % implied.
   5 %
   6 % This software is distributed under license and may not be copied,
   7 % modified or distributed except as expressly authorized under the terms
   8 % of the license contained in the file LICENSE in this distribution.
   9 %
  10 % For more information about licensing, please refer to
  11 % http://www.ghostscript.com/licensing/. For information on
  12 % commercial licensing, go to http://www.artifex.com/licensing/ or
  13 % contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  14 % San Rafael, CA  94903, U.S.A., +1(415)492-9861.
  15
  16 % $Id: pdf_base.ps,v 1.48 2005/09/16 19:01:30 ray Exp $
  17 % pdf_base.ps
  18 % Basic parser for PDF reader.
  19
  20 % This handles basic parsing of the file (including the trailer
  21 % and cross-reference table), as well as objects, object references,
  22 % streams, and name/number trees; it doesn't include any facilities for
  23 % making marks on the page.
  24
   25 /.setlanguagelevel where { pop 2 .setlanguagelevel } if
      % (Above) If .setlanguagelevel is defined, select language level 2.
      % The next line leaves the previous .currentglobal value on the operand
      % stack and then calls .setglobal with true.
      % NOTE(review): presumably that saved value is consumed by a matching
      % .setglobal at the end of the file, outside this view -- confirm.
   26 .currentglobal true .setglobal
      % Reuse pdfdict if it is already defined; otherwise create it.
   27 /pdfdict where { pop } { /pdfdict 100 dict def } ifelse
      % Subsequent definitions are made with pdfdict on the dictionary stack.
   28 pdfdict begin
  29
   30 % Define the name interpretation dictionary for reading values.
      % Each key is the name of a token; .pdfexectoken (below) looks an
      % executable name token up in the supplied dictionary and executes the
      % value stored under it.
   31 /valueopdict mark
   32   (<<) cvn { mark } bind        % don't push an actual mark!
      %   >> builds a dictionary with .dicttomark; if that is stopped, a
      %   format error is reported through pdfformaterror instead.
   33   (>>) cvn { { .dicttomark } stopped {
   34       (   **** File has an unbalanced >> \(close dictionary\).\n)
   35       pdfformaterror
   36     } if
   37   } bind
   38   ([) cvn { mark } bind         % ditto
      %   ] is mapped to whatever ] is bound to when this file is read.
   39   (]) cvn dup load
   40 %  /true true           % see .pdfexectoken below
   41 %  /false false         % ibid.
   42 %  /null null           % ibid.
      %   The remaining entries map a name to the executable form of the same
      %   name, so the definition is looked up when the entry is executed.
   43   /F dup cvx            % see Objects section below
   44   /R dup cvx            % see Objects section below
   45   /stream dup cvx       % see Streams section below
   46 .dicttomark readonly def
  47
  48 % ------ Utilities ------ %
  49
   50 % Define a scratch string.  The PDF language definition says that
   51 % no line in a PDF file can exceed 255 characters.
      % prevline (below) passes this string to readline as its line buffer.
   52 /pdfstring 255 string def
  53
   54 % Read the previous line of a file.  If we aren't at a line boundary,
   55 % read the line containing the current position.
   56 % Skip any blank lines.
      % The scan starts up to 257 bytes before the position at entry (clamped
      % to 0) and reads forward with readline into pdfstring, remembering the
      % most recent non-empty line and the position where it started, until
      % the file position reaches or passes the position at entry.
      % The file position is not restored.  If no non-empty line was seen,
      % the results are the entry position and an empty string.
   57 /prevline               % - prevline <startpos> <substring>
   58  { PDFfile fileposition dup () pdfstring
      %        Stack: initpos linepos(=initpos) line(=empty) string
   59    2 index 257 sub 0 .max PDFfile exch setfileposition
   60     {           % Stack: initpos linepos line string
   61       PDFfile fileposition
      %        The boolean result of readline is discarded.
   62       PDFfile 2 index readline pop
      %        Stack: initpos linepos line string startpos newline
   63       dup length 0 gt
      %        Non-empty: newline/startpos replace line/linepos.
   64        { 3 2 roll 5 -2 roll pop pop 2 index }
      %        Empty: drop it and keep the remembered line.
   65        { pop }
   66       ifelse
   67                 % Stack: initpos linepos line string startpos
   68       PDFfile fileposition 5 index ge { exit } if
   69       pop
   70     }
      %        After the loop: drop startpos, string and initpos.
   71    loop pop pop 3 -1 roll pop
   72  } bind def
  73
   74 % Handle the PDF 1.2 #nn escape convention when reading from a file.
   75 % This should eventually be done in C.
      % When PDFversion >= 1.2 and the text of the name contains '#', the
      % name is replaced by cvn of the string built by name#escape; otherwise
      % the name is returned unchanged.
      % NOTE(review): .pdfrun calls this only for names that fail xcheck, so
      % the "<execname>" in the stack comment looks inaccurate -- confirm.
   76 /.pdffixname {                  % <execname> .pdffixname <execname'>
   77   PDFversion 1.2 ge {
   78     dup .namestring (#) search {
      %       Stack: name post (#) pre
   79       name#escape cvn exch pop
   80     } {
      %       No '#': drop the string, keep the original name.
   81       pop
   82     } ifelse
   83   } if
   84 } bind def
   85 /name#escape                    % <post> <(#)> <pre> name#escape <string>
      % The operands are the three strings left by a successful search for
      % (#).  The first two characters of <post> are read through a
      % SubFileDecode filter with readhexstring into a one-character string;
      % the result is appended to <pre>, and any further '#' in the rest of
      % <post> is handled by a recursive call.
   86 { exch pop
   87   1 index 2 () /SubFileDecode filter dup (x) readhexstring
   88                 % Stack: post pre stream char t/f
   89   not { % tolerate, but complain about bad syntax
      %       Keep a literal '#' after <pre>; <post> is left untouched.
   90     pop closefile (#) concatstrings exch
   91     (   **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
   92     pdfformaterror
   93   } {
      %       Append the decoded character to <pre>, then drop the first two
      %       characters of <post>.
   94     exch closefile concatstrings
   95     exch 2 1 index length 2 sub getinterval
   96   } ifelse
      %       Stack: pre' post'
   97   (#) search { name#escape } if concatstrings
   98 } bind def
  99
 100 % Execute a file, interpreting its executable names in a given
 101 % dictionary.  The name procedures may do whatever they want
 102 % to the operand stack.
  103 /.pdftokenerror {               % <count> <opdict> <errtoken> .pdftokenerror -
      % Report an unknown operator.  When BXlevel <= 0 the token text is
      % rescanned with the PDFScanInvNum user parameter set to true; if that
      % yields an integer or real, the number is left on the operand stack in
      % place of <count> <opdict> <errtoken>.  Otherwise the operands above
      % the depth recorded in <count> are popped.
      % NOTE(review): PDFScanInvNum is reset to null only on the successful
      % rescan path; on the failure path it stays true -- confirm intended.
      % NOTE(review): `token pop` assumes the rescan always yields a token.
  104   BXlevel 0 le {
  105     (   **** Unknown operator: ') pdfformaterror
  106     dup =string cvs pdfformaterror
  107     % Attempt a retry scan of the element after changing to PDFScanInvNum
  108     << /PDFScanInvNum true >> setuserparams
  109     =string cvs
      %     Stack after the next line: count opdict rescanned type
  110     token pop exch pop dup type
  111     dup /integertype eq exch /realtype eq or {
      %       Drop opdict and count, keeping only the number.
  112       exch pop exch pop
  113       (', processed as number, value: ) pdfformaterror
  114       dup =string cvs pdfformaterror (\n) pdfformaterror
  115      << /PDFScanInvNum null >> setuserparams    % reset to default scanning rules
  116       false     % suppress any stack cleanup
  117     } {
  118       % error was non-recoverable with modified scanning rules
  119     ('\n) pdfformaterror
  120       true
  121     } ifelse
  122   } {
  123     true
  124   } ifelse
  125   { % clean up the operand stack if this was non-recoverable
      %   Drop the token and opdict, then pop (current depth - count) items.
  126   pop pop count exch sub { pop } repeat % pop all the operands
  127   } if
  128 } bind def
  129 /.pdfexectoken {                % <count> <opdict> <exectoken> .pdfexectoken ?
      % Execute one executable name token: if <opdict> has an entry for it,
      % drop the three operands and execute that entry; if it is true, false
      % or null, push the value from systemdict; otherwise call
      % .pdftokenerror.  With PDFDEBUG set, the token is traced first, and
      % with PDFSTEP the user is prompted on %stdin between tokens.
  130   PDFDEBUG {
  131     pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
  132     PDFSTEP {
      %       Count this token in PDFtokencount (starting at 1).
  133       pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput
  134       PDFSTEPcount 1 gt {
      %         Still skipping: just decrement the remaining step count.
  135         pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput
  136       } {
  137         dup ==only
  138         (    step # ) print PDFtokencount =only
  139         ( ? ) print flush 1 false .outputpage
      %         A token read from the reply becomes the new PDFSTEPcount;
      %         a reply with no token sets it to 1.
  140         (%stdin) (r) file 255 string readline {
  141           token {
  142             exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput
  143           } {
  144             pdfdict /PDFSTEPcount 1 .forceput
  145           } ifelse % token
  146         } {
  147           pop /PDFSTEP false def         % EOF on stdin
  148         } ifelse % readline
  149       } ifelse % PDFSTEPcount > 1
  150     } {
  151       dup ==only () = flush
  152     } ifelse % PDFSTEP
  153   } if % PDFDEBUG
  154   2 copy .knownget {
      %     Stack: count opdict token value -- keep only the value and run it.
  155     exch pop exch pop exch pop exec
  156   } {
  157                 % Normally, true, false, and null would appear in opdict
  158                 % and be treated as "operators".  However, there is a
  159                 % special fast case in the PostScript interpreter for names
  160                 % that are defined in, and only in, systemdict and/or
  161                 % userdict: putting these three names in the PDF dictionaries
  162                 % destroys this property for them, slowing down their
  163                 % interpretation in all PostScript code.  Therefore, we
  164                 % check for them explicitly here instead.
  165     dup dup dup /true eq exch /false eq or exch /null eq or {
      %       Drop opdict and count, then fetch the value from systemdict.
  166       exch pop exch pop //systemdict exch get
  167     } {
  168       .pdftokenerror
  169     } ifelse
  170   } ifelse
  171 } bind def
  172 /.pdfrun {                      % <file> <opdict> .pdfrun -
  173         % Construct a procedure with the stack depth, file and opdict
  174         % bound into it.
      %       The procedure that is finally executed has the form
      %         { count opdict file token {...} {...} ifelse } loop
      %       run under `stopped`, followed by a store that puts the previous
      %       value of PDFsource back and a `stop` if the loop was stopped.
      %       PDFsource is set to <file> while the loop runs.
  175   1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll
      %       Stack: file mark mark count opdict file(literal)
  176   {     % Stack: ..operands.. count opdict file
  177     token {
  178       dup type /nametype eq {
  179         dup xcheck {
  180           .pdfexectoken
  181         } {
      %           Literal name: apply #-escapes, drop opdict and count, and
      %           leave the name on the stack as an operand.
  182           .pdffixname
  183           exch pop exch pop PDFDEBUG {
  184             PDFSTEPcount 1 le {
  185               dup ==only ( ) print flush
  186             } if
  187           } if
  188         } ifelse
  189       } {
      %         Any other object: drop opdict and count, keep the object.
  190         exch pop exch pop PDFDEBUG {
  191           PDFSTEPcount 1 le {
  192             dup ==only ( ) print flush
  193           } if
  194         } if
  195       } ifelse
  196     } {
      %       End of file: handled as if the executable name %%EOF was read.
  197       (%%EOF) cvn cvx .pdfexectoken
  198     } ifelse
  199   }
      %   Unpack the template above and repack it, together with the count,
      %   opdict and file pushed earlier, down to the inner mark.
  200   aload pop .packtomark cvx
  201   /loop cvx 2 packedarray cvx
  202   { stopped /PDFsource } aload pop
  203   PDFsource
  204   { store { stop } if } aload pop .packtomark cvx
      %   Stack: file proc
  205   /PDFsource 3 -1 roll store exec
  206 } bind def
 207
  208 % Execute a file, like .pdfrun, for a marking context.
  209 % This temporarily rebinds LocalResources and DefaultQstate.
      % The previous LocalResources and DefaultQstate are pushed after the
      % .pdfrun procedure, so the `3 .execn` call both runs .pdfrun and leaves
      % the two saved values on the stack to be stored back afterwards.
      % NOTE(review): if .pdfrun exits through stop, the two restoring
      % stores below do not appear to be reached -- confirm.
  210 /.pdfruncontext {               % <resdict> <file> <opdict> .pdfruncontext -
  211   /.pdfrun load LocalResources DefaultQstate
      %   Stack: resdict file opdict proc oldLocalResources oldDefaultQstate
  212   /LocalResources 7 -1 roll store
  213   /DefaultQstate qstate store
  214   3 .execn
  215   /DefaultQstate exch store
  216   /LocalResources exch store
  217 } bind def
 218
  219 % Get the depth of the PDF operand stack.  The caller sets pdfemptycount
  220 % before calling .pdfrun or .pdfruncontext.  It is initially set by
  221 % pdf_main, and is also set by any routine which changes the operand
  222 % stack depth (currently .pdfpaintproc, although there are other callers
  223 % of .pdfrun{context} which have not been checked for opstack depth).
  224 /.pdfcount {            % - .pdfcount <count>
      %   <count> is the operand stack depth at entry minus pdfemptycount.
  225   count pdfemptycount sub
  226 } bind def
 227
 228 % ================================ Objects ================================ %
 229
  230 % Since we may have more than 64K objects, we have to use a 2-D array to
  231 % hold them (and the parallel Generations structure).
      % An l-sequence (<lseq>) is an array of sub-arrays or sub-strings.  An
      % index is split into a sub-sequence number (index shifted right by
      % lshift bits) and an offset inside it (index and lsubmask).
  232 /lshift 9 def
  233 /lnshift lshift neg def
      % lsubmask = 2^lshift - 1; lsublen = 2^lshift elements per sub-sequence.
  234 /lsubmask 1 lshift bitshift 1 sub def
  235 /lsublen lsubmask 1 add def
 236 /larray {       % - larray <larray>
 237   [ [] ]
 238 } bind def
 239 /lstring {      % - lstring <lstring>
 240   [ () ]
 241 } bind def
  242 /ltype {        % <lseq> ltype <type>
      %   The type of an l-sequence is the type of its first sub-sequence.
  243   0 get type
  244 } bind def
 245 /lget {         % <lseq> <index> lget <value>
 246   dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
 247 } bind def
 248 /lput {         % <lseq> <index> <value> lput -
 249   3 1 roll
 250   dup //lsubmask and 4 1 roll //lnshift bitshift get
 251   3 1 roll put
 252 } bind def
 253 /llength {      % <lseq> llength <length>
 254   dup length 1 sub dup //lshift bitshift
 255   3 1 roll get length add
 256 } bind def
 257 % lgrowto assumes newlength > llength(lseq)
 258 /growto {       % <string/array> <length> growto <string'/array'>
 259   1 index type /stringtype eq { string } { array } ifelse
 260   2 copy copy pop exch pop
 261 } bind def
  262 /lgrowto {      % <lseq> <newlength> lgrowto <lseq'>
      % Grow an l-sequence to <newlength> elements.  If more sub-sequences
      % are needed, the existing last one is first completed by a recursive
      % call, then a new outer array is built with further full-size
      % sub-sequences and a zero-length final one.  In every case the last
      % sub-sequence is then replaced by a copy grown to the remaining
      % length.  When no sub-sequences are added, the outer array is updated
      % in place and returned.
  263     dup //lsubmask add //lnshift bitshift dup 3 index length gt {
  264         % Add more sub-arrays.  Start by completing the last existing one.
  265                 % Stack: lseq newlen newtoplen
  266     3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
  267                 % Stack: newlen newtoplen lseq
  268     [ exch aload pop
  269     counttomark 2 add -1 roll           % newtoplen
      %         Add newtoplen - (existing sub-arrays + 1) full sub-arrays,
      %         each made by growing a zero-length interval of the previous.
  270     counttomark sub { dup 0 0 getinterval lsublen growto } repeat
      %         Finish with a zero-length sub-array, expanded below.
  271     dup 0 0 getinterval ] exch
  272   } {
  273     pop
  274   } ifelse
  275         % Expand the last sub-array.
      %   Stack: lseq newlen;  newsublen = ((newlen - 1) and lsubmask) + 1
  276   1 sub //lsubmask and 1 add
  277   exch dup dup length 1 sub 2 copy
  278                 % Stack: newsublen lseq lseq len-1 lseq len-1
  279   get 5 -1 roll growto put
  280 } bind def
  281 /lforall {      % <lseq> <proc> lforall -
      %   Builds the procedure { <proc> forall } and applies it to each
      %   sub-sequence with forall, so <proc> sees every element in order.
  282   /forall cvx 2 packedarray cvx forall
  283 } bind def
 284
 285 % We keep track of PDF objects using the following PostScript variables:
 286 %
 287 %       Generations (lstring): Generations[N] holds 1+ the current
 288 %           generation number for object number N.  (As far as we can tell,
 289 %           this is needed only for error checking.)  For free objects,
 290 %           Generations[N] is 0.
 291 %
 292 %       Objects (larray): If object N is loaded, Objects[N] is the actual
 293 %           object; otherwise, Objects[N] is an executable integer giving
 294 %           the file offset of the object's location in the file.  If
 295 %           ObjectStream[N] is non-zero then Objects[N] contains the index
 296 %           into the object stream instead of the file offset of the object.
 297 %
 298 %       ObjectStream (larray): If object N is in an object stream then
 299 %           ObjectStream[N] holds the object number of the object stream.
 300 %           Otherwise ObjectStream[N] contains 0.  If ObjectStream[N]
 301 %           is non-zero then Objects[N] contains  the index into the object
 302 %           stream.
 303 %
 304 %       GlobalObjects (dictionary): If object N has been resolved in
 305 %           global VM, GlobalObjects[N] is the same as Objects[N]
 306 %           (except that GlobalObjects itself is stored in global VM,
 307 %           so the entry will not be deleted at the end of the page).
 308 %
 309 %       IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
 310 %           global VM.  This is an accelerator to avoid having to do a
 311 %           dictionary lookup in GlobalObjects when resolving every object.
 312
 313 % Initialize the PDF object tables.
 314 /initPDFobjects {               % - initPDFobjects -
 315   /ObjectStream larray def
 316   /Objects larray def
 317   /Generations lstring def
 318   .currentglobal true .setglobal
 319   /GlobalObjects 20 dict def
 320   .setglobal
 321   /IsGlobal lstring def
 322 } bind def
 323
 324 % Grow the tables to a specified size.
 325 /growPDFobjects {               % <minsize> growPDFobjects -
 326   dup ObjectStream llength gt {
 327     dup ObjectStream exch lgrowto /ObjectStream exch def
 328   } if
 329   dup Objects llength gt {
 330     dup Objects exch lgrowto /Objects exch def
 331   } if
 332   dup Generations llength gt {
 333     dup Generations exch lgrowto /Generations exch def
 334   } if
 335   dup IsGlobal llength gt {
 336     dup IsGlobal exch lgrowto /IsGlobal exch def
 337   } if
 338   pop
 339 } bind def
 340
 341 % We represent an unresolved object reference by a procedure of the form
 342 % {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
 343 % no way to represent procedures.  Since PDF in fact has no way to represent
 344 % any PostScript object that doesn't evaluate to itself, we can 'force'
 345 % a possibly indirect object painlessly with 'exec'.
 346 % Note that since we represent streams by executable dictionaries
 347 % (see below), we need both an xcheck and a type check to determine
 348 % whether an object has been resolved.
 349 /resolved? {            % <object#> resolved? <value> true
 350                         % <object#> resolved? false
 351   Objects 1 index lget dup xcheck {     % Check if executable
 352     dup type /integertype eq {          % Check if an integer
 353                 % Check whether the object is in GlobalObjects.
 354       pop IsGlobal 1 index lget 0 eq {  % 0 --> Not in GlabalObjects
 355         pop false                       % The object is not resolved
 356       } {                               % The object is in GlobalObjects
 357                 % Update Objects from GlobalObjects
 358         PDFDEBUG { (%Global=>local: ) print dup == } if
 359         GlobalObjects 1 index get dup Objects 4 1 roll lput true
 360       } ifelse
 361     } {                         % Else object is executable but not integer
 362       exch pop true             % Therefore must be executable dict. (stream)
 363     } ifelse
 364   } {                           % Else object is not executable.
 365     exch pop true               % Therefore it must have been resolved.
 366   } ifelse
 367 } bind def
  368 /oforce /exec load def
      % (Above) oforce is defined as the current value of exec; executing an
      % unresolved reference procedure {obj# gen# resolveR} yields the object.
  369 /oget {         % <array> <index> oget <object>
  370                 % <dict> <key> oget <object>
  371                 % Before release 6.20, this procedure stored the resolved
  372                 % object back into the referring slot.  In order to support
  373                 % PDF linearization, we no longer do this.
      %               Fetch the element, then force it with oforce.
  374   get oforce
  375 } bind def
  376 /oforce_array { % <array> oforce_array <array>
      %   Returns a new array holding the forced value of every element.
  377   [ exch { oforce } forall ]
  378 } bind def
  379 /oforce_elems { % <array> oforce_elems <first> ... <last>
      %   Pushes the forced value of every element, in order.
  380   { oforce } forall
  381 } bind def
 382 % A null value in a dictionary is equivalent to an omitted key;
 383 % we must check for this specially.
 384 /knownoget {    % <dict> <key> knownoget <value> true
 385                 % <dict> <key> knownoget false
 386                 % See oget above regarding this procedure.
 387   .knownget {
 388     oforce dup null eq { pop false } { true } ifelse
 389   } {
 390     false
 391   } ifelse
 392 } bind def
 393
  394 % PDF 1.1 defines a 'foreign file reference', but not its meaning.
  395 % Per the specification, we convert these to nulls.
  396 /F {            % <file#> <object#> <generation#> F <object>
  397                 % Some PDF 1.1 files use F as a synonym for f!
      %               With fewer than three operands on the PDF operand stack
      %               (per .pdfcount) this runs f; otherwise the three
      %               operands are replaced by null.
  398    .pdfcount 3 lt { f } { pop pop pop null } ifelse
  399 } bind def
 400
  401 % Verify the generation number for a specified object
  402 % Note:  The value in Generations is the generation number plus 1.
  403 % If the value in Generations is zero then the object is free.
      % On a mismatch a warning is reported through pdfformaterror (unless
      % QUIET is set), and <OK> is true only if the requested generation
      % number is 0.
  404 /checkgeneration {  % <object#> <generation#> checkgeneration <object#> <OK>
  405   Generations 2 index lget 1 sub 1 index eq {   % If generation # match ...
  406     pop true                                    % Then return true
  407   } {                                   % Else not a match ...
  408     QUIET not {                         % Create warning message if not QUIET
  409       Generations 2 index lget 0 eq {   % Check if object is free ...
  410         (   **** Warning: reference to free object: )
  411       } {
  412         (   **** Warning: wrong generation: )
  413       } ifelse
  414       2 index =string cvs concatstrings ( ) concatstrings       % put obj #
  415       1 index =string cvs concatstrings ( R\n) concatstrings    % put gen #
  416       pdfformaterror                    % Output warning message
  417     } if
      %     Stack: obj# gen#  ->  obj# (gen# == 0)
  418     0 eq
  419   } ifelse
  420 } bind def
  421 /R {            % <object#> <generation#> R <object>
      %   Builds the executable packed array {obj# gen# resolveR}; nothing is
      %   resolved until that procedure is executed.
  422   /resolveR cvx 3 packedarray cvx
  423 } bind def
 424
  425 % If we encounter an object definition while reading sequentially,
  426 % we just store it away and keep going.
      % objopdict is valueopdict plus an entry mapping endobj to the
      % executable name endobj.
  427 /objopdict mark
      %   Push every key/value pair of valueopdict above the mark.
  428   valueopdict { } forall
  429   /endobj dup cvx
  430 .dicttomark readonly def
 431
  432 /obj {                  % <object#> <generation#> obj <object>
      %   Runs .pdfrun on PDFfile with objopdict; <object#> and
      %   <generation#> stay on the stack underneath for endobj to consume.
  433   PDFfile objopdict .pdfrun
  434 } bind def
 435
 436 /endobj {               % <object#> <generation#> <object> endobj <object>
 437   3 1 roll
 438                 % Read the xref entry if we haven't yet done so.
 439                 % This is only needed for generation # checking.
 440   1 index resolved? {
 441     pop
 442   } if
 443   checkgeneration {
 444                 % The only global objects we bother to save are
 445                 % (resource) dictionaries.
 446     1 index dup gcheck exch type /dicttype eq and {
 447       PDFDEBUG { (%Local=>global: ) print dup == } if
 448       GlobalObjects 1 index 3 index put
 449       IsGlobal 1 index 1 put
 450     } if
 451     Objects exch 2 index lput
 452   } {
 453     pop pop null
 454   } ifelse
 455 } bind def
 456
  457 % When resolving an object reference in an object stream, we stop at
  458 % the end of file.  Note:  Objects in an object stream do not have either
  459 % a starting 'obj' or an ending 'endobj'.
      % .pdfrun executes the name %%EOF when its file is exhausted; the exit
      % bound to that name here leaves the .pdfrun loop.
  460 /resolveobjstreamopdict mark
  461   valueopdict { } forall
  462   (%%EOF) cvn { exit } bind
  463 .dicttomark readonly def
 464
  465 % Note: This version of this function is not currently being used.
      % A second definition of resolveobjectstream follows this one in the
      % file and replaces it.
      % NOTE(review): the "already resolved" branch below still contains a
      % debugging line that prints the stack and then executes xxx, a name
      % not defined in the visible part of this file -- remove or define it
      % before re-enabling this version.
  466 % Resolve all objects in an object stream
  467 /resolveobjectstream {          % <object stream #> resolveobjectstream -
  468   PDFDEBUG { (%Resolving object stream: ) print } if
  469   0 resolveR    % Get the objectstream dict, all objstrms use 0 as the gen #
  470   dup /First get                % Save location of first object onto the stack
  471   1 index /N get                % Save number of objects onto the stack
  472   2 index false resolvestream   % Convert stream dict into a stream
  473   /ReusableStreamDecode filter  % We need to be able to position stream
  474                 % Objectstreams begin with list of object numbers and locations
  475                 % Create two arrays to hold object numbers and stream location
  476   1 index array                 % Array for holding object number
  477   2 index array                 % Array for holding stream object location
  478                 % Get the object numbers and locations.
  479   0 1 5 index 1 sub {           % Loop and collect obj # and locations
  480                 % Stack: objstreamdict First N objectstream [obj#] [loc] index
  481     2 index 1 index             % Setup to put obj# into object number array
  482     5 index token pop put       % Get stream, then get obj# and put into array
  483     1 index 1 index             % Setup to put object loc into location array
  484     5 index token pop put       % Get stream, get obj loc and put into array
  485     pop                         % Remove loop index
  486   } for
  487                 % Create a bytestring big enough for reading any object data
  488                 % Scan for the size of the largest object
  489   0 0                           % Init max object size and previous location
  490   2 index {                     % Loop through all object locations
  491                                 % Stack:  ... maxsize prevloc currentloc
  492     dup 4 1 roll                % Save copy of object location into stack
  493     exch sub                            % Object size = currentloc - prevloc
  494     .max                        % Determine maximum object size
  495     exch                        % Put max size under previous location
  496   } forall
  497   pop                           % Remove previous location
  498   .bigstring                    % Create bytestring based upon max obj size
  499                 % Move to the start of the object data
  500   3 index 6 index               % Get objectstream and start of first object
  501   setfileposition               % Move to the start of the data
  502                 % Read the data for all objects except the last.  We do
  503                 % not know the size of the last object so we need to treat
  504                 % it as a special case.
  505   0 1 6 index 2 sub {
  506     dup 4 index exch get        % Get our current object number
  507                 % Stack: objstreamdict First N objectstream [obj#] [loc]
  508                 %        bytestring loopindex object#
  509     dup resolved? {             % If we already have this object
      %         NOTE(review): leftover debugging output; xxx is undefined here.
  510         (yyy) = pstack (yyy) = flush xxx
  511       pop pop                   % Remove object and object number
  512       1 add 2 index exch get    % Get location of next object
  513       6 index add 6 index exch  % Form location of next object and get stream
  514       setfileposition           % Move to the start of the next object data
  515     } {                         % Else this is a new object ...
  516                 % We are going to create a string for reading the object
  517       2 index 0                 % use our working string
  518                 % Determine the size of the object
  519       5 index 4 index 1 add get % Get location of the next object
  520       6 index 5 index get       % Get location of this object
  521       sub                       % Size of object = next loc - this loc
  522       getinterval               % Create string for reading object
  523       6 index exch readstring pop       % Read object
  524       /ReusableStreamDecode filter      % Convert string into a stream
  525       resolveobjstreamopdict .pdfrun    % Get PDF object
  526       Objects exch 2 index exch lput     % Put object into Objects array
  527       pop pop                   % Remove object # and loop index
  528     } ifelse
  529   } for
  530   pop pop                       % Remove our working string and loc array
  531                 % Now read the last object in the object stream.  Since it
  532                 % is the last object, we can use the original stream and
  533                 % terminate when we hit the end of the stream
  534                 % Stack: objstreamdict First N objectstream [obj#]
  535   2 index 1 sub get             % Get our current object number
  536   dup resolved? not {           % If we do not already have this object
  537     exch                        % Get our object stream
  538     resolveobjstreamopdict .pdfrun      % Get PDF object
  539     Objects exch 2 index exch lput      % Put object into Objects array
  540   } if
  541   pop pop pop pop               % Clear stack
  542 } bind def
 543
  544 % Resolve all objects in an object stream
      % The object numbers are read from the start of the stream, then all
      % object data is parsed in one .pdfrun pass that leaves the objects on
      % the operand stack.  Each object whose number is not yet resolved is
      % stored into Objects.  PDFDEBUG is redefined as false for the parsing
      % pass and put back afterwards.
      % NOTE(review): if .pdfrun or the count check signals an error,
      % PDFDEBUG appears to stay false -- confirm.
  545 /resolveobjectstream {          % <object stream #> resolveobjectstream -
  546   PDFDEBUG { (%Resolving object stream: ) print } if
  547   0 resolveR    % Get the objectstream dict, all objstrms use 0 as the gen #
  548   dup /Type get /ObjStm ne {    % Verify type is object stream
  549     (   **** Incorrect Type in object stream dictionary.\n) pdfformaterror
  550     /resolveobjectstream cvx /typecheck signalerror
  551   } if
  552   dup /N get                    % Save number of objects onto the stack
  553   1 index false resolvestream   % Convert stream dict into a stream
  554   /ReusableStreamDecode filter  % We need to be able to position stream
  555                 % Objectstreams begin with list of object numbers and locations
  556   1 index array                 % Create array for holding object number
  557                 % Get the object numbers
  558   0 1 4 index 1 sub {           % Loop and collect obj numbers
  559                 % Stack: objstreamdict N objectstream [obj#] loopindex
  560     1 index 1 index             % Setup to put obj# into object number array
  561     4 index token pop put       % Get stream, then get obj# and put into array
  562     2 index token pop pop pop   % Get stream, get obj loc and clear stack
  563   } for
  564                 % Move to the start of the object data
  565   1 index 4 index /First get    % Get objectstream and start of first object
  566   setfileposition               % Move to the start of the data
  567                 % We disable PDFDEBUG while reading the data stream.  We will
  568                 % print the data later
  569   PDFDEBUG /PDFDEBUG false def  % Save PDFDEBUG and disable it while reading
  570                 % Read the data for all objects.  We check to see if we get
  571                 % the number of objects that we expect.
  572                 % Stack: objstreamdict N objectstream [obj#] PDFDEBUG
  573   mark 4 -1 roll                % Get objectstream
      %   The expected depth is the depth now (including the stream, which
      %   .pdfrun will consume) plus N; the later comparison is made with
      %   PDFObjectStkCount itself on the stack, which accounts for the one
      %   extra item.
  574   count 5 index add             % Determine stack depth with objects
  575   /PDFObjectStkCount exch def
  576   resolveobjstreamopdict .pdfrun % Get PDF objects
  577   PDFObjectStkCount count ne {  % Check stack depth
  578     (   **** Incorrect object count in object stream.\n) pdfformaterror
  579     /resolveobjectstream cvx /rangecheck signalerror
  580   } if
  581                 % We have the object data
  582   counttomark array astore      % Put objects into an array
  583   exch pop                      % Remove mark
  584   exch /PDFDEBUG exch def       % Restore PDFDEBUG flag
  585                 % Save the objects into Objects
  586   0 1 2 index length 1 sub {    % Loop through all objects
  587                 % Stack: objstreamdict N [obj#] [objects] loopindex
  588     dup 3 index exch get        % Get our current object number
  589     dup resolved? {             % If we already have this object
  590       pop pop                   % Remove object and object number
  591     } {                         % Else if we do not have this object
  592       PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
  593       Objects exch 3 index      % Put the object into Objects
  594       3 index get
  595       PDFDEBUG { dup === flush } if
  596       lput
  597     } ifelse
  598     pop                         % Remove loop index
  599   } for
  600   pop pop pop pop               % Remove objstream, N, [obj#], and [objects]
  601 } bind def
 602
  603 % When resolving an object reference, we stop at the endobj or endstream.
      % Each of the three terminating entries runs endobj and then exits the
      % .pdfrun loop.
  604 /resolveopdict mark
  605   valueopdict { } forall
  606   /endstream { endobj exit } bind
  607   /endobj { endobj exit } bind
  608                 % OmniForm generates PDF file with endobj missing in some
  609                 % objects. AR ignores this. So we have to do it too.
      %               The two operands of the unexpected obj are dropped
      %               before the current object is finished.
  610   /obj { pop pop endobj exit } bind
  611 .dicttomark readonly def
 612
 613 /resolveR {             % <object#> <generation#> resolveR <object>
 614   PDFDEBUG {
 615     PDFSTEPcount 1 le {
 616       (%Resolving: ) print 2 copy 2 array astore ==
 617     } if
 618   } if
 619   1 index resolved? {           % If object has already been resolved ...
 620     exch pop exch pop           % then clear stack and return object
 621   } {                           % Else if not resolved ...
 622     PDFfile fileposition 3 1 roll       % Save current file position
 623     1 index Objects exch lget           % Get location of object from xref
 624     3 1 roll checkgeneration {          % Verify the generation number
 625                         % Stack: savepos objpos obj#
 626        ObjectStream 1 index lget dup 0 eq { % Check if obj in not an objstream
 627          pop exch PDFoffset add PDFfile exch setfileposition
 628          PDFfile token pop 2 copy ne
 629           { (   **** Unrecoverable error in xref!\n) pdfformaterror
 630             /resolveR cvx /rangecheck signalerror
 631           }
 632          if pop PDFfile token pop
 633          PDFfile token pop /obj ne
 634           { (   **** Unrecoverable error in xref!\n) pdfformaterror
 635             /resolveR cvx /rangecheck signalerror
 636           }
 637          if
 638          pdf_run_resolve        % PDFfile resolveopdict .pdfrun
 639       } {                       % Else the object is in an ObjectStream
 640                 % Process an objectstream object.  We are going to resolve all
 641                 % of the objects in sthe stream and place them into the Objects
 642                 % array.
 643                 % Stack: savepos objpos obj# objectstream#
 644         resolveobjectstream
 645         resolved? {             % If object has already been resolved ...
 646           exch pop              % Remove object pos from stack.
 647         } {
 648           pop pop null          % Pop objpos and obj#, put null for object
 649         } ifelse
 650       } ifelse
 651     } {                         % Else the generation number is wrong
 652             % Don't cache if the generation # is wrong.
 653         pop pop null            % Pop objpos and obj#, put null for object
 654     } ifelse                    % ifelse generation number is correct
 655     exch PDFfile exch setfileposition   % Return to original file position
 656   } ifelse
 657 } bind def
 658
 659 % ================================ Streams ================================ %
 660
 661 % We represent a stream by an executable dictionary that contains,
 662 % in addition to the contents of the original stream dictionary:
 663 %       /File - the file or string where the stream contents are stored,
 664 %         if the stream is not an external one.
 665 %       /FilePosition - iff File is a file, the position in the file
 666 %         where the contents start.
 667 %       /StreamKey - the key used to decrypt this stream, if any.
 668 % We do the real work of constructing the data stream only when the
 669 % contents are needed.
 670
 671 % Construct a stream.  The length is not reliable in the face of
 672 % different end-of-line conventions, but it's all we've got.
 673 %
 674 % PDF files are inconsistent about what may fall between the 'stream' keyword
 675 % and the actual stream data, and it appears that no one algorithm can
 676 % detect this reliably.  We used to try to guess whether the file included
 677 % extraneous \r and/or \n characters, but we no longer attempt to do so,
 678 % especially since the PDF 1.2 specification states flatly that the only
 679 % legal terminators following the 'stream' keyword are \n or \r\n, both of
 680 % which are properly skipped and discarded by the token operator.
 681 % Unfortunately, this doesn't account for other whitespace characters that
 682 % may have preceded the EOL, such as spaces or tabs. Thus we back up one
 683 % character and scan until we find the \n terminator.
 684 /stream {       % <dict> stream <modified_dict>
 685   dup /Length oget 0 eq {
 686     dup /Filter undef   % don't confuse any filters that require data
 687   } if
 688   dup /F known dup PDFsource PDFfile eq or {    % Stack: dict F-known?
 689     not {                   % no /F entry: record where the data sits in PDFfile
 690       dup /File PDFfile put
 691       % make sure that we are just past the EOL \n character
 692       PDFfile dup fileposition 1 sub setfileposition    % back up one
 693       { PDFfile read not { exit } if dup 13 eq {    % at EOF read yields no byte: stop scanning
 694           % If there had been a \n, token would have advanced over it
 695           % thus, if the terminator was \r, we have a format error!
 696           (   **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
 697           pop exit      % fileposition is OK (just past the \r).
 698         } if
 699         10 eq { exit } if
 700       } loop    % scan past \n
 701       dup /FilePosition PDFfile fileposition put
 702       PDFDEBUG {
 703         PDFSTEPcount 1 le {
 704           (%FilePosition: ) print dup /FilePosition get ==
 705         } if
 706       } if
 707     } if
 708     % Some (bad) PDF files have invalid stream lengths.  This causes problems
 709     % if we reposition beyond the end of the file.  So we compare the given
 710     % length to number of bytes left in the file.
 711     dup /Length oget
 712     dup PDFfile bytesavailable lt {     % compare to bytes left in file
 713       PDFfile fileposition              % reposition to the end of stream
 714       add PDFfile exch setfileposition
 715     } {
 716       pop                               % bad stream length - do not reposition.
 717                                         % This will force a length warning below
 718     } ifelse
 719   } {
 720     pop                     % discard the F-known? flag
 721         % We're already reading from a stream, which we can't reposition.
 722         % Capture the sub-stream contents in a string.
 723     dup /Length oget string PDFsource exch readstring
 724     not {
 725       (   **** Warning: Unexpected EOF in stream!\n) pdfformaterror
 726       /stream cvx /rangecheck signalerror
 727     } if
 728     1 index exch /File exch put     % dict /File string put; dict stays on the stack
 729   } ifelse
 730   PDFsource {token} stopped {       % read the token that follows the stream data
 731     pop null                        % scanner error: treat as "no token"
 732   } {
 733     not { null } if                 % end of input: treat as "no token"
 734   } ifelse
 735   dup /endobj eq {
 736     % Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
 737     (   **** Warning: stream missing 'endstream'.\n) pdfformaterror
 738     pop /endstream              % fake a valid endstream
 739   } if
 740   /endstream ne {
 741     (   **** Warning: stream Length incorrect.\n) pdfformaterror
 742     dup /Length undef % prevent the use of the incorrect length.
 743     cvx endobj exit   % exit from .pdfrun now.
 744   } if
 745   cvx                               % return the dictionary as an executable object
 746 } bind def
 747 /endstream {            % - endstream -   (operand stack is left untouched)
 748   exit                  % leave the enclosing loop; presumably the .pdfrun loop, as in /stream above -- confirm
 749 } bind def
 750
 751 % Contrary to the published PDF (1.3) specification, Acrobat Reader
 752 % accepts abbreviated filter names everywhere, not just for in-line images,
 753 % and some applications (notably htmldoc) rely on this.
 754 /unabbrevfilterdict mark        % abbreviated filter name -> full filter name
 755   /A85 /ASCII85Decode  /AHx /ASCIIHexDecode  /CCF /CCITTFaxDecode  /DCT /DCTDecode
 756   /Fl /FlateDecode  /LZW /LZWDecode  /RL /RunLengthDecode
 757 .dicttomark readonly def
 758
 759 % Extract and apply filters.
 760 /filterparms {          % <dict> <DPkey> <Fkey> filterparms
 761                         %   <dict> <parms> <filternames>
 762   2 index exch knownoget {              % Stack: dict DPkey filters
 763     exch 2 index exch knownoget {       % Stack: dict filters parms
 764                 % Both filters and parameters.
 765       exch dup type /nametype eq {      % a single filter name: wrap it in a 1-element array
 766         1 array astore exch             % Stack: dict [filter] parms
 767         dup type /arraytype ne { 1 array astore } if exch  % wrap lone parms likewise
 768       } if
 769     } {
 770                 % Filters, but no parameters.
 771       null exch                         % null takes the place of <parms>
 772       dup type /nametype eq { 1 array astore } if
 773     } ifelse
 774   } {
 775                 % No filters: ignore parameters, if any.
 776     pop null { }                        % drop DPkey; result is null parms and an empty filter list
 777   } ifelse
 778 } bind def
 779 /filtername {           % <filtername> filtername <filtername'>
 780   //unabbrevfilterdict 1 index .knownget { exch pop } if      % replace an abbreviation by its full name
 781   dup /Filter resourcestatus { pop pop } {      % a Filter resource of this name exists: keep the name
 782     Repaired exch       % this error is not the creator's fault
 783     (   **** ERROR: Unable to process ) pdfformaterror
 784     64 string cvs pdfformaterror        % the filter name, as text (consumes the name)
 785     ( data. Page will be missing data.\n) pdfformaterror
 786     /Repaired exch store % restore the previous "Repaired" state
 787     % provide a filter that returns EOF (no data)
 788     /.EOFDecode
 789   } ifelse
 790 } bind def
 791 /applyfilters {         % <parms> <source> <filternames> applyfilters <stream>
 792   2 index null eq {             % no parameters at all: every filter is applied bare
 793     { filtername filter }
 794   } {
 795     {           % Stack: parms source filtername
 796       2 index 0 oget dup null eq { pop filtername } {       % null entry: no parm, but still map the name
 797         exch filtername dup /JBIG2Decode eq { exch jbig2cachectx exch } if
 798       } ifelse filter
 799       exch dup length 1 sub 1 exch getinterval exch         % drop the parm entry just consumed
 800     }
 801   } ifelse forall exch pop      % run the chosen proc once per filter name, then discard parms
 802 } bind def
 803
 804 % JBIG2 streams have an optional 'globals' stream obj for
 805 % sharing redundant data between page images. Here we resolve
 806 % that stream reference (if any) and run it through the decoder,
 807 % creating a special -jbig2globalctx- postscript object our
 808 % JBIG2Decode filter implementation looks for in the parm dict.
 809 /jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
 810   dup /JBIG2Globals knownoget {         % Stack: parmdict globals
 811     dup /Length oget                    % Stack: parmdict globals length
 812     % make global ctx
 813     PDFfile fileposition 3 1 roll % resolvestream is not reentrant
 814     exch true resolvestream exch .bytestring    % Stack: parmdict pos stream bytestring (presumably 'length' bytes long)
 815     .readbytestring pop .jbig2makeglobalctx     % Stack: parmdict pos ctx
 816     PDFfile 3 -1 roll setfileposition           % return PDFfile to the position saved above
 817     1 index exch                                % Stack: parmdict parmdict ctx
 818     /.jbig2globalctx exch put                   % parmdict /.jbig2globalctx ctx put
 819   } if
 820 } bind def
 821
 822 % Resolve a stream dictionary to a PostScript stream.
 823 % Streams with no filters require special handling:
 824 %     - Whether we are going to interpret the stream or just
 825 %       read data from it, we impose a SubFileDecode filter
 826 %       that reads just the requisite amount of data.
 827 % Note that, in general, resolving a stream repositions PDFfile.
 828 % Clients must save and restore the position of PDFfile themselves.
 829 /resolvestream {        % <streamdict> <readdata?> resolvestream <stream>
 830   1 index /F knownoget {
 831                 % This stream is stored on an external file.
 832     (r) file 3 -1 roll          % open it for reading; Stack: readdata? file dict
 833     /FDecodeParms /FFilter filterparms
 834                 % Stack: readdata? file dict parms filternames
 835     4 -1 roll exch              % Stack: readdata? dict parms file filternames
 836     pdf_decrypt_stream
 837     applyfilters
 838   } {
 839     exch dup /FilePosition .knownget {
 840       1 index /File get exch setfileposition    % position File at the start of the stream data
 841     } if
 842                 % Stack: readdata? dict
 843     /DecodeParms /Filter filterparms
 844                 % Stack: readdata? dict parms filternames
 845     2 index /File get exch
 846                 % Stack: readdata? dict parms file/string filternames
 847     pdf_decrypt_stream          % add decryption if needed
 848     dup length 0 eq {
 849                 % All the PDF filters have EOD markers, but in this case
 850                 % there is no specified filter.
 851       pop exch pop              % discard the empty filter list and the parms
 852                 % Stack: readdata? dict file/string
 853       2 index 1 index type /filetype eq or {    % readdata? is true, or the source is a file
 854               % Use length for any files or reading data from any source.
 855         1 index /Length knownoget not { 0 } if
 856       } {
 857       0       % Otherwise length of 0 for whole string
 858       } ifelse
 859       2 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse  % with IDFlag: drop the length, use the source as is
 860     } {
 861       applyfilters
 862     } ifelse
 863   } ifelse
 864                 % Stack: readdata? dict file
 865   exch pop exch pop             % keep only the resulting stream
 866 } bind def
 867
 868 % ============================ Name/number trees ============================ %
 869
 870 /nameoget {             % <nametree> <key> nameoget <obj|null>
 871   /Names 3 -1 roll .treeget     % Stack before .treeget: key /Names nametree
 872 } bind def
 873
 874 /numoget {              % <numtree> <key> numoget <obj|null>
 875   /Nums 3 -1 roll .treeget      % Stack before .treeget: key /Nums numtree
 876 } bind def
 877
 878 /.treeget {             % <key> <leafkey> <tree> .treeget <obj|null>
 879   dup /Kids knownoget {         % intermediate node: search among its kids
 880     exch pop .branchget         % Stack: key leafkey kids
 881   } {                           % leaf node: the pairs are stored under <leafkey>
 882     exch knownoget { .leafget } { pop null } ifelse     % resolve the pairs array; no such entry: null
 883   } ifelse
 884 } bind def
 885
 886 /.branchget {           %  <key> <leafkey> <kids> .branchget <obj|null>
 887   dup length 0 eq {
 888     pop pop pop null            % no kids left to search: not found
 889   } {
 890     dup length -1 bitshift 2 copy oget      % mid = length / 2
 891                         % Stack: key leafkey kids mid kids[mid]
 892     dup /Limits oget aload pop
 893                         % Stack: key leafkey kids mid kids[mid] min max
 894     6 index lt {                % max < key: continue with the kids after mid
 895       pop pop
 896       1 add 1 index length 1 index sub getinterval .branchget
 897     } {
 898       5 index gt {              % min > key: continue with the kids before mid
 899         pop
 900         0 exch getinterval .branchget
 901       } {                       % min <= key <= max: descend into kids[mid]
 902         exch pop exch pop .treeget
 903       } ifelse
 904     } ifelse
 905   } ifelse
 906 } bind def
 907
 908 /.leafget {             % <key> <pairs> .leafget <obj|null>
 909   dup length 4 lt {     % fewer than two whole pairs: nothing left to bisect
 910     dup length 2 lt { pop null } {      % not even one whole pair (empty/truncated array): not found
 911       dup 0 oget 2 index eq { 1 oget } { pop null } ifelse      % value if the only pair's key matches
 912     } ifelse exch pop   % drop key, leaving obj|null
 913   } {
 914     dup length -1 bitshift -2 and 2 copy oget   % mid = even index near the middle; Stack: key pairs mid pairs[mid]
 915     3 index gt { 0 exch } { 1 index length 1 index sub } ifelse      % pairs[mid] > key: lower part, else upper part
 916     getinterval .leafget
 917   } ifelse
 918 } bind def
 919
 920 end                     % pdfdict
 921 .setglobal              % restore the global-allocation flag saved by .currentglobal at the top of the file