sys/lib/ghostscript/pdf_base.ps

   1 %    Copyright (C) 1994-2003 artofcode LLC.  All rights reserved.
   2 %
   3 % This software is provided AS-IS with no warranty, either express or
   4 % implied.
   5 %
   6 % This software is distributed under license and may not be copied,
   7 % modified or distributed except as expressly authorized under the terms
   8 % of the license contained in the file LICENSE in this distribution.
   9 %
  10 % For more information about licensing, please refer to
  11 % http://www.ghostscript.com/licensing/. For information on
  12 % commercial licensing, go to http://www.artifex.com/licensing/ or
  13 % contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  14 % San Rafael, CA  94903, U.S.A., +1(415)492-9861.
  15
  16 % $Id: pdf_base.ps,v 1.48 2005/09/16 19:01:30 ray Exp $
  17 % pdf_base.ps
  18 % Basic parser for PDF reader.
  19
  20 % This handles basic parsing of the file (including the trailer
  21 % and cross-reference table), as well as objects, object references,
  22 % streams, and name/number trees; it doesn't include any facilities for
  23 % making marks on the page.
  24
   25 /.setlanguagelevel where { pop 2 .setlanguagelevel } if       % select language level 2 when the operator exists
   26 .currentglobal true .setglobal       % leave the previous allocation mode on the stack; allocate globally from here on
   27 /pdfdict where { pop } { /pdfdict 100 dict def } ifelse       % create pdfdict only if it is not already defined
   28 pdfdict begin       % the definitions that follow are made in pdfdict
  29
   30 % Define the name interpretation dictionary for reading values.
      % Each key is an executable name that may be met while a value is read;
      % .pdfexectoken looks the name up in the dictionary it is given and
      % executes the associated value.  The dictionary is made read-only.
   31 /valueopdict mark
   32   (<<) cvn { mark } bind        % don't push an actual mark!
   33   (>>) cvn { { .dicttomark } stopped {      % build the dictionary; complain instead of failing
   34       (   **** File has an unbalanced >> \(close dictionary\).\n)
   35       pdfformaterror
   36     } if
   37   } bind
   38   ([) cvn { mark } bind         % ditto
   39   (]) cvn dup load              % value is whatever ] is currently bound to
   40 %  /true true           % see .pdfexectoken below
   41 %  /false false         % ibid.
   42 %  /null null           % ibid.
      % The three entries below map a name to the executable name itself, so
      % the actual definition is looked up only when the entry is executed.
   43   /F dup cvx            % see Objects section below
   44   /R dup cvx            % see Objects section below
   45   /stream dup cvx       % see Streams section below
   46 .dicttomark readonly def
  47
  48 % ------ Utilities ------ %
  49
   50 % Define a scratch string.  The PDF language definition says that
   51 % no line in a PDF file can exceed 255 characters.
   52 /pdfstring 255 string def       % shared 255-byte buffer; prevline reads lines into it
  53
   54 % Read the previous line of a file.  If we aren't at a line boundary,
   55 % read the line containing the current position.
   56 % Skip any blank lines.
      % The scan starts at most 257 bytes before the position PDFfile had on
      % entry and reads forward line by line into pdfstring; the last
      % non-empty line read before the file position reaches the entry
      % position is the one returned, together with its starting offset.
      % The file position is left wherever the last readline stopped.
   57 /prevline               % - prevline <startpos> <substring>
   58  { PDFfile fileposition dup () pdfstring       % initpos linepos(=initpos) line(=empty) string
   59    2 index 257 sub 0 .max PDFfile exch setfileposition       % back up, but never before offset 0
   60     {           % Stack: initpos linepos line string
   61       PDFfile fileposition       % remember where this candidate line starts
   62       PDFfile 2 index readline pop       % read into the scratch string; the end-of-file flag is ignored
   63       dup length 0 gt
   64        { 3 2 roll 5 -2 roll pop pop 2 index }       % non-empty: it replaces the remembered linepos/line
   65        { pop }       % blank line: keep the previously remembered line
   66       ifelse
   67                 % Stack: initpos linepos line string startpos
   68       PDFfile fileposition 5 index ge { exit } if       % stop once we are at or past initpos
   69       pop
   70     }
   71    loop pop pop 3 -1 roll pop       % drop startpos, string and initpos; leave linepos line
   72  } bind def
  73
   74 % Handle the PDF 1.2 #nn escape convention when reading from a file.
   75 % This should eventually be done in C.
      % For PDFversion below 1.2, or when the name contains no '#', the name
      % is returned unchanged.  Otherwise name#escape rebuilds the text and
      % cvn turns the result back into a name.
   76 /.pdffixname {                  % <execname> .pdffixname <execname'>
   77   PDFversion 1.2 ge {
   78     dup .namestring (#) search {       % Stack when found: name post (#) pre
   79       name#escape cvn exch pop       % replace the original name with the decoded one
   80     } {
   81       pop       % no '#': drop the string copy, keep the name
   82     } ifelse
   83   } if
   84 } bind def
   85 /name#escape                    % <post> <(#)> <pre> name#escape <string>
      % Takes the three results of a successful (#) search and returns the
      % text with the escape decoded.  The two characters following '#' are
      % read as hex through a 2-byte SubFileDecode filter on <post>.  If they
      % are not valid hex, a warning is issued, the '#' is kept literally and
      % <post> is left untouched.  The remainder is then searched for further
      % '#' characters and processed recursively.
   86 { exch pop       % drop the (#) match; Stack: post pre
   87   1 index 2 () /SubFileDecode filter dup (x) readhexstring
   88                 % Stack: post pre stream char t/f
   89   not { % tolerate, but complain about bad syntax
   90     pop closefile (#) concatstrings exch       % Stack: pre+(#) post
   91     (   **** Warning: Invalid hex following '#' name escape, using literal '#' in name.\n)
   92     pdfformaterror
   93   } {
   94     exch closefile concatstrings       % append the decoded character to pre
   95     exch 2 1 index length 2 sub getinterval       % skip the two hex digits in post
   96   } ifelse
   97   (#) search { name#escape } if concatstrings       % decode any later escapes, then join with the prefix
   98 } bind def
  99
 100 % Execute a file, interpreting its executable names in a given
 101 % dictionary.  The name procedures may do whatever they want
 102 % to the operand stack.
 103 /.pdftokenerror {               % <count> <opdict> <errtoken> .pdftokenerror -
 104   BXlevel 0 le {
 105     (   **** Unknown operator: ') pdfformaterror
 106     dup =string cvs pdfformaterror
 107     % Attempt a retry scan of the element after changing to PDFScanInvNum
 108     << /PDFScanInvNum true >> setuserparams
 109     =string cvs
 110     token pop exch pop dup type
 111     dup /integertype eq exch /realtype eq or {
 112       exch pop exch pop
 113       (', processed as number, value: ) pdfformaterror
 114       dup =string cvs pdfformaterror (\n) pdfformaterror
 115      << /PDFScanInvNum null >> setuserparams    % reset to default scanning rules
 116       false     % suppress any stack cleanup
 117     } {
 118       % error was non-recoverable with modified scanning rules
 119     ('\n) pdfformaterror
 120       true
 121     } ifelse
 122   } {
 123     true
 124   } ifelse
 125   { % clean up the operand stack if this was non-recoverable
 126   pop pop count exch sub { pop } repeat % pop all the operands
 127   } if
 128 } bind def
  129 /.pdfexectoken {                % <count> <opdict> <exectoken> .pdfexectoken ?
      % Execute one executable name read from the file.  If the name is a key
      % in <opdict>, the three operands are removed and the associated value
      % is executed.  The names true, false and null are fetched from
      % systemdict instead.  Anything else is handed to .pdftokenerror.
      % When PDFDEBUG is true the token is printed first; with PDFSTEP also
      % true, a step counter decides whether to pause and read a new count
      % from %stdin.
  130   PDFDEBUG {
  131     pdfdict /PDFSTEPcount known not { pdfdict /PDFSTEPcount 1 .forceput } if
  132     PDFSTEP {
  133       pdfdict /PDFtokencount 2 copy .knownget { 1 add } { 1 } ifelse .forceput       % count tokens, starting at 1
  134       PDFSTEPcount 1 gt {
  135         pdfdict /PDFSTEPcount PDFSTEPcount 1 sub .forceput       % still skipping: just count down
  136       } {
  137         dup ==only
  138         (    step # ) print PDFtokencount =only
  139         ( ? ) print flush 1 false .outputpage
  140         (%stdin) (r) file 255 string readline {
  141           token {
  142             exch pop pdfdict /PDFSTEPcount 3 -1 roll .forceput       % the first token typed becomes the new step count
  143           } {
  144             pdfdict /PDFSTEPcount 1 .forceput       % empty input: single step
  145           } ifelse % token
  146         } {
  147           pop /PDFSTEP false def         % EOF on stdin
  148         } ifelse % readline
  149       } ifelse % PDFSTEPcount > 1
  150     } {
  151       dup ==only () = flush
  152     } ifelse % PDFSTEP
  153   } if % PDFDEBUG
  154   2 copy .knownget {
  155     exch pop exch pop exch pop exec       % drop token, opdict and count, then run the entry
  156   } {
  157                 % Normally, true, false, and null would appear in opdict
  158                 % and be treated as "operators".  However, there is a
  159                 % special fast case in the PostScript interpreter for names
  160                 % that are defined in, and only in, systemdict and/or
  161                 % userdict: putting these three names in the PDF dictionaries
  162                 % destroys this property for them, slowing down their
  163                 % interpretation in all PostScript code.  Therefore, we
  164                 % check for them explicitly here instead.
  165     dup dup dup /true eq exch /false eq or exch /null eq or {
  166       exch pop exch pop //systemdict exch get       % drop opdict and count; push the systemdict value
  167     } {
  168       .pdftokenerror
  169     } ifelse
  170   } ifelse
  171 } bind def
  172 /.pdfrun {                      % <file> <opdict> .pdfrun -
  173         % Construct a procedure with the stack depth, file and opdict
  174         % bound into it.
      % The procedure built here is { { <count> <opdict> <file> token ... } loop }
      % wrapped so that it runs under 'stopped', then stores the previous
      % value of PDFsource back, and finally re-raises the stop if there was
      % one.  PDFsource is set to <file> just before the procedure is executed.
  175   1 index cvlit count 2 sub 3 1 roll mark mark 5 2 roll       % Stack: file mark mark count opdict file(literal)
  176   {     % Stack: ..operands.. count opdict file
  177     token {
  178       dup type /nametype eq {
  179         dup xcheck {
  180           .pdfexectoken       % executable name: interpret it through opdict
  181         } {
  182           .pdffixname       % literal name: decode any #nn escapes
  183           exch pop exch pop PDFDEBUG {       % drop opdict and count, leaving the name as an operand
  184             PDFSTEPcount 1 le {
  185               dup ==only ( ) print flush
  186             } if
  187           } if
  188         } ifelse
  189       } {
  190         exch pop exch pop PDFDEBUG {       % any other object is simply left on the stack
  191           PDFSTEPcount 1 le {
  192             dup ==only ( ) print flush
  193           } if
  194         } if
  195       } ifelse
  196     } {
  197       (%%EOF) cvn cvx .pdfexectoken       % end of input is presented as the executable name %%EOF
  198     } ifelse
  199   }
  200   aload pop .packtomark cvx       % splice count/opdict/file in front of the body (back to the inner mark)
  201   /loop cvx 2 packedarray cvx       % { body loop }
  202   { stopped /PDFsource } aload pop
  203   PDFsource       % capture the current PDFsource so it can be stored back afterwards
  204   { store { stop } if } aload pop .packtomark cvx       % pack everything back to the outer mark
  205   /PDFsource 3 -1 roll store exec       % PDFsource := <file>, then run the constructed procedure
  206 } bind def
 207
  208 % Execute a file, like .pdfrun, for a marking context.
  209 % This temporarily rebinds LocalResources and DefaultQstate.
      % The old values of both variables are kept on the operand stack across
      % the run and stored back afterwards.  NOTE(review): they are not
      % restored if .pdfrun exits via stop -- confirm whether callers rely on that.
  210 /.pdfruncontext {               % <resdict> <file> <opdict> .pdfruncontext -
  211   /.pdfrun load LocalResources DefaultQstate       % Stack: resdict file opdict proc oldLR oldDQ
  212   /LocalResources 7 -1 roll store       % LocalResources := <resdict>
  213   /DefaultQstate qstate store
  214   3 .execn       % execute the three objects pushed on line 211 (the .pdfrun procedure and the two saved values)
  215   /DefaultQstate exch store
  216   /LocalResources exch store
  217 } bind def
 218
  219 % Get the depth of the PDF operand stack.  The caller sets pdfemptycount
  220 % before calling .pdfrun or .pdfruncontext.  It is initially set by
  221 % pdf_main, and is also set by any routine which changes the operand
  222 % stack depth (currently .pdfpaintproc, although there are other callers
  223 % of .pdfrun{context} which have not been checked for opstack depth).
  224 /.pdfcount {            % - .pdfcount <count>
  225   count pdfemptycount sub       % current stack depth relative to the recorded "empty" depth
  226 } bind def
 227
 228 % ================================ Objects ================================ %
 229
  230 % Since we may have more than 64K objects, we have to use a 2-D array to
  231 % hold them (and the parallel Generations structure).
      % An "lseq" is an outer array whose elements are sub-arrays or
      % sub-strings; an index is split into an outer part (index >> lshift)
      % and an inner part (index & lsubmask).
  232 /lshift 9 def       % log2 of the sub-sequence length
  233 /lnshift lshift neg def       % negative shift count, i.e. a right shift for bitshift
  234 /lsubmask 1 lshift bitshift 1 sub def       % 511: mask for the index within a sub-sequence
  235 /lsublen lsubmask 1 add def       % 512: length of a full sub-sequence
  236 /larray {       % - larray <larray>
  237   [ [] ]       % a new, empty lseq of arrays: one zero-length sub-array
  238 } bind def
  239 /lstring {      % - lstring <lstring>
  240   [ () ]       % a new, empty lseq of strings: one zero-length sub-string
  241 } bind def
  242 /ltype {        % <lseq> ltype <type>
  243   0 get type       % type of the first sub-sequence (arraytype or stringtype)
  244 } bind def
  245 /lget {         % <lseq> <index> lget <value>
      % Fetch element <index>: the high bits select the sub-sequence and the
      % low lshift bits select the element within it.
  246   dup //lsubmask and 3 1 roll //lnshift bitshift get exch get
  247 } bind def
  248 /lput {         % <lseq> <index> <value> lput -
      % Store <value> at element <index>, using the same index split as lget.
  249   3 1 roll       % Stack: value lseq index
  250   dup //lsubmask and 4 1 roll //lnshift bitshift get       % Stack: subindex value subseq
  251   3 1 roll put       % Stack before put: subseq subindex value
  252 } bind def
  253 /llength {      % <lseq> llength <length>
      % Total length, computed as (number of sub-sequences - 1) shifted left
      % by lshift, plus the length of the last sub-sequence.  This relies on
      % every sub-sequence except the last being full.
  254   dup length 1 sub dup //lshift bitshift
  255   3 1 roll get length add
  256 } bind def
  257 % Note: lgrowto (defined after growto) assumes newlength > llength(lseq)
  258 /growto {       % <string/array> <length> growto <string'/array'>
      % Allocate a new string or array of <length> elements (matching the
      % type of the operand) and copy the old contents into its beginning.
  259   1 index type /stringtype eq { string } { array } ifelse
  260   2 copy copy pop exch pop       % copy old into new, drop the returned sub-interval and the old object
  261 } bind def
  262 /lgrowto {      % <lseq> <newlength> lgrowto <lseq'>
      % Grow an lseq to <newlength> elements.  If more sub-sequences are
      % needed, the last existing one is first completed (by a recursive
      % call), a new outer array is built with additional full-length
      % sub-sequences, and a zero-length sub-sequence is appended as the new
      % last element.  In either case the last sub-sequence is finally grown
      % to the required remainder length.
  263     dup //lsubmask add //lnshift bitshift dup 3 index length gt {       % newtoplen = number of sub-sequences needed
  264         % Add more sub-arrays.  Start by completing the last existing one.
  265                 % Stack: lseq newlen newtoplen
  266     3 -1 roll dup llength 1 sub //lsubmask or 1 add lgrowto
  267                 % Stack: newlen newtoplen lseq
  268     [ exch aload pop
  269     counttomark 2 add -1 roll           % newtoplen
  270     counttomark sub { dup 0 0 getinterval lsublen growto } repeat       % append full sub-sequences of the same type
  271     dup 0 0 getinterval ] exch       % append an empty last sub-sequence and close the new outer array
  272   } {
  273     pop
  274   } ifelse
  275         % Expand the last sub-array.
  276   1 sub //lsubmask and 1 add       % required length of the last sub-sequence (1..lsublen)
  277   exch dup dup length 1 sub 2 copy
  278                 % Stack: newsublen lseq lseq len-1 lseq len-1
  279   get 5 -1 roll growto put
  280 } bind def
  281 /lforall {      % <lseq> <proc> lforall -
      % Build { <proc> forall } and apply it to each sub-sequence, so that
      % <proc> is run on every element of every sub-sequence.
  282   /forall cvx 2 packedarray cvx forall
  283 } bind def
 284
 285 % We keep track of PDF objects using the following PostScript variables:
 286 %
 287 %       Generations (lstring): Generations[N] holds 1+ the current
 288 %           generation number for object number N.  (As far as we can tell,
 289 %           this is needed only for error checking.)  For free objects,
 290 %           Generations[N] is 0.
 291 %
 292 %       Objects (larray): If object N is loaded, Objects[N] is the actual
 293 %           object; otherwise, Objects[N] is an executable integer giving
 294 %           the file offset of the object's location in the file.  If
 295 %           ObjectStream[N] is non-zero then Objects[N] contains the index
 296 %           into the object stream instead of the file offset of the object.
 297 %
 298 %       ObjectStream (larray): If object N is in an object stream then
 299 %           ObjectStream[N] holds the object number of the object stream.
 300 %           Otherwise ObjectStream[N] contains 0.  If ObjectStream[N]
 301 %           is non-zero then Objects[N] contains  the index into the object
 302 %           stream.
 303 %
 304 %       GlobalObjects (dictionary): If object N has been resolved in
 305 %           global VM, GlobalObjects[N] is the same as Objects[N]
 306 %           (except that GlobalObjects itself is stored in global VM,
 307 %           so the entry will not be deleted at the end of the page).
 308 %
 309 %       IsGlobal (lstring): IsGlobal[N] = 1 iff object N was resolved in
 310 %           global VM.  This is an accelerator to avoid having to do a
 311 %           dictionary lookup in GlobalObjects when resolving every object.
 312
  313 % Initialize the PDF object tables.
      % All tables start empty; GlobalObjects is the only one allocated with
      % global allocation mode switched on.
  314 /initPDFobjects {               % - initPDFobjects -
  315   /ObjectStream larray def
  316   /Objects larray def
  317   /Generations lstring def
  318   .currentglobal true .setglobal       % remember the allocation mode, then switch to global
  319   /GlobalObjects 20 dict def
  320   .setglobal       % restore the remembered allocation mode
  321   /IsGlobal lstring def
  322 } bind def
 323
  324 % Grow the tables to a specified size.
      % Each of the four tables is grown with lgrowto only if it is currently
      % shorter than <minsize>; tables that are already large enough are left
      % untouched.
  325 /growPDFobjects {               % <minsize> growPDFobjects -
  326   dup ObjectStream llength gt {
  327     dup ObjectStream exch lgrowto /ObjectStream exch def
  328   } if
  329   dup Objects llength gt {
  330     dup Objects exch lgrowto /Objects exch def
  331   } if
  332   dup Generations llength gt {
  333     dup Generations exch lgrowto /Generations exch def
  334   } if
  335   dup IsGlobal llength gt {
  336     dup IsGlobal exch lgrowto /IsGlobal exch def
  337   } if
  338   pop       % discard <minsize>
  339 } bind def
 340
  341 % We represent an unresolved object reference by a procedure of the form
  342 % {obj# gen# resolveR}.  This is not a possible PDF object, because PDF has
  343 % no way to represent procedures.  Since PDF in fact has no way to represent
  344 % any PostScript object that doesn't evaluate to itself, we can 'force'
  345 % a possibly indirect object painlessly with 'exec'.
  346 % Note that since we represent streams by executable dictionaries
  347 % (see below), we need both an xcheck and a type check to determine
  348 % whether an object has been resolved.
      % resolved? always consumes <object#>.  When the Objects entry is still
      % an executable integer but IsGlobal marks the object, the value is
      % fetched from GlobalObjects and also stored back into Objects.
  349 /resolved? {            % <object#> resolved? <value> true
  350                         % <object#> resolved? false
  351   Objects 1 index lget dup xcheck {     % Check if executable
  352     dup type /integertype eq {          % Check if an integer
  353                 % Check whether the object is in GlobalObjects.
  354       pop IsGlobal 1 index lget 0 eq {  % 0 --> Not in GlobalObjects
  355         pop false                       % The object is not resolved
  356       } {                               % The object is in GlobalObjects
  357                 % Update Objects from GlobalObjects
  358         PDFDEBUG { (%Global=>local: ) print dup == } if
  359         GlobalObjects 1 index get dup Objects 4 1 roll lput true
  360       } ifelse
  361     } {                         % Else object is executable but not integer
  362       exch pop true             % Therefore must be executable dict. (stream)
  363     } ifelse
  364   } {                           % Else object is not executable.
  365     exch pop true               % Therefore it must have been resolved.
  366   } ifelse
  367 } bind def
  368 /oforce /exec load def       % <object> oforce <object'>: forcing a possibly indirect object is just exec
  369 /oget {         % <array> <index> oget <object>
  370                 % <dict> <key> oget <object>
  371                 % Before release 6.20, this procedure stored the resolved
  372                 % object back into the referring slot.  In order to support
  373                 % PDF linearization, we no longer do this.
  374   get oforce       % fetch the element, then resolve it if it is an indirect reference
  375 } bind def
  376 /oforce_array { % <array> oforce_array <array>
  377   [ exch { oforce } forall ]       % new array holding every element after oforce; the operand is not modified
  378 } bind def
  379 /oforce_elems { % <array> oforce_elems <first> ... <last>
  380   { oforce } forall       % push every element, forced, onto the operand stack
  381 } bind def
  382 % A null value in a dictionary is equivalent to an omitted key;
  383 % we must check for this specially.
  384 /knownoget {    % <dict> <key> knownoget <value> true
  385                 % <dict> <key> knownoget false
  386                 % See oget above regarding this procedure.
  387   .knownget {
  388     oforce dup null eq { pop false } { true } ifelse       % a value that forces to null counts as absent
  389   } {
  390     false       % key not present
  391   } ifelse
  392 } bind def
 393
  394 % PDF 1.1 defines a 'foreign file reference', but not its meaning.
  395 % Per the specification, we convert these to nulls.
  396 /F {            % <file#> <object#> <generation#> F <object>
  397                 % Some PDF 1.1 files use F as a synonym for f!
      % With fewer than three operands on the PDF operand stack (.pdfcount)
      % this cannot be a foreign reference, so f is executed instead
      % (f is defined outside this section -- presumably the fill operator).
  398    .pdfcount 3 lt { f } { pop pop pop null } ifelse
  399 } bind def
 400
  401 % Verify the generation number for a specified object
  402 % Note:  The value in Generations is the generation number plus 1.
  403 % If the value in Generations is zero then the object is free.
      % <generation#> is always consumed and <object#> is always left on the
      % stack under the boolean result.  On a mismatch a warning naming the
      % object and generation is issued unless QUIET is true.
  404 /checkgeneration {  % <object#> <generation#> checkgeneration <object#> <OK>
  405   Generations 2 index lget 1 sub 1 index eq {   % If generation # match ...
  406     pop true                                    % Then return true
  407   } {                                   % Else not a match ...
  408     QUIET not {                         % Create warning message if not QUIET
  409       Generations 2 index lget 0 eq {   % Check if object is free ...
  410         (   **** Warning: reference to free object: )
  411       } {
  412         (   **** Warning: wrong generation: )
  413       } ifelse
  414       2 index =string cvs concatstrings ( ) concatstrings       % put obj #
  415       exch =string cvs concatstrings ( R\n) concatstrings       % put gen #
  416       pdfformaterror                    % Output warning message
  417     } {                                 % Else QUIET ...
  418       pop                               % Pop generation number
  419     } ifelse false                      % Return false if gen # not match
  420   } ifelse
  421 } bind def
  422 /R {            % <object#> <generation#> R <object>
  423   /resolveR cvx 3 packedarray cvx       % build the unresolved reference {obj# gen# resolveR}; nothing is read yet
  424 } bind def
 425
  426 % If we encounter an object definition while reading sequentially,
  427 % we just store it away and keep going.
  428 /objopdict mark
  429   valueopdict { } forall       % start from a copy of every valueopdict entry
  430   /endobj dup cvx       % endobj maps to the executable name, looked up when executed
  431 .dicttomark readonly def
 432
  433 /obj {                  % <object#> <generation#> obj <object>
  434   PDFfile objopdict .pdfrun       % continue reading PDFfile with endobj recognised as an operator
  435 } bind def
 436
 437 /endobj {               % <object#> <generation#> <object> endobj <object>
 438   3 1 roll
 439                 % Read the xref entry if we haven't yet done so.
 440                 % This is only needed for generation # checking.
 441   1 index resolved? {
 442     pop
 443   } if
 444   checkgeneration {
 445                 % The only global objects we bother to save are
 446                 % (resource) dictionaries.
 447     1 index dup gcheck exch type /dicttype eq and {
 448       PDFDEBUG { (%Local=>global: ) print dup == } if
 449       GlobalObjects 1 index 3 index put
 450       IsGlobal 1 index 1 put
 451     } if
 452     Objects exch 2 index lput
 453   } {
 454     pop pop null
 455   } ifelse
 456 } bind def
 457
  458 % When resolving an object reference in an object stream, we stop at
  459 % the end of file.  Note:  Objects in an object stream do not have either
  460 % a starting 'obj' or an ending 'endobj'.
  461 /resolveobjstreamopdict mark
  462   valueopdict { } forall       % start from a copy of every valueopdict entry
  463   (%%EOF) cvn { exit } bind       % .pdfrun presents end of input as %%EOF; leave its read loop
  464 .dicttomark readonly def
 465
  466 % Note: This version of this function is not currently being used.
      % It is immediately superseded by the definition of the same name that
      % follows it in this file.
  467 % Resolve all objects in an object stream
  468 /resolveobjectstream {          % <object stream #> resolveobjectstream -
  469   PDFDEBUG { (%Resolving object stream: ) print } if
  470   0 resolveR    % Get the objectstream dict, all objstrms use 0 as the gen #
  471   dup /First get                % Save location of first object onto the stack
  472   1 index /N get                % Save number of objects onto the stack
  473   2 index false resolvestream   % Convert stream dict into a stream
  474   /ReusableStreamDecode filter  % We need to be able to position stream
  475                 % Objectstreams begin with list of object numbers and locations
  476                 % Create two arrays to hold object numbers and stream location
  477   1 index array                 % Array for holding object number
  478   2 index array                 % Array for holding stream object location
  479                 % Get the object numbers and locations.
  480   0 1 5 index 1 sub {           % Loop and collect obj # and locations
  481                 % Stack: objstreamdict First N objectstream [obj#] [loc] index
  482     2 index 1 index             % Setup to put obj# into object number array
  483     5 index token pop put       % Get stream, then get obj# and put into array
  484     1 index 1 index             % Setup to put object loc into location array
  485     5 index token pop put       % Get stream, get obj loc and put into array
  486     pop                         % Remove loop index
  487   } for
  488                 % Create a bytestring big enough for reading any object data
  489                 % Scan for the size of the largest object
  490   0 0                           % Init max object size and previous location
  491   2 index {                     % Loop through all object locations
  492                                 % Stack:  ... maxsize prevloc currentloc
  493     dup 4 1 roll                % Save copy of object location into stack
  494     exch sub                            % Object size = currentloc - prevloc
  495     .max                        % Determine maximum object size
  496     exch                        % Put max size under previous location
  497   } forall
  498   pop                           % Remove previous location
  499   .bigstring                    % Create bytestring based upon max obj size
  500                 % Move to the start of the object data
  501   3 index 6 index               % Get objectstream and start of first object
  502   setfileposition               % Move to the start of the data
  503                 % Read the data for all objects except the last.  We do
  504                 % not know the size of the last object so we need to treat
  505                 % it as a special case.
  506   0 1 6 index 2 sub {
  507     dup 4 index exch get        % Get our current object number
  508                 % Stack: objstreamdict First N objectstream [obj#] [loc]
  509                 %        bytestring loopindex object#
  510     dup resolved? {             % If we already have this object
      % NOTE(review): the next line looks like leftover debugging.  xxx is not
      % defined anywhere in this view, so this path would presumably raise an
      % undefined error if this version were ever executed.
  511         (yyy) = pstack (yyy) = flush xxx
  512       pop pop                   % Remove object and object number
  513       1 add 2 index exch get    % Get location of next object
  514       6 index add 6 index exch  % Form location of next object and get stream
  515       setfileposition           % Move to the start of the next object data
  516     } {                         % Else this is a new object ...
  517                 % We are going to create a string for reading the object
  518       2 index 0                 % use our working string
  519                 % Determine the size of the object
  520       5 index 4 index 1 add get % Get location of the next object
  521       6 index 5 index get       % Get location of this object
  522       sub                       % Size of object = next loc - this loc
  523       getinterval               % Create string for reading object
  524       6 index exch readstring pop       % Read object
  525       /ReusableStreamDecode filter      % Convert string into a stream
  526       resolveobjstreamopdict .pdfrun    % Get PDF object
  527       Objects exch 2 index exch lput     % Put object into Objects array
  528       pop pop                   % Remove object # and loop index
  529     } ifelse
  530   } for
  531   pop pop                       % Remove our working string and loc array
  532                 % Now read the last object in the object stream.  Since it
  533                 % is the last object, we can use the original stream and
  534                 % terminate when we hit the end of the stream
  535                 % Stack: objstreamdict First N objectstream [obj#]
  536   2 index 1 sub get             % Get our current object number
  537   dup resolved? not {           % If we do not already have this object
  538     exch                        % Get our object stream
  539     resolveobjstreamopdict .pdfrun      % Get PDF object
  540     Objects exch 2 index exch lput      % Put object into Objects array
  541   } if
  542   pop pop pop pop               % Clear stack
  543 } bind def
 544
  545 % Resolve all objects in an object stream
      % The stream is decoded, the N object numbers are read from its header,
      % and all object data is parsed in one .pdfrun pass starting at /First.
      % Every object not already resolved is then stored into Objects.
      % A typecheck error is signalled if /Type is not /ObjStm, and a
      % rangecheck error if the number of parsed objects is not N.
  546 /resolveobjectstream {          % <object stream #> resolveobjectstream -
  547   PDFDEBUG { (%Resolving object stream: ) print } if
  548   0 resolveR    % Get the objectstream dict, all objstrms use 0 as the gen #
  549   dup /Type get /ObjStm ne {    % Verify type is object stream
  550     (   **** Incorrect Type in object stream dictionary.\n) pdfformaterror
  551     /resolveobjectstream cvx /typecheck signalerror
  552   } if
  553   dup /N get                    % Save number of objects onto the stack
  554   1 index false resolvestream   % Convert stream dict into a stream
  555   /ReusableStreamDecode filter  % We need to be able to position stream
  556                 % Objectstreams begin with list of object numbers and locations
  557   1 index array                 % Create array for holding object number
  558                 % Get the object numbers
  559   0 1 4 index 1 sub {           % Loop and collect obj numbers
  560                 % Stack: objstreamdict N objectstream [obj#] loopindex
  561     1 index 1 index             % Setup to put obj# into object number array
  562     4 index token pop put       % Get stream, then get obj# and put into array
  563     2 index token pop pop pop   % Get stream, get obj loc and clear stack
  564   } for
  565                 % Move to the start of the object data
  566   1 index 4 index /First get    % Get objectstream and start of first object
  567   setfileposition               % Move to the start of the data
  568                 % We disable PDFDEBUG while reading the data stream.  We will
  569                 % print the data later
  570   PDFDEBUG /PDFDEBUG false def  % Save PDFDEBUG and disable it while reading
  571                 % Read the data for all objects.  We check to see if we get
  572                 % the number of objects that we expect.
  573                 % Stack: objstreamdict N objectstream [obj#] PDFDEBUG
  574   mark 4 -1 roll                % Get objectstream
  575   count 5 index add             % Determine stack depth with objects
  576   /PDFObjectStkCount exch def
  577   resolveobjstreamopdict .pdfrun % Get PDF objects
  578   PDFObjectStkCount count ne {  % Check stack depth
  579     (   **** Incorrect object count in object stream.\n) pdfformaterror
  580     /resolveobjectstream cvx /rangecheck signalerror
  581   } if
  582                 % We have the object data
  583   counttomark array astore      % Put objects into an array
  584   exch pop                      % Remove mark
  585   exch /PDFDEBUG exch def       % Restore PDFDEBUG flag
  586                 % Save the objects into Objects
  587   0 1 2 index length 1 sub {    % Loop through all objects
  588                 % Stack: objstreamdict N [obj#] [objects] loopindex
  589     dup 3 index exch get        % Get our current object number
  590     dup resolved? {             % If we already have this object
  591       pop pop                   % Remove object and object number
  592     } {                         % Else if we do not have this object
  593       PDFDEBUG { (%Resolving compressed object: [) print dup =only ( 0]) = } if
  594       Objects exch 3 index      % Put the object into Objects
  595       3 index get
  596       PDFDEBUG { dup === flush } if
  597       lput
  598     } ifelse
  599     pop                         % Remove loop index
  600   } for
  601   pop pop pop pop               % Remove objstream, N, [obj#], and [objects]
  602 } bind def
 603
  604 % When resolving an object reference, we stop at the endobj or endstream.
      % Each terminator runs endobj (which stores the object) and then leaves
      % the read loop with exit.
  605 /resolveopdict mark
  606   valueopdict { } forall       % start from a copy of every valueopdict entry
  607   /endstream { endobj exit } bind
  608   /endobj { endobj exit } bind
  609                 % OmniForm generates PDF file with endobj missing in some
  610                 % objects. AR ignores this. So we have to do it too.
  611   /obj { pop pop endobj exit } bind       % drop the next object's obj# and gen#, then finish the current one
  612 .dicttomark readonly def
 613
 614 /resolveR {             % <object#> <generation#> resolveR <object>
 615   PDFDEBUG {
 616     PDFSTEPcount 1 le {
 617       (%Resolving: ) print 2 copy 2 array astore ==
 618     } if
 619   } if
 620   1 index resolved? {           % If object has already been resolved ...
 621     exch pop exch pop           % then clear stack and return object
 622   } {                           % Else if not resolved ...
 623     PDFfile fileposition 3 1 roll       % Save current file position
 624     1 index Objects exch lget           % Get location of object from xref
 625     3 1 roll checkgeneration {          % Verify the generation number
 626                         % Stack: savepos objpos obj#
 627        ObjectStream 1 index lget dup 0 eq { % Check if obj in not an objstream
 628          pop exch PDFoffset add PDFfile exch setfileposition
 629          PDFfile token pop 2 copy ne
 630           { (   **** Unrecoverable error in xref!\n) pdfformaterror
 631             /resolveR cvx /rangecheck signalerror
 632           }
 633          if pop PDFfile token pop
 634          PDFfile token pop /obj ne
 635           { (   **** Unrecoverable error in xref!\n) pdfformaterror
 636             /resolveR cvx /rangecheck signalerror
 637           }
 638          if
 639          pdf_run_resolve        % PDFfile resolveopdict .pdfrun
 640       } {                       % Else the object is in an ObjectStream
 641                 % Process an objectstream object.  We are going to resolve all
 642                 % of the objects in sthe stream and place them into the Objects
 643                 % array.
 644                 % Stack: savepos objpos obj# objectstream#
 645         resolveobjectstream
 646         resolved? {             % If object has already been resolved ...
 647           exch pop              % Remove object pos from stack.
 648         } {
 649           pop pop null          % Pop objpos and obj#, put null for object
 650         } ifelse
 651       } ifelse
 652     } {                         % Else the generation number is wrong
 653             % Don't cache if the generation # is wrong.
 654         pop pop null            % Pop objpos and obj#, put null for object
 655     } ifelse                    % ifelse generation number is correct
 656     exch PDFfile exch setfileposition   % Return to original file position
 657   } ifelse
 658 } bind def
 659
 660 % ================================ Streams ================================ %
 661
 662 % We represent a stream by an executable dictionary that contains,
 663 % in addition to the contents of the original stream dictionary:
 664 %       /File - the file or string where the stream contents are stored,
 665 %         if the stream is not an external one.
 666 %       /FilePosition - iff File is a file, the position in the file
 667 %         where the contents start.
 668 %       /StreamKey - the key used to decrypt this stream, if any.
 669 % We do the real work of constructing the data stream only when the
 670 % contents are needed.
 671
 672 % Construct a stream.  The length is not reliable in the face of
 673 % different end-of-line conventions, but it's all we've got.
 674 %
 675 % PDF files are inconsistent about what may fall between the 'stream' keyword
 676 % and the actual stream data, and it appears that no one algorithm can
 677 % detect this reliably.  We used to try to guess whether the file included
 678 % extraneous \r and/or \n characters, but we no longer attempt to do so,
 679 % especially since the PDF 1.2 specification states flatly that the only
 680 % legal terminators following the 'stream' keyword are \n or \r\n, both of
 681 % which are properly skipped and discarded by the token operator.
 682 % Unfortunately, this doesn't account for other whitespace characters that
 683 % may have preceded the EOL, such as spaces or tabs. Thus we back up one
 684 % character and scan until we find the \n terminator.
 685 /stream {       % <dict> stream <modified_dict>
 686   dup /Length oget 0 eq {
 687     dup /Filter undef   % don't confuse any filters that require data
 688   } if
 689   dup /F known dup PDFsource PDFfile eq or {
 690     not {
 691       dup /File PDFfile put
 692       % make sure that we are just past the EOL \n character
 693       PDFfile dup fileposition 1 sub setfileposition    % back up one
 694       { PDFfile read pop dup 13 eq {
 695           % If there had been a \n, token would have advanced over it
 696           % thus, if the terminator was \r, we have a format error!
 697           (   **** Warning: stream operator not terminated by valid EOL.\n) pdfformaterror
 698           pop exit      % fileposition is OK (just past the \r).
 699         } if
 700         10 eq { exit } if
 701       } loop    % scan past \n
 702       dup /FilePosition PDFfile fileposition put
 703       PDFDEBUG {
 704         PDFSTEPcount 1 le {
 705           (%FilePosition: ) print dup /FilePosition get ==
 706         } if
 707       } if
 708     } if
 709     % Some (bad) PDf files have invalid stream lengths.  This causes problems
 710     % if we reposition beyond the end of the file.  So we compare the given
 711     % length to number of bytes left in the file.
 712     dup /Length oget
 713     dup PDFfile bytesavailable lt {     % compare to to bytes left in file
 714       PDFfile fileposition              % reposition to the end of stream
 715       add PDFfile exch setfileposition
 716     } {
 717       pop                               % bad stream length - do not reposition.
 718                                         % This will force a length warning below
 719     } ifelse
 720   } {
 721     pop
 722         % We're already reading from a stream, which we can't reposition.
 723         % Capture the sub-stream contents in a string.
 724     dup /Length oget string PDFsource exch readstring
 725     not {
 726       (   **** Warning: Unexpected EOF in stream!\n) pdfformaterror
 727       /stream cvx /rangecheck signalerror
 728     } if
 729     1 index exch /File exch put
 730   } ifelse
 731   PDFsource {token} stopped {
 732     pop null
 733   } {
 734     not { null } if
 735   } ifelse
 736   dup /endobj eq {
 737     % Another case that Acrobat Reader handles -- 'endobj' without 'endstream'.
 738     (   **** Warning: stream missing 'endstream'.\n) pdfformaterror
 739     pop /endstream              % fake a valid endstream
 740   } if
 741   /endstream ne {
 742     (   **** Warning: stream Length incorrect.\n) pdfformaterror
 743     dup /Length undef % prevent the use of the incorrect length.
 744     cvx endobj exit   % exit from .pdfrun now.
 745   } if
 746   cvx
 747 } bind def
% Handler for the 'endstream' keyword: it just leaves the innermost
% enclosing loop.
% NOTE(review): presumably that loop is the .pdfrun loop that is executing
% the object's tokens (compare the "exit from .pdfrun" remark in /stream) --
% confirm against .pdfrun.
/endstream {
  exit
} bind def
 751
 752 % Contrary to the published PDF (1.3) specification, Acrobat Reader
 753 % accepts abbreviated filter names everywhere, not just for in-line images,
 754 % and some applications (notably htmldoc) rely on this.
 755 /unabbrevfilterdict mark
 756   /AHx /ASCIIHexDecode  /A85 /ASCII85Decode  /CCF /CCITTFaxDecode
 757   /DCT /DCTDecode  /Fl /FlateDecode  /LZW /LZWDecode  /RL /RunLengthDecode
 758 .dicttomark readonly def
 759
 760 % Extract and apply filters.
 761 /filterparms {          % <dict> <DPkey> <Fkey> filterparms
 762                         %   <dict> <parms> <filternames>
 763   2 index exch knownoget {
 764     exch 2 index exch knownoget {
 765                 % Both filters and parameters.
 766       exch dup type /nametype eq {
 767         1 array astore exch
 768         dup type /arraytype ne { 1 array astore } if exch
 769       } if
 770     } {
 771                 % Filters, but no parameters.
 772       null exch
 773       dup type /nametype eq { 1 array astore } if
 774     } ifelse
 775   } {
 776                 % No filters: ignore parameters, if any.
 777     pop null { }
 778   } ifelse
 779 } bind def
 780 /filtername {           % <filtername> filtername <filtername'>
 781   //unabbrevfilterdict 1 index .knownget { exch pop } if
 782   dup /Filter resourcestatus { pop pop } {
 783     Repaired exch       % this error is not the creator's fault
 784     (   **** ERROR: Unable to process ) pdfformaterror
 785     64 string cvs pdfformaterror
 786     ( data. Page will be missing data.\n) pdfformaterror
 787     /Repaired exch store % restore the previous "Repaired" state
 788     % provide a filter that returns EOF (no data)
 789     /.EOFDecode
 790   } ifelse
 791 } bind def
 792 /applyfilters {         % <parms> <source> <filternames> applyfilters <stream>
 793   2 index null eq {
 794     { filtername filter }
 795   } {
 796     {           % Stack: parms source filtername
 797       2 index 0 oget dup null eq { pop } {
 798         exch filtername dup /JBIG2Decode eq { exch jbig2cachectx exch } if
 799       } ifelse filter
 800       exch dup length 1 sub 1 exch getinterval exch
 801     }
 802   } ifelse forall exch pop
 803 } bind def
 804
 805 % JBIG2 streams have an optional 'globals' stream obj for
 806 % sharing redundant data between page images. Here we resolve
 807 % that stream reference (if any) and run it through the decoder,
 808 % creating a special -jbig2globalctx- postscript object our
 809 % JBIG2Decode filter implementation looks for in the parm dict.
 810 /jbig2cachectx { % <parmdict> jbig2cachectx <parmdict>
 811   dup /JBIG2Globals knownoget {
 812     dup /Length oget
 813     % make global ctx
 814     PDFfile fileposition 3 1 roll % resolvestream is not reentrant
 815     exch true resolvestream exch .bytestring
 816     .readbytestring pop .jbig2makeglobalctx
 817     PDFfile 3 -1 roll setfileposition
 818     1 index exch
 819     /.jbig2globalctx exch put
 820   } if
 821 } bind def
 822
% Resolve a stream dictionary to a PostScript stream.
% Streams with no filters require special handling: whether we are going
% to interpret the stream or just read data from it, we impose a
% SubFileDecode filter that reads just the requisite amount of data
% (unless the dictionary has an /IDFlag entry).
% Note that, in general, resolving a stream repositions PDFfile.
% Clients must save and restore the position of PDFfile themselves.
/resolvestream {        % <streamdict> <readdata?> resolvestream <stream>
  1 index /F knownoget {
                % This stream is stored on an external file.
                % Open it for reading and use the FFilter/FDecodeParms keys.
    (r) file 3 -1 roll
    /FDecodeParms /FFilter filterparms
                % Stack: readdata? file dict parms filternames
    4 -1 roll exch
                % Stack: readdata? dict parms file filternames
    pdf_decrypt_stream
    applyfilters
  } {
                % The data is in /File; if a /FilePosition was recorded,
                % position the file there first.
    exch dup /FilePosition .knownget {
      1 index /File get exch setfileposition
    } if
                % Stack: readdata? dict
    /DecodeParms /Filter filterparms
                % Stack: readdata? dict parms filternames
    2 index /File get exch
                % Stack: readdata? dict parms file/string filternames
    pdf_decrypt_stream          % add decryption if needed
    dup length 0 eq {
                % All the PDF filters have EOD markers, but in this case
                % there is no specified filter.
      pop exch pop
                % Stack: readdata? dict file/string
      2 index 1 index type /filetype eq or {
              % Use length for any files or reading data from any source.
              % A missing /Length counts as 0.
        1 index /Length knownoget not { 0 } if
      } {
      0       % Otherwise length of 0 for whole string
      } ifelse
                % Stack: readdata? dict file/string length
                % With /IDFlag present the source is returned as is;
                % otherwise it is wrapped in SubFileDecode with that length.
      2 index /IDFlag known { pop } { () /SubFileDecode filter } ifelse
    } {
      applyfilters
    } ifelse
  } ifelse
                % Stack: readdata? dict file
  exch pop exch pop
} bind def
 868
 869 % ============================ Name/number trees ============================ %
 870
 871 /nameoget {             % <nametree> <key> nameoget <obj|null>
 872   exch /Names exch .treeget
 873 } bind def
 874
 875 /numoget {              % <numtree> <key> numoget <obj|null>
 876   exch /Nums exch .treeget
 877 } bind def
 878
 879 /.treeget {             % <key> <leafkey> <tree> .treeget <obj|null>
 880   dup /Kids knownoget {
 881     exch pop .branchget
 882   } {
 883     exch get .leafget
 884   } ifelse
 885 } bind def
 886
 887 /.branchget {           %  <key> <leafkey> <kids> .branchget <obj|null>
 888   dup length 0 eq {
 889     pop pop pop null
 890   } {
 891     dup length -1 bitshift 2 copy oget
 892                         % Stack: key leafkey kids mid kids[mid]
 893     dup /Limits oget aload pop
 894                         % Stack: key leafkey kids mid kids[mid] min max
 895     6 index lt {
 896       pop pop
 897       1 add 1 index length 1 index sub getinterval .branchget
 898     } {
 899       5 index gt {
 900         pop
 901         0 exch getinterval .branchget
 902       } {
 903         exch pop exch pop .treeget
 904       } ifelse
 905     } ifelse
 906   } ifelse
 907 } bind def
 908
 909 /.leafget {             % <key> <pairs> .leafget <obj|null>
 910   dup length 2 eq {
 911     dup 0 get 2 index eq { 1 oget } { pop null } ifelse
 912     exch pop
 913   } {
 914     dup length -1 bitshift -2 and 2 copy oget
 915                         % Stack: key pairs mid pairs[mid]
 916     3 index gt { 0 exch } { 1 index length 1 index sub } ifelse
 917     getinterval .leafget
 918   } ifelse
 919 } bind def
 920
% Leave the pdfdict scope entered by "pdfdict begin" at the top of the file.
end                     % pdfdict
% Consume the value that ".currentglobal" left on the stack at the top of
% the file, undoing the "true .setglobal" done there.
.setglobal