1 files changed, 130 insertions, 8 deletions
diff --git a/Resource/Init/pdf_base.ps b/Resource/Init/pdf_base.ps
index 02503eef..618e5e51 100644
--- a/Resource/Init/pdf_base.ps
+++ b/Resource/Init/pdf_base.ps
@@ -776,11 +776,123 @@ currentdict /token_nofail_dict .undef
   count 4 index add		% Determine stack depth with objects
   3 1 roll
   resolveobjstreamopdict .pdfrun % Get PDF objects
-  count counttomark 1 add index ne { % Check stack depth
-    (   **** Error: Incorrect object count in object stream.\n) pdfformaterror
-    (               Output may be incorrect.\n) pdfformaterror
-    /resolveobjectstream cvx /rangecheck signalerror
+  count counttomark 1 add index ne
+  {
+    count counttomark 1 add index gt {
+      (   **** Error: Incorrect object count in object stream (too many objects).\n) pdfformaterror
+      (               Output may be incorrect.\n) pdfformaterror
+    } if
+
+    % It's possible for us to end up here with a valid file. The way we work is to read the
+    % stream and tokenise all the objects, but that assumes there will be delimiters or
+    % whitespace between each object in the stream. We can easily think of cases where
+    % that is not true; eg two consecutive numbers 123 456 would be tokenised as a
+    % single number with the value 123456.
+    % In this case we can use the approach below to read each object individually and
+    % that will define the correct number of objects. After we've collected all the
+    % objects we'll check the number of objects recovered again, and see if there
+    % are still too few. We should probably raise an error in that case, but let's wait
+    % until we see a case.
+
+    % Having too many objects in an ObjStm is not illegal, dumb but not illegal. We can
+    % recover from this, but it's complicated. The extra object could be any of the ones
+    % from the stream, the only way to find out is to process each object individually.
+    % Unfortunately, my attempts to come up with a version of .pdfrun which only tokenised
+    % one object got rapidly bogged down. So a new approach was needed.
+    %
+    % The idea is simple; turn the underlying file into a ReusableStream, then we can
+    % reposition it. Build an array of the object offsets, reposition the file to the
+    % start of each object in turn. For all except the last object, create a sub file
+    % using SubFileDecode with an empty string, and an EODCount which is the difference
+    % between the offset of this object and the offset of the next.
+    % For the last object we just read from the offset to the end of the stream.
+    %
+    % Then call .pdfrun on that sub file, or the main stream, which will tokenise all the
+    % objects from that point onwards. Then discard all but the first object read.
+    %
+    % The complications arise from the fact that we need to end up with the objects on
+    % the stack, in the right order, preceded by a mark, a count, and an array containing
+    % all the object numbers.
+
+    % First, discard everything we read up to now; we can reuse the mark which was placed
+    % by the preceding code to do this, as long as we remember to replace that mark.
+    % Handily this will leave the count and the array which contains the object numbers in place.
+    cleartomark
+
+    % Replace the mark consumed by cleartomark above, so that we match what the code following this error handling expects.
+    mark
+
+    % copy the ObjStm dictionary and then copy the count of objects expected
+    4 index 4 index
+
+    % Copy the ObjStm dictionary, and create a file from it
+    1 index //false resolvestream	        % Convert stream dict into a stream
+
+    /ReusableStreamDecode filter	        % We need to be able to re-position the stream
+
+    1 index array                         % Create array for holding offsets
+
+    % Get the object offsets, these are stored at the start of the stream, interleaved
+    % between the object numbers. We know reading these can't fail, because we've already
+    % done this to create the array of object numbers, above.
+    0 1                                   % starting index (0), increment (1) for loop
+    4 index 1 sub                         % limit of loop is object count-1 (because we start at 0)
+    {
+      1 index 1 index                     % copy array and index
+      4 index                             % copy the file object
+      token pop pop                       % read a token (object number) and discard it
+      4 index token                       % read another token (offset)
+      pop put pop                         % put the offset into the array at the new index
+    } for
+
+    % We now have an array with all the object offsets in it
+    % So we need to reposition the file to the start of each object and read
+    % from that point. We use the difference between two offsets to setup
+    % a SubFileDecode filter to only read as many bytes as there are
+    % between the objects. Normally this should result in us reading one object,
+    % if there are extra objects then we'll discard the extras. By doing this
+    % we avoid tokenising the same data multiple times.
+
+    % Set the loop to be from 0 to the  number of objects minus 1 (starts from 0), step 1.
+    2 index 1 sub 0 1 3 -1 roll
+    {
+      dup                                         % copy the loop index
+      2 index length 2 sub lt                     % Check if index + 1 exceeds array size (it will on last iteration)
+
+      {                                           % Not the last iteration
+        dup                                       % copy the loop index
+        2 index exch get 5 index /First get add   % get the object offset, and add the value of First from the dictionary.
+        exch 1 add                                % add one to the loop index
+        2 index exch get 5 index /First get add   % and get the offset to the start of the next object
+
+        1 index sub                               % copy the first offset and subtract from the second to get the length
+        exch
+        3 index dup 3 -1 roll setfileposition     % copy the file and position it to the offset of the object
+        exch
+        << /EODCount 3 -1 roll /EODString () >>   % Set up a dictionary for the filter
+        /SubFileDecode filter                     % and apply it to limit the number of bytes read.
+      }
+      {                                           % On the last iteration, simply read from the file, we don't need another SubFileDecode.
+        1 index exch get 4 index /First get add   % get the offset, and add the value of First from the dictionary.
+        2 index dup 3 -1 roll setfileposition     % copy the stream, and position it at that point
+      }ifelse
+
+      mark exch resolveobjstreamopdict .pdfrun    % make a mark and then tokenise all the objects from there to the end of stream (or sub file)
+      counttomark 2 sub                           % Find out how many objects we created (less two, one for the count and one for the first object)
+      0 1 3 -1 roll {pop pop} for                 % pop that many objects off the stack
+      6 1 roll pop                                % roll the new object to be behind our working objects on the stack and pop the mark
+    } for
+
+    % pop most of the working objects (array, file, and ObjStm dictionary)
+    % but leave count of objects. Check that against the number of objects
+    % retrieved. If we got too few then issue a warning.
+
+    pop pop exch pop counttomark 1 sub lt {
+      (   **** Error: Incorrect object count in object stream (too few objects).\n) pdfformaterror
+      (               Output may be incorrect.\n) pdfformaterror
+    } if
   } if
+
                 % We have the object data
   counttomark array astore	% Put objects into an array
   exch pop exch pop		% Remove mark and count
@@ -1139,16 +1251,26 @@ currentdict /pdf_rules_dict undef
   dup /JBIG2Globals knownoget {
     % make global ctx
     PDFfile fileposition exch % resolvestream is not reentrant
+    mark exch
     //true resolvestream 		% stack after: PDFfileposition -file-
     % Read the data in a loop until EOF to so we can move the strings into a bytestring
     [ { counttomark 1 add index 60000 string readstring not { exit } if } loop ]
     exch pop 0 1 index { length add } forall	% compute the total length
     % now copy the data from the array of strings into a bytestring
     .bytestring exch 0 exch { 3 copy putinterval length add } forall pop
-    .jbig2makeglobalctx
-    PDFfile 3 -1 roll setfileposition
-    1 index exch
-    /.jbig2globalctx exch put
+    % If this fails we don't want to abort totally, there may be more content
+    % in the PDF file that we can render. So just run in a stopped context.
+    {.jbig2makeglobalctx} stopped
+    {
+      cleartomark
+      PDFfile exch setfileposition
+    }
+    {
+      exch pop
+      PDFfile 3 -1 roll setfileposition
+      1 index exch
+      /.jbig2globalctx exch put
+    } ifelse
   } if
 } bind executeonly def