LibPDF: Tolerate first object being an indirect null object

For the test, I incremented all object numbers and object references
in complex.pdf by one and then added

    1 0 obj
    null
    endobj

right after the header. Then I ran

    mutool clean Tests/LibPDF/complex.pdf Tests/LibPDF/complex.pdf

to fix up the xref table and startxref offsets.

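With this, we can decode the PDF attached to
https://github.com/mozilla/pdf.js/issues/7802.

When the first indirect object's value is not an object (e.g. it is null),
the linearization check now reports the document as not linearized instead
of failing with "Expected linearization object to be a dictionary".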
This commit is contained in:
Nico Weber
2025-11-27 21:38:30 -05:00
parent 885922ce93
commit dcef55a6a8
2 changed files with 54 additions and 70 deletions

View File

@@ -1,93 +1,77 @@
%PDF-1.1
%µ¶
1 0 obj
<< /Kids [2 0 R 3 0 R] /Type /Pages /Count 3 >>
null
endobj
2 0 obj
<</Kids[3 0 R 4 0 R]/Type/Pages/Count 3>>
endobj
3 0 obj
<</Rotate 0/Parent 2 0 R/Resources<</Font<</F0<</BaseFont/Times-Italic/Subtype/Type1/Type/Font>>>>>>/MediaBox[0 0 595.2756 841.8898]/Type/Page/Contents[5 0 R]>>
endobj
4 0 obj
<< >>
<</Parent 2 0 R/Kids[9 0 R 7 0 R]/Count 2/Type/Pages>>
endobj
5 0 obj
<</Length 66>>
stream
1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page One) Tj ET
endstream
endobj
2 0 obj
<<
/Rotate 0
/Parent 1 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [4 0 R]
>>
endobj
5 0 obj
<< /PageLayout /TwoColumnLeft /Pages 1 0 R /Type /Catalog >>
endobj
6 0 obj
<<
/Rotate 0
/Parent 3 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [7 0 R]
>>
<</PageLayout/TwoColumnLeft/Pages 2 0 R/Type/Catalog>>
endobj
3 0 obj
<< /Parent 1 0 R /Kids [8 0 R 6 0 R] /Count 2 /Type /Pages >>
7 0 obj
<</Rotate 0/Parent 4 0 R/Resources<</Font<</F0<</BaseFont/Times-Italic/Subtype/Type1/Type/Font>>>>>>/MediaBox[0 0 595.2756 841.8898]/Type/Page/Contents[8 0 R]>>
endobj
8 0 obj
<<
/Rotate 270
/Parent 3 0 R
/Resources
<< /Font << /F0 << /BaseFont /Times-Italic /Subtype /Type1 /Type /Font >> >> >>
/MediaBox [0.000000 0.000000 595.275590551 841.88976378]
/Type /Page
/Contents [9 0 R]
>>
<</Length 68>>
stream
1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Three) Tj ET
endstream
endobj
9 0 obj
<< >>
<</Rotate 270/Parent 4 0 R/Resources<</Font<</F0<</BaseFont/Times-Italic/Subtype/Type1/Type/Font>>>>>>/MediaBox[0 0 595.2756 841.8898]/Type/Page/Contents[10 0 R]>>
endobj
10 0 obj
<</Length 151>>
stream
q 1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Two) Tj ET Q
1. 0.000000 0.000000 1. 50. 750 cm BT /F0 16 Tf ((Rotated by 270 degrees)) Tj ET
endstream
endobj
7 0 obj
<< >>
stream
1. 0.000000 0.000000 1. 50. 770. cm BT /F0 36. Tf (Page Three) Tj ET
endstream
11 0 obj
<</Title(PDF Explained Example)/Author(John Whitington)/Producer(Manually Created)/ModDate(D:20110313002346Z)/CreationDate(D:2011)>>
endobj
10 0 obj
<<
/Title (PDF Explained Example)
/Author (John Whitington)
/Producer (Manually Created)
/ModDate (D:20110313002346Z)
/CreationDate (D:2011)
>>
endobj xref
0 11
xref
0 12
0000000000 65536 f
0000000009 00000 n
0000000177 00000 n
0000000731 00000 n
0000000072 00000 n
0000000416 00000 n
0000000492 00000 n
0000001239 00000 n
0000000808 00000 n
0000001049 00000 n
0000001346 00000 n
0000000016 00000 n
0000000037 00000 n
0000000095 00000 n
0000000272 00000 n
0000000343 00000 n
0000000458 00000 n
0000000529 00000 n
0000000706 00000 n
0000000823 00000 n
0000001003 00000 n
0000001205 00000 n
trailer
<<
/Info 10 0 R
/Root 5 0 R
/Size 11
/ID [<75ff22189ceac848dfa2afec93deee03> <75ff22189ceac848dfa2afec93deee03>]
>>
<</Size 12/Info 11 0 R/Root 6 0 R/ID[<75FF22189CEAC848DFA2AFEC93DEEE03><75FF22189CEAC848DFA2AFEC93DEEE03>]>>
startxref
1516
1355
%%EOF

View File

@@ -183,7 +183,7 @@ PDFErrorOr<DocumentParser::LinearizationResult> DocumentParser::initialize_linea
auto dict_value = indirect_value_or_error.value()->value();
if (!dict_value.has<NonnullRefPtr<Object>>())
return error("Expected linearization object to be a dictionary");
return LinearizationResult::NotLinearized;
auto dict_object = dict_value.get<NonnullRefPtr<Object>>();
if (!dict_object->is<DictObject>())