Skip to content

Commit de970d4

Browse files
committed
feat: allow requesting xref streams, which makes it possible to build very large files
1 parent 015bd5e commit de970d4

8 files changed

+241
-39
lines changed

PDFWriter/DocumentContext.cpp

+54-22
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ DocumentContext::DocumentContext()
6060
mObjectsContext = NULL;
6161
mParserExtender = NULL;
6262
mModifiedDocumentIDExists = false;
63+
SetWriteXrefAsXrefStream(false);
6364
}
6465

6566
DocumentContext::~DocumentContext(void)
@@ -80,12 +81,26 @@ void DocumentContext::SetObjectsContext(ObjectsContext* inObjectsContext)
8081
mPNGImageHandler.SetOperationsContexts(this, mObjectsContext);
8182
#endif
8283
mExtGStateRegistry.SetObjectsContext(mObjectsContext);
84+
SetupXrefMaxWritePositionValidation();
85+
}
86+
87+
// Pushes the current xref mode into the indirect objects registry: the registry's
// max-write-position validation is turned on when mWriteXrefAsXrefStream is false
// and turned off when it is true.
// Invoked from both SetObjectsContext and SetWriteXrefAsXrefStream, so the registry
// ends up with the right setting whichever of the two is called last.
void DocumentContext::SetupXrefMaxWritePositionValidation()
88+
{
89+
// Validating that the max xref position fits in 10 digits is only relevant for regular xref table writing.
90+
// Cancel the validation if an xref stream is used instead.
91+
// No objects context yet means there is no registry to configure; nothing is done in that case.
if(mObjectsContext)
92+
mObjectsContext->GetInDirectObjectsRegistry().SetShouldValidateMaxWritePositionForXref(!mWriteXrefAsXrefStream);
8393
}
8494

8595
void DocumentContext::SetEmbedFonts(bool inEmbedFonts) {
8696
mUsedFontsRepository.SetEmbedFonts(inEmbedFonts);
8797
}
8898

99+
// Records whether the cross-reference data should be written as an xref stream
// (true) or as a regular xref table (false), then re-applies the matching
// max-write-position validation setting.
// inWriteXrefAsXrefStream - the new value stored in mWriteXrefAsXrefStream.
void DocumentContext::SetWriteXrefAsXrefStream(bool inWriteXrefAsXrefStream) {
100+
mWriteXrefAsXrefStream = inWriteXrefAsXrefStream;
101+
// the validation flag depends on the value just stored, so refresh it now
SetupXrefMaxWritePositionValidation();
102+
}
103+
89104
void DocumentContext::SetOutputFileInformation(OutputFile* inOutputFile)
90105
{
91106
// just save the output file path for the ID generation in the end
@@ -222,13 +237,20 @@ EStatusCode DocumentContext::FinalizeNewPDF()
222237
if (status != eSuccess)
223238
break;
224239

225-
status = mObjectsContext->WriteXrefTable(xrefTablePosition);
226-
if(status != eSuccess)
227-
break;
240+
if(mWriteXrefAsXrefStream) {
241+
status = WriteXrefStream(xrefTablePosition);
242+
if(status != eSuccess)
243+
break;
244+
} else {
245+
status = mObjectsContext->WriteXrefTable(xrefTablePosition);
246+
if(status != eSuccess)
247+
break;
228248

229-
status = WriteTrailerDictionary();
230-
if(status != eSuccess)
231-
break;
249+
status = WriteTrailerDictionary();
250+
if(status != eSuccess)
251+
break;
252+
253+
}
232254

233255
WriteXrefReference(xrefTablePosition);
234256
WriteFinalEOF();
@@ -2343,8 +2365,31 @@ void DocumentContext::UnRegisterCopyingContext(PDFDocumentCopyingContext* inCopy
23432365
mCopyingContexts.erase(inCopyingContext);
23442366
}
23452367

2368+
2369+
// Tells whether the file being modified uses an xref stream, judged by its trailer.
// inModifiedFileParser - parser of the file being modified. It is dereferenced
//                        without a null check, so callers must pass a valid parser.
// Returns true only when the trailer exists and its "Type" entry equals "XRef";
// returns false when there is no trailer or no usable "Type" entry.
bool DocumentContext::RequiresXrefStream(PDFParser* inModifiedFileParser)
2370+
{
2371+
// A modification requires an xref stream if the original document uses one, so just ask the trailer.
2372+
if(!inModifiedFileParser->GetTrailer())
2373+
return false;
2374+
2375+
// NOTE(review): presumably the cast pointer is empty when "Type" is missing or is not a name - confirm against PDFObjectCastPtr
PDFObjectCastPtr<PDFName> typeObject = inModifiedFileParser->GetTrailer()->QueryDirectObject("Type");
2376+
2377+
if(!typeObject)
2378+
return false;
2379+
2380+
return typeObject->GetValue() == "XRef";
2381+
2382+
2383+
}
2384+
2385+
23462386
EStatusCode DocumentContext::SetupModifiedFile(PDFParser* inModifiedFileParser)
23472387
{
2388+
// determine if file requires xref stream, in which case set it up
2389+
if(RequiresXrefStream(inModifiedFileParser)) {
2390+
SetWriteXrefAsXrefStream(true); // it may already have been setup to be true earlier by the users request, but if not, and this file requires it, set it up now
2391+
}
2392+
23482393
// setup trailer and save original document ID
23492394

23502395
if(!inModifiedFileParser->GetTrailer())
@@ -2514,9 +2559,11 @@ EStatusCode DocumentContext::FinalizeModifiedPDF(PDFParser* inModifiedFileParser
25142559
status = CopyEncryptionDictionary(inModifiedFileParser);
25152560
if(status != eSuccess)
25162561
break;
2517-
if(RequiresXrefStream(inModifiedFileParser))
2562+
if(mWriteXrefAsXrefStream)
25182563
{
25192564
status = WriteXrefStream(xrefTablePosition);
2565+
if(status != eSuccess)
2566+
break;
25202567
}
25212568
else
25222569
{
@@ -2781,21 +2828,6 @@ EStatusCode DocumentContext::CopyEncryptionDictionary(PDFParser* inModifiedFileP
27812828
return eSuccess;
27822829
}
27832830

2784-
bool DocumentContext::RequiresXrefStream(PDFParser* inModifiedFileParser)
2785-
{
2786-
// modification requires xref stream if the original document uses one...so just ask trailer
2787-
if(!inModifiedFileParser->GetTrailer())
2788-
return false;
2789-
2790-
PDFObjectCastPtr<PDFName> typeObject = inModifiedFileParser->GetTrailer()->QueryDirectObject("Type");
2791-
2792-
if(!typeObject)
2793-
return false;
2794-
2795-
return typeObject->GetValue() == "XRef";
2796-
2797-
2798-
}
27992831

28002832
EStatusCode DocumentContext::WriteXrefStream(LongFilePositionType& outXrefPosition)
28012833
{

PDFWriter/DocumentContext.h

+3
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ namespace PDFHummus
119119
void SetObjectsContext(ObjectsContext* inObjectsContext);
120120
void SetOutputFileInformation(OutputFile* inOutputFile);
121121
void SetEmbedFonts(bool inEmbedFonts);
122+
void SetWriteXrefAsXrefStream(bool inWriteXrefAsXrefStream);
122123
PDFHummus::EStatusCode WriteHeader(EPDFVersion inPDFVersion);
123124
PDFHummus::EStatusCode FinalizeNewPDF();
124125
PDFHummus::EStatusCode FinalizeModifiedPDF(PDFParser* inModifiedFileParser,EPDFVersion inModifiedPDFVersion);
@@ -413,6 +414,7 @@ namespace PDFHummus
413414
StringAndULongPairToHummusImageInformationMap mImagesInformation;
414415
EncryptionHelper mEncryptionHelper;
415416
ExtGStateRegistry mExtGStateRegistry;
417+
bool mWriteXrefAsXrefStream;
416418

417419
void WriteHeaderComment(EPDFVersion inPDFVersion);
418420
void Write4BinaryBytes();
@@ -461,5 +463,6 @@ namespace PDFHummus
461463
bool RequiresXrefStream(PDFParser* inModifiedFileParser);
462464
PDFHummus::EStatusCode WriteXrefStream(LongFilePositionType& outXrefPosition);
463465
HummusImageInformation& GetImageInformationStructFor(const std::string& inImageFile,unsigned long inImageIndex);
466+
void SetupXrefMaxWritePositionValidation();
464467
};
465468
}

PDFWriter/IndirectObjectsReferenceRegistry.cpp

+23-6
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ using namespace PDFHummus;
3737
IndirectObjectsReferenceRegistry::IndirectObjectsReferenceRegistry(void)
3838
{
3939
SetupInitialFreeObject();
40+
SetShouldValidateMaxWritePositionForXref(true);
4041
}
4142

4243
void IndirectObjectsReferenceRegistry::SetupInitialFreeObject()
@@ -51,6 +52,11 @@ void IndirectObjectsReferenceRegistry::SetupInitialFreeObject()
5152
mObjectsWritesRegistry.push_back(singleFreeObjectInformation);
5253
}
5354

55+
// Enables or disables the max-write-position check performed by
// MaybeValidateMaxWritePositionForXref.
// inShouldValidate - true to perform the check, false to always skip it.
// The constructor enables the check by default.
void IndirectObjectsReferenceRegistry::SetShouldValidateMaxWritePositionForXref(bool inShouldValidate)
56+
{
57+
mShouldValidateMaxWritePositionForXref = inShouldValidate;
58+
}
59+
5460
IndirectObjectsReferenceRegistry::~IndirectObjectsReferenceRegistry(void)
5561
{
5662
}
@@ -70,6 +76,20 @@ ObjectIDType IndirectObjectsReferenceRegistry::AllocateNewObjectID()
7076
return newObjectID;
7177
}
7278

79+
// Checks that a write position can still be represented in the xref, unless the
// check has been disabled through SetShouldValidateMaxWritePositionForXref.
// inWritePosition - file position at which an object is being written.
// Returns eSuccess when validation is disabled or the position is at most
// 9999999999; otherwise logs the offending position and returns eFailure.
// Used by MarkObjectAsWritten and MarkObjectAsUpdated, which fail when this fails.
EStatusCode IndirectObjectsReferenceRegistry::MaybeValidateMaxWritePositionForXref(LongFilePositionType inWritePosition)
80+
{
81+
// validation turned off: accept any position
if(!mShouldValidateMaxWritePositionForXref)
82+
return PDFHummus::eSuccess;
83+
84+
if(inWritePosition > 9999999999LL) // 9999999999 is the largest 10-digit value; a larger write position cannot be represented and the xref write would fail
85+
{
86+
TRACE_LOG1("IndirectObjectsReferenceRegistry::MaybeValidateMaxWritePositionForXref, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inWritePosition);
87+
return PDFHummus::eFailure;
88+
}
89+
90+
return PDFHummus::eSuccess;
91+
}
92+
7393

7494
EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsWritten(ObjectIDType inObjectID,LongFilePositionType inWritePosition)
7595
{
@@ -86,9 +106,8 @@ EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsWritten(ObjectIDType i
86106
return PDFHummus::eFailure; // trying to mark as written an object that was already marked as such in the past. probably a mistake [till we have revisions]
87107
}
88108

89-
if(inWritePosition > 9999999999LL) // if write position is larger than what can be represented by 10 digits, xref write will fail
109+
if(MaybeValidateMaxWritePositionForXref(inWritePosition) != PDFHummus::eSuccess) // if write position is larger than what can be represented by 10 digits, xref write will fail
90110
{
91-
TRACE_LOG1("IndirectObjectsReferenceRegistry::MarkObjectAsWritten, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inWritePosition);
92111
return PDFHummus::eFailure;
93112
}
94113

@@ -155,12 +174,10 @@ PDFHummus::EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsUpdated(Obj
155174
return PDFHummus::eFailure;
156175
}
157176

158-
if(inNewWritePosition > 9999999999LL) // if write position is larger than what can be represented by 10 digits, xref write will fail
177+
if(MaybeValidateMaxWritePositionForXref(inNewWritePosition) != PDFHummus::eSuccess) // if write position is larger than what can be represented by 10 digits, xref write will fail
159178
{
160-
TRACE_LOG1("IndirectObjectsReferenceRegistry::MarkObjectAsUpdated, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inNewWritePosition);
161179
return PDFHummus::eFailure;
162-
}
163-
180+
}
164181

165182
mObjectsWritesRegistry[inObjectID].mIsDirty = true;
166183
mObjectsWritesRegistry[inObjectID].mWritePosition = inNewWritePosition;

PDFWriter/IndirectObjectsReferenceRegistry.h

+4
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,16 @@ class IndirectObjectsReferenceRegistry
8989
void Reset();
9090

9191
void SetupXrefFromModifiedFile(PDFParser* inModifiedFileParser);
92+
93+
void SetShouldValidateMaxWritePositionForXref(bool inShouldValidateMaxWritePositionForXref);
9294

9395
private:
9496
ObjectWriteInformationVector mObjectsWritesRegistry;
97+
bool mShouldValidateMaxWritePositionForXref;
9598

9699
void SetupInitialFreeObject();
97100
void AppendExistingItem(ObjectWriteInformation::EObjectReferenceType inObjectReferenceType,
98101
unsigned long inGenerationNumber,
99102
LongFilePositionType inWritePosition);
103+
PDFHummus::EStatusCode MaybeValidateMaxWritePositionForXref(LongFilePositionType inWritePosition);
100104
};

PDFWriter/PDFWriter.cpp

+15-10
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ PDFWriter::~PDFWriter(void)
5656
{
5757
}
5858

59-
EPDFVersion thisOrDefaultVersion(EPDFVersion inPDFVersion) {
60-
return ePDFVersionUndefined == inPDFVersion ? ePDFVersion14 : inPDFVersion;
59+
// Resolves the PDF version to use when the caller may have left it undefined.
// inPDFVersion            - requested version, or ePDFVersionUndefined.
// inWriteXrefAsXrefStream - whether the document will be written with an xref stream.
// Returns inPDFVersion unchanged when it is defined. Note that an explicitly
// requested version is not raised, even if it is below 1.5 and an xref stream is requested.
EPDFVersion thisOrDefaultVersion(EPDFVersion inPDFVersion, bool inWriteXrefAsXrefStream) {
60+
// If the version is undefined, default to 1.4 when no xref stream is used, or to 1.5 when one is (the lowest version in which xref streams are supported).
61+
return ePDFVersionUndefined == inPDFVersion ? (inWriteXrefAsXrefStream ? ePDFVersion15: ePDFVersion14) : inPDFVersion;
6162
}
6263

6364
EStatusCode PDFWriter::StartPDF(
@@ -66,6 +67,7 @@ EStatusCode PDFWriter::StartPDF(
6667
const LogConfiguration& inLogConfiguration,
6768
const PDFCreationSettings& inPDFCreationSettings)
6869
{
70+
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
6971
SetupLog(inLogConfiguration);
7072
SetupCreationSettings(inPDFCreationSettings);
7173

@@ -77,7 +79,7 @@ EStatusCode PDFWriter::StartPDF(
7779
mDocumentContext.SetOutputFileInformation(&mOutputFile);
7880

7981
if (inPDFCreationSettings.DocumentEncryptionOptions.ShouldEncrypt) {
80-
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, thisOrDefaultVersion(inPDFVersion));
82+
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, pdfVersion);
8183
if (!mDocumentContext.SupportsEncryption()) {
8284
mOutputFile.CloseFile(); // close the file, to keep things clean
8385
return eFailure;
@@ -86,7 +88,7 @@ EStatusCode PDFWriter::StartPDF(
8688

8789
mIsModified = false;
8890

89-
return mDocumentContext.WriteHeader(thisOrDefaultVersion(inPDFVersion));
91+
return mDocumentContext.WriteHeader(pdfVersion);
9092
}
9193

9294
EStatusCode PDFWriter::EndPDF()
@@ -174,6 +176,7 @@ void PDFWriter::SetupCreationSettings(const PDFCreationSettings& inPDFCreationSe
174176
{
175177
mObjectsContext.SetCompressStreams(inPDFCreationSettings.CompressStreams);
176178
mDocumentContext.SetEmbedFonts(inPDFCreationSettings.EmbedFonts);
179+
mDocumentContext.SetWriteXrefAsXrefStream(inPDFCreationSettings.WriteXrefAsXrefStream);
177180
}
178181

179182
void PDFWriter::ReleaseLog()
@@ -555,18 +558,19 @@ EStatusCode PDFWriter::StartPDFForStream(IByteWriterWithPosition* inOutputStream
555558
const LogConfiguration& inLogConfiguration,
556559
const PDFCreationSettings& inPDFCreationSettings)
557560
{
561+
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
558562
SetupLog(inLogConfiguration);
559563
SetupCreationSettings(inPDFCreationSettings);
560564
if (inPDFCreationSettings.DocumentEncryptionOptions.ShouldEncrypt) {
561-
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, thisOrDefaultVersion(inPDFVersion));
565+
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, pdfVersion);
562566
if (!mDocumentContext.SupportsEncryption())
563567
return eFailure;
564568
}
565569

566570
mObjectsContext.SetOutputStream(inOutputStream);
567571
mIsModified = false;
568572

569-
return mDocumentContext.WriteHeader(thisOrDefaultVersion(inPDFVersion));
573+
return mDocumentContext.WriteHeader(pdfVersion);
570574
}
571575
EStatusCode PDFWriter::EndPDFForStream()
572576
{
@@ -691,7 +695,7 @@ EStatusCode PDFWriter::ModifyPDF(const std::string& inModifiedFile,
691695

692696
// do setup for modification
693697
mIsModified = true;
694-
status = SetupStateFromModifiedFile(inModifiedFile, thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
698+
status = SetupStateFromModifiedFile(inModifiedFile, inPDFVersion, inPDFCreationSettings);
695699
}
696700
while (false);
697701

@@ -724,13 +728,14 @@ EStatusCode PDFWriter::ModifyPDFForStream(
724728

725729
mIsModified = true;
726730

727-
return SetupStateFromModifiedStream(inModifiedSourceStream, thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
731+
return SetupStateFromModifiedStream(inModifiedSourceStream, inPDFVersion, inPDFCreationSettings);
728732
}
729733

730734
EStatusCode PDFWriter::SetupStateFromModifiedStream(IByteReaderWithPosition* inModifiedSourceStream,
731735
EPDFVersion inPDFVersion,
732736
const PDFCreationSettings& inPDFCreationSettings)
733737
{
738+
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
734739
EStatusCode status;
735740
PDFParsingOptions parsingOptions;
736741

@@ -761,7 +766,7 @@ EStatusCode PDFWriter::SetupStateFromModifiedStream(IByteReaderWithPosition* inM
761766
}
762767
}
763768

764-
mModifiedFileVersion = thisOrDefaultVersion(inPDFVersion);
769+
mModifiedFileVersion = pdfVersion;
765770
}
766771
while (false);
767772

@@ -778,7 +783,7 @@ EStatusCode PDFWriter::SetupStateFromModifiedFile(const std::string& inModifiedF
778783
if(status != eSuccess)
779784
break;
780785

781-
status = SetupStateFromModifiedStream(mModifiedFile.GetInputStream(), thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
786+
status = SetupStateFromModifiedStream(mModifiedFile.GetInputStream(), inPDFVersion, inPDFCreationSettings);
782787
}
783788
while(false);
784789

PDFWriter/PDFWriter.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,16 @@ struct PDFCreationSettings
5959
bool CompressStreams;
6060
bool EmbedFonts;
6161
EncryptionOptions DocumentEncryptionOptions;
62+
bool WriteXrefAsXrefStream;
6263

63-
PDFCreationSettings(bool inCompressStreams, bool inEmbedFonts,EncryptionOptions inDocumentEncryptionOptions = EncryptionOptions::DefaultEncryptionOptions()):DocumentEncryptionOptions(inDocumentEncryptionOptions){
64+
PDFCreationSettings(
65+
bool inCompressStreams,
66+
bool inEmbedFonts,
67+
EncryptionOptions inDocumentEncryptionOptions = EncryptionOptions::DefaultEncryptionOptions(),
68+
bool inWriteXrefAsXrefStream = false):DocumentEncryptionOptions(inDocumentEncryptionOptions){
6469
CompressStreams = inCompressStreams;
6570
EmbedFonts = inEmbedFonts;
71+
WriteXrefAsXrefStream = inWriteXrefAsXrefStream;
6672
}
6773

6874
};

PDFWriterTesting/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ create_test_sourcelist (Tests
7676
UppercaseSequanceTest.cpp
7777
WatermarkTest.cpp
7878
WatermarkWithContextOpacityTest.cpp
79+
XrefStreamsTest.cpp
7980
)
8081

8182
# add the testing executable

0 commit comments

Comments
 (0)