From 10fc7d33e01954d24d601c9d5e782ea93dd97dd3 Mon Sep 17 00:00:00 2001 From: HorstOeko Date: Tue, 24 Sep 2024 05:21:35 +0200 Subject: [PATCH] #120 ZugferdDocumentPdfReader now contains additional methods for extracting the XML content --- build/phpunit.xml | 4 +- src/ZugferdDocumentPdfReader.php | 61 +++++++++++++++++++++++- tests/testcases/PdfReaderGeneralTest.php | 52 ++++++++++++++++++++ tests/testcases/PdfReaderInvalidTest.php | 21 -------- 4 files changed, 112 insertions(+), 26 deletions(-) create mode 100644 tests/testcases/PdfReaderGeneralTest.php delete mode 100644 tests/testcases/PdfReaderInvalidTest.php diff --git a/build/phpunit.xml b/build/phpunit.xml index d66286fb..b8dab394 100644 --- a/build/phpunit.xml +++ b/build/phpunit.xml @@ -29,13 +29,13 @@ ../tests/testcases/BuilderExtendedTest.php + ../tests/testcases/PdfReaderGeneralTest.php ../tests/testcases/PdfReaderMinimumSimpleTest.php ../tests/testcases/PdfReaderEn16931AllowanceChargeTest.php ../tests/testcases/PdfReaderEn16931SimpleTest.php ../tests/testcases/PdfReaderExtendedTest.php ../tests/testcases/PdfReaderExtended2Test.php ../tests/testcases/PdfReaderXRechnungSimpleTest.php - ../tests/testcases/PdfReaderInvalidTest.php ../tests/testcases/PdfReaderMultipleAttachmentsTest.php @@ -45,11 +45,9 @@ ../tests/testcases/ValidatorValidTest.php ../tests/testcases/ValidatorInvalidTest.php - ../tests/testcases/issues/Issue10Test.php ../tests/testcases/issues/Issue18Test.php diff --git a/src/ZugferdDocumentPdfReader.php b/src/ZugferdDocumentPdfReader.php index 0c4e19e9..2488c0c0 100644 --- a/src/ZugferdDocumentPdfReader.php +++ b/src/ZugferdDocumentPdfReader.php @@ -53,13 +53,70 @@ public static function readAndGuessFromFile(string $pdfFilename): ?ZugferdDocume } /** - * Load a PDF content (ZUGFeRD/Factur-X) + * Tries to load an attachment content from PDF and return a ZugferdDocumentReader + * If any errors occurred or no attachments were found null is returned * * @param string $pdfContent * String 
Containing the binary pdf data * @return ZugferdDocumentReader|null */ public static function readAndGuessFromContent(string $pdfContent): ?ZugferdDocumentReader + { + $xmlContent = static::extractXMLFromContent($pdfContent); + + if (is_null($xmlContent)) { + return null; + } + + try { + return ZugferdDocumentReader::readAndGuessFromContent($xmlContent); + } catch (\Exception $e) { + return null; + } + } + + /** + * Returns the XML content from a PDF file + * + * @param string $pdfFilename + * Contains a fully-qualified filename which must exist and must be readable + * @return string|null + */ + public static function getXmlFromFile(string $pdfFilename) : ?string + { + if (!file_exists($pdfFilename)) { + throw new ZugferdFileNotFoundException($pdfFilename); + } + + $pdfContent = file_get_contents($pdfFilename); + + if ($pdfContent === false) { + throw new ZugferdFileNotReadableException($pdfFilename); + } + + return static::getXmlFromContent($pdfContent); + } + + /** + * Returns the XML content from a PDF binary stream (string) + * + * @param string $pdfContent + * String Containing the binary pdf data + * @return string|null + */ + public static function getXmlFromContent(string $pdfContent) : ?string + { + return static::extractXMLFromContent($pdfContent); + } + + /** + * Get the attachment content from the PDF. 
+ * See the allowed filenames which are supported + * + * @param string $pdfContent + * @return string|null + */ + private static function extractXMLFromContent(string $pdfContent): ?string { $pdfParser = new PdfParser(); $pdfParsed = $pdfParser->parseContent($pdfContent); @@ -87,7 +144,7 @@ public static function readAndGuessFromContent(string $pdfContent): ?ZugferdDocu $embeddedFiles = $pdfParsed->getObjectsByType('EmbeddedFile'); foreach ($embeddedFiles as $embeddedFile) { if ($attachmentIndex == $embeddedFileIndex) { - $returnValue = ZugferdDocumentReader::readAndGuessFromContent($embeddedFile->getContent()); + $returnValue = $embeddedFile->getContent(); break; } $embeddedFileIndex++; diff --git a/tests/testcases/PdfReaderGeneralTest.php b/tests/testcases/PdfReaderGeneralTest.php new file mode 100644 index 00000000..92283d91 --- /dev/null +++ b/tests/testcases/PdfReaderGeneralTest.php @@ -0,0 +1,52 @@ +assertNull($document); + } + + public function testFileNotFound(): void + { + $this->expectException(ZugferdFileNotFoundException::class); + $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + } + + public function testCanReadPdf2(): void + { + $document = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/InvalidPDF.pdf"); + $this->assertNull($document); + } + + public function testFileNotFound2(): void + { + $this->expectException(ZugferdFileNotFoundException::class); + $document = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . "/../assets/unknown.pdf"); + } + + public function testCanReadPdf3(): void + { + $document = ZugferdDocumentPdfReader::readAndGuessFromFile(dirname(__FILE__) . "/../assets/zugferd_2p1_EN16931_Einfach.pdf"); + $this->assertNotNull($document); + $this->assertInstanceOf(ZugferdDocument::class, $document); + } + + public function testCanReadPdf4(): void + { + $xmlString = ZugferdDocumentPdfReader::getXmlFromFile(dirname(__FILE__) . 
"/../assets/zugferd_2p1_EN16931_Einfach.pdf"); + $this->assertNotNull($xmlString); + $this->assertIsString($xmlString); + $this->assertStringContainsString("assertStringContainsString("assertStringContainsString("", $xmlString); + } +} diff --git a/tests/testcases/PdfReaderInvalidTest.php b/tests/testcases/PdfReaderInvalidTest.php deleted file mode 100644 index 1ec85d4e..00000000 --- a/tests/testcases/PdfReaderInvalidTest.php +++ /dev/null @@ -1,21 +0,0 @@ -assertNull(self::$document); - } -}