Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* Detect doubly-encoded xml to hide XXE attacks
Correct use of LibXml_Disable_Entity_Loader

* New test for double-encoded xml in security scanner
  • Loading branch information
Mark Baker authored Jun 30, 2019
1 parent 1e71154 commit 0e6238c
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 49 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org).

## [Unreleased]


## [1.8.0] - 2019-07-01

### Security Fix (CVE-2019-12331)

- Detect double-encoded xml in the Security scanner, and reject as suspicious.
- This change also broadens the scope of the `libxml_disable_entity_loader` setting when reading XML-based formats, so that it is enabled while the xml is being parsed and not simply while it is loaded.
On some versions of PHP, this can cause problems because it is not thread-safe, and can affect other PHP scripts running on the same server. This flag is set to true when instantiating a loader, and back to its original setting when the Reader is no longer in scope, or manually unset.
- Provide a check to identify whether libxml_disable_entity_loader is thread-safe or not.

`XmlScanner::threadSafeLibxmlDisableEntityLoaderAvailability()`
- Provide an option to disable the libxml_disable_entity_loader call through settings. This is not recommended as it reduces the security of the XML-based readers, and should only be used if you understand the consequences and have no other choice.

### Added

- Added support for the SWITCH function - [Issue #963](https://github.com/PHPOffice/PhpSpreadsheet/issues/963) and [PR #983](https://github.com/PHPOffice/PhpSpreadsheet/pull/983)
Expand Down
15 changes: 15 additions & 0 deletions src/PhpSpreadsheet/Reader/BaseReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,21 @@ abstract class BaseReader implements IReader
public function __construct()
{
$this->readFilter = new DefaultReadFilter();

// A fatal error will bypass the destructor, so we register a shutdown here
register_shutdown_function([$this, '__destruct']);
}

private function shutdown()
{
if ($this->securityScanner !== null) {
$this->securityScanner = null;
}
}

public function __destruct()
{
$this->shutdown();
}

public function getReadDataOnly()
Expand Down
83 changes: 59 additions & 24 deletions src/PhpSpreadsheet/Reader/Security/XmlScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace PhpOffice\PhpSpreadsheet\Reader\Security;

use PhpOffice\PhpSpreadsheet\Reader;
use PhpOffice\PhpSpreadsheet\Settings;

class XmlScanner
{
Expand All @@ -22,10 +23,16 @@ class XmlScanner

private $callback;

private function __construct($pattern = '<!DOCTYPE')
private static $libxmlDisableEntityLoaderValue;

public function __construct($pattern = '<!DOCTYPE')
{
$this->pattern = $pattern;
$this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();

$this->disableEntityLoaderCheck();

// A fatal error will bypass the destructor, so we register a shutdown here
register_shutdown_function([$this, '__destruct']);
}

public static function getInstance(Reader\IReader $reader)
Expand All @@ -43,7 +50,7 @@ public static function getInstance(Reader\IReader $reader)
}
}

private function identifyLibxmlDisableEntityLoaderAvailability()
public static function threadSafeLibxmlDisableEntityLoaderAvailability()
{
if (PHP_MAJOR_VERSION == 7) {
switch (PHP_MINOR_VERSION) {
Expand All @@ -61,11 +68,53 @@ private function identifyLibxmlDisableEntityLoaderAvailability()
return false;
}

private function disableEntityLoaderCheck()
{
if (Settings::getLibXmlDisableEntityLoader()) {
$libxmlDisableEntityLoaderValue = libxml_disable_entity_loader(true);

if (self::$libxmlDisableEntityLoaderValue === null) {
self::$libxmlDisableEntityLoaderValue = $libxmlDisableEntityLoaderValue;
}
}
}

private function shutdown()
{
if (self::$libxmlDisableEntityLoaderValue !== null) {
libxml_disable_entity_loader(self::$libxmlDisableEntityLoaderValue);
}
}

public function __destruct()
{
$this->shutdown();
}

public function setAdditionalCallback(callable $callback)
{
$this->callback = $callback;
}

private function toUtf8($xml)
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = $result ? $matches[1] : 'UTF-8';

if ($charset !== 'UTF-8') {
$xml = mb_convert_encoding($xml, 'UTF-8', $charset);

$result = preg_match($pattern, $xml, $matches);
$charset = $result ? $matches[1] : 'UTF-8';
if ($charset !== 'UTF-8') {
throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
}

return $xml;
}

/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
Expand All @@ -77,33 +126,19 @@ public function setAdditionalCallback(callable $callback)
*/
public function scan($xml)
{
if ($this->libxmlDisableEntityLoader) {
$previousLibxmlDisableEntityLoaderValue = libxml_disable_entity_loader(true);
}
$this->disableEntityLoaderCheck();

$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = $result ? $matches[1] : 'UTF-8';

if ($charset !== 'UTF-8') {
$xml = mb_convert_encoding($xml, 'UTF-8', $charset);
}
$xml = $this->toUtf8($xml);

// Don't rely purely on libxml_disable_entity_loader()
$pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/';

try {
if (preg_match($pattern, $xml)) {
throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
if (preg_match($pattern, $xml)) {
throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}

if ($this->callback !== null && is_callable($this->callback)) {
$xml = call_user_func($this->callback, $xml);
}
} finally {
if (isset($previousLibxmlDisableEntityLoaderValue)) {
libxml_disable_entity_loader($previousLibxmlDisableEntityLoaderValue);
}
if ($this->callback !== null && is_callable($this->callback)) {
$xml = call_user_func($this->callback, $xml);
}

return $xml;
Expand Down
42 changes: 42 additions & 0 deletions src/PhpSpreadsheet/Settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ class Settings
*/
private static $libXmlLoaderOptions = null;

/**
* Allow/disallow libxml_disable_entity_loader() call when not thread safe.
* Default behaviour is to do the check, but if you're running PHP versions
* 7.2 < 7.2.1
* 7.1 < 7.1.13
* 7.0 < 7.0.27
* 5.6 ANY
* then you may need to disable this check to prevent unwanted behaviour in other threads
* SECURITY WARNING: Changing this flag is not recommended.
*
* @var bool
*/
private static $libXmlDisableEntityLoader = true;

/**
* The cache implementation to be used for cell collection.
*
Expand Down Expand Up @@ -101,6 +115,34 @@ public static function getLibXmlLoaderOptions()
return self::$libXmlLoaderOptions;
}

/**
* Enable/Disable the entity loader for libxml loader.
* Allow/disallow libxml_disable_entity_loader() call when not thread safe.
* Default behaviour is to do the check, but if you're running PHP versions
* 7.2 < 7.2.1
* 7.1 < 7.1.13
* 7.0 < 7.0.27
* 5.6 ANY
* then you may need to disable this check to prevent unwanted behaviour in other threads
* SECURITY WARNING: Changing this flag to false is not recommended.
*
* @param bool $state
*/
public static function setLibXmlDisableEntityLoader($state)
{
self::$libXmlDisableEntityLoader = (bool) $state;
}

/**
* Return the state of the entity loader (disabled/enabled) for libxml loader.
*
* @return bool $state
*/
public static function getLibXmlDisableEntityLoader()
{
return self::$libXmlDisableEntityLoader;
}

/**
* Sets the implementation of cache that should be used for cell collection.
*
Expand Down
28 changes: 3 additions & 25 deletions tests/PhpSpreadsheetTests/Reader/Security/XmlScannerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Reader\Xls;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx;
use PhpOffice\PhpSpreadsheet\Reader\Xml;
use PHPUnit\Framework\TestCase;

class XmlScannerTest extends TestCase
Expand All @@ -19,12 +18,13 @@ class XmlScannerTest extends TestCase
*/
public function testValidXML($filename, $expectedResult, $libxmlDisableEntityLoader)
{
libxml_disable_entity_loader($libxmlDisableEntityLoader);
$oldDisableEntityLoaderState = libxml_disable_entity_loader($libxmlDisableEntityLoader);

$reader = XmlScanner::getInstance(new \PhpOffice\PhpSpreadsheet\Reader\Xml());
$result = $reader->scanFile($filename);
self::assertEquals($expectedResult, $result);
self::assertEquals($libxmlDisableEntityLoader, libxml_disable_entity_loader());

libxml_disable_entity_loader($oldDisableEntityLoaderState);
}

public function providerValidXML()
Expand Down Expand Up @@ -115,26 +115,4 @@ public function providerValidXMLForCallback()

return $tests;
}

/**
* @dataProvider providerLibxmlSettings
*
* @param $libxmlDisableLoader
*/
public function testNewInstanceCreationDoesntChangeLibxmlSettings($libxmlDisableLoader)
{
libxml_disable_entity_loader($libxmlDisableLoader);

$reader = new Xml();
self::assertEquals($libxmlDisableLoader, libxml_disable_entity_loader($libxmlDisableLoader));
unset($reader);
}

public function providerLibxmlSettings()
{
return [
[true],
[false],
];
}
}
2 changes: 2 additions & 0 deletions tests/data/Reader/Xml/XEETestInvalidUTF-7_DoubleEncoded.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-7"?>
+-ADwAIQ-DOCTYPE xmlrootname +-AFsAPAAh-ENTITY +-ACU aaa SYSTEM +-ACI-http://127.0.0.1:8080/ext.dtd+-ACIAPgAl-aaa+-ADsAJQ-ccc+-ADsAJQ-ddd+-ADsAXQA+-

0 comments on commit 0e6238c

Please sign in to comment.