Skip to content

Commit

Permalink
php-gettext#282 Added support for extra escaping sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasraoni committed Jul 22, 2022
1 parent ef12806 commit 078246a
Showing 1 changed file with 46 additions and 12 deletions.
58 changes: 46 additions & 12 deletions src/Loader/StrictPoLoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use Gettext\Translations;

/**
* Class to load a PO file following the same rules of the GNU tools.
* Class to load a PO file following the same rules as the GNU gettext tools.
*/
final class StrictPoLoader extends Loader
{
Expand Down Expand Up @@ -139,7 +139,19 @@ private function readNumber(): string
}

/**
* Attempts to read a standard comment string which ends on \n
* Reads at least one and at most $maxLength characters from the given character set, throwing an Exception when none is found
*/
private function readCharset(string $charset, int $maxLength): string
{
    // Accumulates accepted characters. Reading stops when getChar() yields null,
    // when the current character is not part of $charset, or when the
    // $maxLength budget has been used up.
    $collected = '';
    while (true) {
        $candidate = $this->getChar();
        if ($candidate === null || strpos($charset, $candidate) === false) {
            break;
        }
        // The budget is only decremented for characters that belong to the set
        if (--$maxLength < 0) {
            break;
        }
        // Appends whatever nextChar() returns (not $candidate), as the reader is advanced here
        $collected .= $this->nextChar();
    }
    if ($collected !== '') {
        return $collected;
    }
    // Nothing matched: the caller required at least one character from the set
    throw new Exception("Expected at least one occurrence of the characters \"{$charset}\" at byte {$this->position}");
}

/**
* Attempts to read a standard comment string which ends on \n or \r
*/
private function readCommentString(): string
{
Expand All @@ -163,7 +175,9 @@ private function readQuotedString(): string
't' => "\t",
'v' => "\x0b",
'"' => '"',
];
],
$octalDigits = '01234567',
$hexDigits = $octalDigits . '89abcdefABCDEF';
$hasData = false;
for ($data = '';;) {
if (!$this->readChar('"')) {
Expand All @@ -176,15 +190,35 @@ private function readQuotedString(): string
// Collects chars until the end of the data/file
for (; ($char = $this->getChar() ?? '"') !== '"'; $data .= $char) {
$this->nextChar();
if ($char === '\\') {
// Ensures the next char is a valid escape character
if (($char = $aliases[$this->nextChar()] ?? null) === null) {
throw new Exception("Invalid quoted character at byte {$this->position}");
}
continue;
}
if ($char === "\n" || $char === "\r") {
throw new Exception("New line character must be encoded at byte {$this->position}");
$octalSequence = ctype_digit($char) ? $char : '';
switch ($char) {
case '\\':
// Ensures the next char is a valid escape character
if (($char = $aliases[$this->nextChar()] ?? null) === null) {
throw new Exception("Invalid quoted character at byte {$this->position}");
}
break;
case "\n":
case "\r":
throw new Exception("New line character must be encoded at byte {$this->position}");
case 'U':
case 'u':
// The GNU gettext is supposed to follow the escaping sequences of C, but curiously it doesn't support the u/U
$data = $this->readCharset($hexDigits, $char === 'u' ? 4 : 8);
$data = str_pad($data, strlen($data) + (strlen($data) & 1), '0', STR_PAD_LEFT);
$char = json_decode("\"\\u{$data}\"");
break;
case $octalSequence:
$data = $this->readCharset($octalDigits, 3);
// GNU gettext fails with octals above the signed char range
if (($decimal = octdec($data)) > 127) {
throw new Exception("Octal value out of range [0, 0177] at byte {$this->position}");
}
$char = chr($decimal);
case 'x':
$data = $this->readCharset($hexDigits, PHP_INT_MAX);
// GNU reads all valid hexadecimal chars, but only uses the last pair
$char = hexdec(substr($data, -2));
}
}
if (!$this->readChar('"')) {
Expand Down

0 comments on commit 078246a

Please sign in to comment.