diff --git a/lib/Parser/Turtle.php b/lib/Parser/Turtle.php index 37d6358a..1368d396 100644 --- a/lib/Parser/Turtle.php +++ b/lib/Parser/Turtle.php @@ -554,6 +554,8 @@ protected function parseQuotedLiteral() { $label = $this->parseQuotedString(); + $this->skipWSC(); + // Check for presence of a language tag or datatype $c = $this->peek(); @@ -906,7 +908,7 @@ protected function parseQNameOrBoolean() if ('true' == $value || 'false' == $value) { // Unread last character $this->unread($c); - + return [ 'type' => 'literal', 'value' => $value, @@ -943,6 +945,11 @@ protected function parseQNameOrBoolean() } $c = $this->read(); } + + // Last char of name must not be a dot + if (mb_substr($localName, -1) === '.') { + throw new Exception("Turtle Parse Error: last character of QName must not be a dot", $this->line, $this->column - 1); + } } // Unread last character @@ -1220,6 +1227,7 @@ public static function isNameChar($c) self::isNameStartChar($c) || $o >= 0x30 && $o <= 0x39 // 0-9 || '-' == $c + || '.' == $c // dots are allowed in the middle of a name, not as start char || 0x00B7 == $o; } diff --git a/test/fixtures/turtle/bad-07.ttl b/test/fixtures/turtle/bad-07.ttl index 18ea4fa6..f064d119 100644 --- a/test/fixtures/turtle/bad-07.ttl +++ b/test/fixtures/turtle/bad-07.ttl @@ -1,4 +1,3 @@ # paths are not in turtle @prefix : . -:a.:b.:c . :a^:b^:c . diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl new file mode 100644 index 00000000..ae406376 --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl @@ -0,0 +1,9 @@ +@prefix : . + +# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but is allowed in the middle +# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL +# See https://github.com/sweetrdf/easyrdf/issues/51 +# The triples below should not parse! +:SubjectWithEndDot. :predicate :Object . 
+:Subject :predicateWithEndDot. :Object . +:Subject :predicate :ObjectWithEndDot. . diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out new file mode 100644 index 00000000..f1bee29f --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out @@ -0,0 +1,2 @@ + . + . diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl new file mode 100644 index 00000000..502bc150 --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl @@ -0,0 +1,7 @@ +@prefix : . + +# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but is allowed in the middle +# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL +# See https://github.com/sweetrdf/easyrdf/issues/51 +:Subject.WithADot :predicate.withADot :Object.WithADot . +:Subject.With.Dots :predicate.with.dots :Object.With.Dots . diff --git a/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.out b/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.out new file mode 100644 index 00000000..042480d5 --- /dev/null +++ b/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.out @@ -0,0 +1,2 @@ + "Test"@en . + "Test"^^ . diff --git a/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.ttl b/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.ttl new file mode 100644 index 00000000..3ae3b663 --- /dev/null +++ b/test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.ttl @@ -0,0 +1,9 @@ +@prefix rdfs: . +@prefix xs: . +@prefix : . + +# Allow whitespace between literal and language tag +:a rdfs:label "Test" @en . + +# Allow whitespace between literal and datatype IRI +:a rdfs:label "Test" ^^xs:string . 
diff --git a/tests/EasyRdf/Parser/TurtleTest.php b/tests/EasyRdf/Parser/TurtleTest.php index 0960e9d5..ffbcf7c1 100644 --- a/tests/EasyRdf/Parser/TurtleTest.php +++ b/tests/EasyRdf/Parser/TurtleTest.php @@ -434,7 +434,7 @@ public function testBad07() // paths are not in turtle $this->expectException('EasyRdf\Parser\Exception'); $this->expectExceptionMessage( - 'Turtle Parse Error: object for statement missing on line 3, column 5' + "Turtle Parse Error: expected an RDF value here, found '^' on line 3, column 3" ); $this->parseTurtle('turtle/bad-07.ttl'); } @@ -580,14 +580,45 @@ public function testIssue140() } /** + * Allow usage of dot in middle of name + * + * @see https://github.com/sweetrdf/easyrdf/issues/51 + */ + public function testIssue51() + { + $this->turtleTestCase('gh51-sweetrdf-dot-in-name'); + } + + public function testIssue51Bad() + { + // A prefixed name whose local part ends with a dot must be rejected + $this->expectException('EasyRdf\Parser\Exception'); + $this->expectExceptionMessage( + 'Turtle Parse Error: last character of QName must not be a dot on line 7, column 20' + ); + $this->parseTurtle('turtle/gh51-sweetrdf-dot-in-name-bad.ttl'); + } + + /** + * Allow whitespace between quoted string literal and langtag or datatype + * + * @see https://github.com/sweetrdf/easyrdf/issues/52 + */ + public function testIssue52() + { + $this->turtleTestCase('gh52-sweetrdf-whitespace-langtag'); + } + + /** + * Bug in Turtle parser for boolean value that is directly followed by a semicolon + * + * @see https://github.com/sweetrdf/easyrdf/issues/58 - * Notice this is an issue reported in the sweetrdf/easyrdf fork */ public function testIssue58() { // Test file should parse without exceptions $this->expectNotToPerformAssertions(); - $this->parseTurtle("turtle/gh58-sweetrdf-bool-parser.ttl"); + $this->parseTurtle('turtle/gh58-sweetrdf-bool-parser.ttl'); } }