Skip to content

Commit

Permalink
Merge branch 'master' into issue/58-bugfix-turtle-bool-parser
Browse files Browse the repository at this point in the history
  • Loading branch information
k00ni committed Jan 18, 2025
2 parents 3721153 + c573f61 commit bdb135f
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 5 deletions.
10 changes: 9 additions & 1 deletion lib/Parser/Turtle.php
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,8 @@ protected function parseQuotedLiteral()
{
$label = $this->parseQuotedString();

$this->skipWSC();

// Check for presence of a language tag or datatype
$c = $this->peek();

Expand Down Expand Up @@ -906,7 +908,7 @@ protected function parseQNameOrBoolean()
if ('true' == $value || 'false' == $value) {
// Unread last character
$this->unread($c);

return [
'type' => 'literal',
'value' => $value,
Expand Down Expand Up @@ -943,6 +945,11 @@ protected function parseQNameOrBoolean()
}
$c = $this->read();
}

// Last char of name must not be a dot
if (mb_substr($localName, -1) === '.') {
throw new Exception("Turtle Parse Error: last character of QName must not be a dot", $this->line, $this->column - 1);
}
}

// Unread last character
Expand Down Expand Up @@ -1220,6 +1227,7 @@ public static function isNameChar($c)
self::isNameStartChar($c)
|| $o >= 0x30 && $o <= 0x39 // 0-9
|| '-' == $c
|| '.' == $c // dots are allowed in the middle of a name, not as start char
|| 0x00B7 == $o;
}

Expand Down
1 change: 0 additions & 1 deletion test/fixtures/turtle/bad-07.ttl
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# paths are not in turtle
@prefix : <http://example.org/stuff/1.0/> .
:a.:b.:c .
:a^:b^:c .
9 changes: 9 additions & 0 deletions test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@prefix : <http://example.org/#> .

# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but it is allowed in the middle
# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL
# See https://github.com/sweetrdf/easyrdf/issues/51
# The triples below should not parse!
:SubjectWithEndDot. :predicate :Object .
:Subject :predicateWithEndDot. :Object .
:Subject :predicate :ObjectWithEndDot. .
2 changes: 2 additions & 0 deletions test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<http://example.org/#Subject.WithADot> <http://example.org/#predicate.withADot> <http://example.org/#Object.WithADot> .
<http://example.org/#Subject.With.Dots> <http://example.org/#predicate.with.dots> <http://example.org/#Object.With.Dots> .
7 changes: 7 additions & 0 deletions test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
@prefix : <http://example.org/#> .

# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but it is allowed in the middle
# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL
# See https://github.com/sweetrdf/easyrdf/issues/51
:Subject.WithADot :predicate.withADot :Object.WithADot .
:Subject.With.Dots :predicate.with.dots :Object.With.Dots .
2 changes: 2 additions & 0 deletions test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<http://example.org/#a> <http://www.w3.org/2000/01/rdf-schema#label> "Test"@en .
<http://example.org/#a> <http://www.w3.org/2000/01/rdf-schema#label> "Test"^^<http://www.w3.org/2001/XMLSchema#string> .
9 changes: 9 additions & 0 deletions test/fixtures/turtle/gh52-sweetrdf-whitespace-langtag.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xs: <http://www.w3.org/2001/XMLSchema#> .
@prefix : <http://example.org/#> .

# Allow whitespace between literal and language tag
:a rdfs:label "Test" @en .

# Allow whitespace between literal and datatype IRI
:a rdfs:label "Test" ^^xs:string .
37 changes: 34 additions & 3 deletions tests/EasyRdf/Parser/TurtleTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ public function testBad07()
// paths are not in turtle
$this->expectException('EasyRdf\Parser\Exception');
$this->expectExceptionMessage(
'Turtle Parse Error: object for statement missing on line 3, column 5'
"Turtle Parse Error: expected an RDF value here, found '^' on line 3, column 3"
);
$this->parseTurtle('turtle/bad-07.ttl');
}
Expand Down Expand Up @@ -580,14 +580,45 @@ public function testIssue140()
}

/**
* Allow usage of dot in middle of name
*
* Runs the "gh51-sweetrdf-dot-in-name" fixture, whose triples use prefixed
* names containing one or more dots inside the local part
* (e.g. ":Subject.WithADot", ":Subject.With.Dots").
*
* @see https://github.com/sweetrdf/easyrdf/issues/51
*/
public function testIssue51()
{
$this->turtleTestCase('gh51-sweetrdf-dot-in-name');
}

/**
* Reject a prefixed name whose local part ends with a dot
*
* @see https://github.com/sweetrdf/easyrdf/issues/51
*/
public function testIssue51Bad()
{
// The first offending triple in the fixture (":SubjectWithEndDot.") is on
// line 7, so parsing is expected to fail there with the QName error.
$this->expectException('EasyRdf\Parser\Exception');
$this->expectExceptionMessage(
'Turtle Parse Error: last character of QName must not be a dot on line 7, column 20'
);
$this->parseTurtle('turtle/gh51-sweetrdf-dot-in-name-bad.ttl');
}

/**
* Allow whitespace between a quoted string literal and its language tag or datatype
*
* Runs the "gh52-sweetrdf-whitespace-langtag" fixture, which contains
* literals written as `"Test" @en` and `"Test" ^^xs:string`.
*
* @see https://github.com/sweetrdf/easyrdf/issues/52
*/
public function testIssue52()
{
$this->turtleTestCase('gh52-sweetrdf-whitespace-langtag');
}

/**
* Bug in Turtle parser for boolean value that is directly followed by a semicolon
*
* @see https://github.com/sweetrdf/easyrdf/issues/58
* Notice this is an issue reported in the sweetrdf/easyrdf fork
*/
public function testIssue58()
{
// Test file should parse without exceptions
$this->expectNotToPerformAssertions();

$this->parseTurtle("turtle/gh58-sweetrdf-bool-parser.ttl");
$this->parseTurtle('turtle/gh58-sweetrdf-bool-parser.ttl');
}
}

0 comments on commit bdb135f

Please sign in to comment.