diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 75983cc6e5eaed..ef0c0b969f671f 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -86,6 +86,23 @@ static void test_grammar(const std::string & test_desc, const std::string & gram if (!matched) { fprintf(stderr, "❌ (failed to match)\n"); + + // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed. + // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf + FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w"); + if (grammar_file) { + fprintf(grammar_file, "%s", grammar_str.c_str()); + fclose(grammar_file); + } + + // DEBUG: Write the test string to test-grammar-integration.string.txt + FILE* string_file = fopen("test-grammar-integration.string.txt", "w"); + if (string_file) { + fprintf(string_file, "%s", test_string.c_str()); + fclose(string_file); + } + + fprintf(stderr, " Analyze in detail by running: `./gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt`\n"); } else { fprintf(stdout, "✅︎\n"); } @@ -372,7 +389,7 @@ static void test_json_schema() { // Otherwise, this test structure is the same. test_grammar( - "empty schema", + "empty schema (object)", // Grammar json_schema_to_grammar(nlohmann::ordered_json::parse( R"""( @@ -389,6 +406,536 @@ static void test_json_schema() { "", } ); + + test_grammar( + "exotic formats (list)", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "items": [ + { "format": "date" }, + { "format": "uuid" }, + { "format": "time" }, + { "format": "date-time" } + ] +} + )""" + )), + // Passing strings + { + // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? 
+ R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""", + //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + }, + // Failing strings + { + R"""(["foo", "bar"])""", + R"""(["12345678-1234-1234-1234-1234567890ab"])""", + } + ); + + test_grammar( + "string", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "string" +} + )""" + )), + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"\"", + }, + // Failing strings + { + "{}", + "\"foo\": \"bar\"", + } + ); + + test_grammar( + "string w/ min length 1", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "string", + "minLength": 1 +} + )""" + )), + // Passing strings + { + "\"foo\"", + "\"bar\"", + }, + // Failing strings + { + "\"\"", + "{}", + "\"foo\": \"bar\"", + } + ); + + test_grammar( + "string w/ min length 3", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "string", + "minLength": 3 +} + )""" + )), + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"foobar\"", + }, + // Failing strings + { + "\"\"", + "\"f\"", + "\"fo\"", + } + ); + + test_grammar( + "string w/ max length", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "string", + "maxLength": 3 +} + )""" + )), + // Passing strings + { + "\"foo\"", + "\"bar\"", + "\"\"", + "\"f\"", + "\"fo\"", + }, + // Failing strings + { + "\"foobar\"", + } + ); + + test_grammar( + "string w/ min & max length", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "string", + "minLength": 1, + "maxLength": 4 +} + )""" + )), + // Passing strings + { + "\"foo\"", + "\"bar\"", + 
"\"f\"", + "\"barf\"", + }, + // Failing strings + { + "\"\"", + "\"barfo\"", + "\"foobar\"", + } + ); + + test_grammar( + "boolean", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "boolean" +} + )""" + )), + // Passing strings + { + "true", + "false", + }, + // Failing strings + { + "\"\"", + "\"true\"", + "True", + "FALSE", + } + ); + + test_grammar( + "integer", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "integer" +} + )""" + )), + // Passing strings + { + "0", + "12345", + "1234567890123456" + }, + // Failing strings + { + "", + "01", + "007", + "12345678901234567" + } + ); + + test_grammar( + "string const", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "const": "foo" +} + )""" + )), + // Passing strings + { + "\"foo\"", + }, + // Failing strings + { + "foo", + "\"bar\"", + } + ); + + test_grammar( + "non-string const", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "const": true +} + )""" + )), + // Passing strings + { + "true", + }, + // Failing strings + { + "", + "foo", + "\"true\"", + } + ); + + test_grammar( + "non-string const", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "enum": ["red", "amber", "green", null, 42, ["foo"]] +} + )""" + )), + // Passing strings + { + "\"red\"", + "null", + "42", + "[\"foo\"]", + }, + // Failing strings + { + "", + "420", + "true", + "foo", + } + ); + + + test_grammar( + "min+max items", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 +} + )""" + )), + // Passing strings + { + "[1, 2, 3]", + "[1, 2, 3, 4]", + "[1, 2, 3, 4, 5]", + }, + // Failing strings + { + "[1, 2]", + "[1, 2, 3, 4, 5, 6]", + "1" + } + ); + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + 
test_grammar( + "object properties", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + } +} + )""" + )), + // Passing strings + { + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By extension, even an empty object is valid" + R"""({})""", + // "By default, providing additional properties is valid" + // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. + // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600 })""", + // Change datatype from number to string (same input as the first failing case; properties are in schema order, not reordered) + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + } + ); + + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + test_grammar( + "object properties, additionalProperties: true", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": true +} + )""" + )), + // Passing strings + { + //R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + //R"""({ "street_name": "Pennsylvania" })""", + //R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By extension, even an empty object is valid" + R"""({})""", + // "By default, providing additional properties is valid" + // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. + //R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""", + } + ); + + // Additional properties: false + test_grammar( + "required + optional props each in original order", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": false +} + )""" + )), + // Passing strings + { + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_type":"Avenue"})""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // TODO: Spaces should be permitted around enum values, but currently they fail to pass. 
+ // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Reorder properties + R"""({ "street_type": "Avenue", "number": 1600 })""", + // Add "direction" + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""", + } + ); + + test_grammar( + "required + optional props each in original order", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "properties": { + "b": {"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false +} + )""" + )), + // Passing strings + { + "{\"b\": \"foo\", \"a\": \"bar\"}", + "{\"b\":\"foo\",\"a\":\"bar\",\"d\":\"qux\"}", + "{\"b\":\"foo\", \"a\":\"bar\", \"d\":\"qux\", \"c\":\"baz\"}", + }, + // Failing strings + { + "{\"a\": \"foo\", \"b\": \"bar\"}", + "{\"b\": \"bar\"}", + "{\"a\": \"foo\", \"c\": \"baz\"}", + "{\"a\":\"foo\", \"b\":\"bar\", \"c\":\"baz\", \"d\":\"qux\"}", + } + ); + + // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties + test_grammar( + "required props", + // Grammar + json_schema_to_grammar(nlohmann::ordered_json::parse( + R"""( +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/product.schema.json", + "title": "Product", + "description": "A product from Acme's catalog", + "type": "object", + "properties": { + "productId": { + "description": "The unique identifier for a product", + "type": "integer" + }, + "productName": { + "description": "Name of the product", + "type": "string" + }, + "price": { + "description": "The price of the product", + "type": "number", + "exclusiveMinimum": 0 + }, + "tags": { + "description": "Tags for the product", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": true + }, + "dimensions": { + "type": 
"object", + "properties": { + "length": { + "type": "number" + }, + "width": { + "type": "number" + }, + "height": { + "type": "number" + } + }, + "required": [ "length", "width", "height" ] + } + }, + "required": [ "productId", "productName", "price" ] +} + )""" + )), + // Passing strings + { + "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50}", + "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50, \"tags\": [\"home\", \"green\"]}", + "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50, \"tags\": [\"home\", \"green\"], \"dimensions\": {\"length\": 785, \"width\": 250.5, \"height\": -0.359}}", + }, + // Failing strings + { + "{}", // Missing all required properties + "{\"productName\": \"A green door\", \"price\": 12.50, \"productId\": 1}", // Out of order properties + // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement. + // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex. + // "{\"productId\": 1, \"productName\": \"A green door\", \"price\": -12.50}", + "{\"productId\": 1, \"productName\": \"A green door\"}", // Missing required property (price) + "{\"productName\": \"A green door\", \"price\": 12.50}", // Missing required property (productId) + "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50, \"tags\": []}", // tags is empty, but minItems is 1 + "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50, \"dimensions\": {\"length\": 785, \"width\": 250.5, \"height\": -0.359}, \"tags\": [\"home\", \"green\"]}", // Tags and dimensions are out of order + // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement. 
+ // "{\"productId\": 1, \"productName\": \"A green door\", \"price\": 12.50, \"tags\": [\"home\", \"green\", \"home\"]}", + + } + ); + + } int main() {