From c8ed02dd532e1fed7c7350b0c2ac73590df8401b Mon Sep 17 00:00:00 2001 From: Jens Alfke Date: Fri, 30 Apr 2021 15:05:24 -0700 Subject: [PATCH] API: Added printf-style formatted builder functions These functions let you create complex nested Fleece structures with a single call. It's basically JSON with values substituted from arguments using printf-style "%" specs. See comments in Builder.hh for details. --- API/fleece/FLMutable.h | 67 ++++ API/fleece/Mutable.hh | 69 ++++ Fleece.xcodeproj/project.pbxproj | 12 + Fleece/API_Impl/Fleece.cc | 36 +++ Fleece/Core/Builder.cc | 539 +++++++++++++++++++++++++++++++ Fleece/Core/Builder.hh | 68 ++++ Fleece/Support/slice_stream.hh | 4 + ObjC/Encoder+ObjC.mm | 2 +- Tests/BuilderTests.cc | 169 ++++++++++ cmake/platform_base.cmake | 2 + 10 files changed, 967 insertions(+), 1 deletion(-) create mode 100644 Fleece/Core/Builder.cc create mode 100644 Fleece/Core/Builder.hh create mode 100644 Tests/BuilderTests.cc diff --git a/API/fleece/FLMutable.h b/API/fleece/FLMutable.h index d2f1d939..531fa6e7 100644 --- a/API/fleece/FLMutable.h +++ b/API/fleece/FLMutable.h @@ -15,6 +15,7 @@ #define _FLMUTABLE_H #include "FLValue.h" +#include FL_ASSUME_NONNULL_BEGIN @@ -339,6 +340,72 @@ extern "C" { FLSlot_SetValue(slot, (FLValue)dict); } + /** @} */ + + + //====== FORMATTED VALUE BUILDER + + + /** \defgroup builder Fleece Formatted Value Builder + @{ + These functions use the `printf` idiom to make it convenient to create structured Fleece + values in memory with one call. They create or modify a `FLMutableArray` or `FLMutableDict + by reading the given format string and the following arguments. + + The format string is basically JSON5, except that any value in it may be a printf-style + '%' specifier instead of a literal, in which case that value will be read from the next + argument. 
The supported format specifiers are: + + - Boolean: `%c` (cast the arg to `char` to avoid a compiler warning) + - Integer: `%i` or `%d` (use size specifiers `l`, `ll`, or `z`) + - Unsigned integer: `%u` (use size specifiers `l`, `ll`, or `z`) + - Floating point: `%f` (arg can be `float` or `double`; no size spec needed) + - C string: `%s` + - Ptr+length string: `%.*s` (takes two args, in printf order: an `int` length followed by a `const char*`. See `FMTSLICE`.) + - Fleece value: `%p` (arg must be a `FLValue`) + - [Core]Foundation: `%@` (Apple platforms only: arg must be a `NSString`, `NSNumber`, + `NSArray`, `NSDictionary`, `NSNull`, or equivalent `CFTypeRef`) + + A `-` can appear after the `%`, indicating that the argument should be ignored if it has + a default value, namely `false`, 0, or an empty string. This means the corresponding item + won't be written (a Dict item will be erased if it previously existed.) + + If an argument is a NULL pointer nothing is written, and any pre-existing Dict item will + be removed. + + \note It's legal for a dict key to be repeated; later occurrences take precedence, + i.e. each one overwrites the last. + */ + + /** Translates the JSON-style format string into a tree of mutable Fleece objects, adding + values from the following arguments wherever a printf-style `%` specifier appears. + \note The result will be either an `FLMutableArray` or `FLMutableDict` depending on + the syntax of the format string. + \warning The returned value must be released when you're done with it. */ + NODISCARD + FLEECE_PUBLIC FLValue FLValue_NewWithFormat(const char *format, ...) __printflike(1, 2); + + /** Variant of \ref FLValue_NewWithFormat that takes a pre-existing `va_list`. + \warning The returned value must be released when you're done with it. */ + NODISCARD + FLEECE_PUBLIC FLValue FLValue_NewWithFormatV(const char *format, va_list args); + + /** Like \ref FLValue_NewWithFormat, except it operates on an existing mutable array. 
+ The values parsed from the format string and arguments will be appended to it. */ + FLEECE_PUBLIC void FLMutableArray_UpdateWithFormat(FLMutableArray, const char *format, ...) + __printflike(2, 3); + + /** Like \ref FLValue_NewWithFormat, except it operates on an existing mutable dict. + (Pre-existing properties not appearing in the format string are preserved.) */ + FLEECE_PUBLIC void FLMutableDict_UpdateWithFormat(FLMutableDict, const char *format, ...) + __printflike(2, 3); + + /** Like \ref FLMutableArray_UpdateWithFormat / \ref FLMutableDict_UpdateWithFormat + but takes a pre-existing `va_list`. */ + FLEECE_PUBLIC void FLValue_UpdateWithFormatV(FLValue, const char *format, va_list args); + + /** @} */ + // implementations of the inline methods declared earlier: diff --git a/API/fleece/Mutable.hh b/API/fleece/Mutable.hh index 036aa558..fccf0682 100644 --- a/API/fleece/Mutable.hh +++ b/API/fleece/Mutable.hh @@ -86,6 +86,12 @@ namespace fleece { /** Creates a new, empty mutable array. */ static MutableArray newArray() {return MutableArray(FLMutableArray_New(), false);} + /** Creates a mutable array with values in it, based on a JSON-like format string. + Argument values can be substituted into it. + See the documentation of the "Fleece Formatted Value Builder" in FLMutable.h. */ + static inline MutableArray newWithFormat(const char *format, ...) __printflike(1, 2); + static inline MutableArray newWithFormatV(const char *format, va_list args); + MutableArray() :Array() { } MutableArray(FLMutableArray FL_NULLABLE a) :Array((FLArray)FLMutableArray_Retain(a)) { } MutableArray(const MutableArray &a) :Array((FLArray)FLMutableArray_Retain(a)) { } @@ -147,6 +153,11 @@ namespace fleece { inline MutableArray getMutableArray(uint32_t i); inline MutableDict getMutableDict(uint32_t i); + /** Like \ref newWithFormat, except it operates on an existing mutable array. + (Pre-existing properties not appearing in the format string are preserved.) 
*/ + inline void updateWithFormat(const char* format, ...) __printflike(2, 3); + inline void updateWithFormatV(const char* format, va_list); + private: MutableArray(FLMutableArray FL_NULLABLE a, bool) :Array((FLArray)a) {} friend class RetainedValue; @@ -162,6 +173,12 @@ namespace fleece { public: static MutableDict newDict() {return MutableDict(FLMutableDict_New(), false);} + /** Creates a mutable dict with values in it, based on a JSON-like format string. + Argument values can be substituted into it. + See the documentation of the "Fleece Formatted Value Builder" in FLMutable.h. */ + static inline MutableDict newWithFormat(const char *format, ...) __printflike(1, 2); + static inline MutableDict newWithFormatV(const char *format, va_list args); + MutableDict() :Dict() { } MutableDict(FLMutableDict FL_NULLABLE d):Dict((FLDict)d) {FLMutableDict_Retain(*this);} MutableDict(const MutableDict &d) :Dict((FLDict)d) {FLMutableDict_Retain(*this);} @@ -212,6 +229,11 @@ namespace fleece { inline MutableArray getMutableArray(slice key); inline MutableDict getMutableDict(slice key); + /** Like \ref newWithFormat, except it operates on an existing mutable dict. + (Pre-existing properties not appearing in the format string are preserved.) */ + inline void updateWithFormat(const char* format, ...) __printflike(2, 3); + inline void updateWithFormatV(const char* format, va_list); + private: MutableDict(FLMutableDict FL_NULLABLE d, bool) :Dict((FLDict)d) {} friend class RetainedValue; @@ -358,6 +380,53 @@ namespace fleece { return MutableDict(FLDict_AsMutable(*this)); } + MutableArray MutableArray::newWithFormat(const char *format, ...) 
{ + va_list args; + va_start(args, format); + auto result = newWithFormatV(format, args); + va_end(args); + return result; + } + + MutableArray MutableArray::newWithFormatV(const char *format, va_list args) { + // FLValue_NewWithFormatV returns an owned (+1) reference. Adopt it with the private + // non-retaining constructor (the one newArray() uses); the public constructor would + // retain it a second time and the value would never be freed. + FLValue value = FLValue_NewWithFormatV(format, args); + auto array = FLArray_AsMutable(FLValue_AsArray(value)); + if (!array) + FLValue_Release(value); // format described a dict, not an array: don't leak it + return MutableArray(array, false); + } + + void MutableArray::updateWithFormat(const char* format, ...) { + va_list args; + va_start(args, format); + updateWithFormatV(format, args); + va_end(args); + } + + void MutableArray::updateWithFormatV(const char* format, va_list args) { + FLValue_UpdateWithFormatV(*this, format, args); + } + + + MutableDict MutableDict::newWithFormat(const char *format, ...) { + va_list args; + va_start(args, format); + auto result = newWithFormatV(format, args); + va_end(args); + return result; + } + + MutableDict MutableDict::newWithFormatV(const char *format, va_list args) { + // Same ownership rule as MutableArray::newWithFormatV: adopt the +1 reference rather + // than retaining it again. + FLValue value = FLValue_NewWithFormatV(format, args); + auto dict = FLDict_AsMutable(FLValue_AsDict(value)); + if (!dict) + FLValue_Release(value); // format described an array, not a dict: don't leak it + return MutableDict(dict, false); + } + + void MutableDict::updateWithFormat(const char* format, ...) 
{ + va_list args; + va_start(args, format); + updateWithFormatV(format, args); + va_end(args); + } + + void MutableDict::updateWithFormatV(const char* format, va_list args) { + FLValue_UpdateWithFormatV(*this, format, args); + } + } FL_ASSUME_NONNULL_END diff --git a/Fleece.xcodeproj/project.pbxproj b/Fleece.xcodeproj/project.pbxproj index 0e70e85f..136ddc00 100644 --- a/Fleece.xcodeproj/project.pbxproj +++ b/Fleece.xcodeproj/project.pbxproj @@ -199,6 +199,9 @@ 27DE2EEB2125FC9300123597 /* FleeceException.cc in Sources */ = {isa = PBXBuildFile; fileRef = 275CED501D3EF7BE001DE46C /* FleeceException.cc */; }; 27DFAE12219F83AB00DF57EB /* InstanceCounted.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27DFAE10219F83AB00DF57EB /* InstanceCounted.hh */; }; 27DFAE13219F83AB00DF57EB /* InstanceCounted.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27DFAE11219F83AB00DF57EB /* InstanceCounted.cc */; }; + 27E3CE0F263B1B0700CA7056 /* Builder.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27E3CE0D263B1B0700CA7056 /* Builder.hh */; }; + 27E3CE10263B1B0700CA7056 /* Builder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27E3CE0E263B1B0700CA7056 /* Builder.cc */; }; + 27E3CE12263B525E00CA7056 /* BuilderTests.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27E3CE11263B525E00CA7056 /* BuilderTests.cc */; }; 27E3DD421DB6A14200F2872D /* SharedKeys.cc in Sources */ = {isa = PBXBuildFile; fileRef = 27E3DD401DB6A14200F2872D /* SharedKeys.cc */; }; 27E3DD431DB6A14200F2872D /* SharedKeys.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27E3DD411DB6A14200F2872D /* SharedKeys.hh */; }; 27E3DD4C1DB6C32400F2872D /* CaseListReporter.hh in Headers */ = {isa = PBXBuildFile; fileRef = 27E3DD4A1DB6C32400F2872D /* CaseListReporter.hh */; }; @@ -472,6 +475,9 @@ 27DE2EDF2125FA1700123597 /* libfleeceBase.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libfleeceBase.a; sourceTree = BUILT_PRODUCTS_DIR; }; 27DFAE10219F83AB00DF57EB /* InstanceCounted.hh 
*/ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = InstanceCounted.hh; sourceTree = ""; }; 27DFAE11219F83AB00DF57EB /* InstanceCounted.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = InstanceCounted.cc; sourceTree = ""; }; + 27E3CE0D263B1B0700CA7056 /* Builder.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Builder.hh; sourceTree = ""; }; + 27E3CE0E263B1B0700CA7056 /* Builder.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Builder.cc; sourceTree = ""; }; + 27E3CE11263B525E00CA7056 /* BuilderTests.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = BuilderTests.cc; sourceTree = ""; }; 27E3DD401DB6A14200F2872D /* SharedKeys.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SharedKeys.cc; sourceTree = ""; }; 27E3DD411DB6A14200F2872D /* SharedKeys.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = SharedKeys.hh; sourceTree = ""; }; 27E3DD471DB6B86000F2872D /* catch.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = catch.hpp; sourceTree = ""; }; @@ -674,6 +680,7 @@ 27F666432017E26700A8ED31 /* SupportTests.cc */, 279AC52A1C07776A002C80DB /* ValueTests.cc */, 272E5A5B1BF800A100848580 /* EncoderTests.cc */, + 27E3CE11263B525E00CA7056 /* BuilderTests.cc */, 27E3DD521DB7DB1C00F2872D /* SharedKeysTests.cc */, 276D15481E008E7A00543B1B /* JSON5Tests.cc */, 27298E771C01A461000CFBA8 /* PerfTests.cc */, @@ -900,6 +907,8 @@ 27A924CE1D9C32E800086206 /* Path.hh */, 27298E7F1C04E665000CFBA8 /* Encoder.cc */, 270FA26F1BF53CEA005DCB13 /* Encoder.hh */, + 27E3CE0E263B1B0700CA7056 /* Builder.cc */, + 27E3CE0D263B1B0700CA7056 /* Builder.hh */, 27298E3A1C00F812000CFBA8 /* JSONConverter.cc */, 27298E761C00FB48000CFBA8 /* JSONConverter.hh */, 27E3DD401DB6A14200F2872D /* SharedKeys.cc */, @@ -989,6 +998,7 @@ 
2734B8AD1F859AEC00BE5249 /* FleeceDocument.h in Headers */, 2776AA792093C982004ACE85 /* sliceIO.hh in Headers */, 278163BD1CE7A72300B94E32 /* KeyTree.hh in Headers */, + 27E3CE0F263B1B0700CA7056 /* Builder.hh in Headers */, 27C4ACAD1CE5146500938365 /* Array.hh in Headers */, 2776AA22208678AA004ACE85 /* DeepIterator.hh in Headers */, 2734B89E1F8583FF00BE5249 /* MArray.hh in Headers */, @@ -1329,6 +1339,7 @@ 27A924CF1D9C32E800086206 /* Path.cc in Sources */, 274D824C209A7577008BB39F /* HeapArray.cc in Sources */, 2734B8B11F870FB400BE5249 /* MContext.cc in Sources */, + 27E3CE10263B1B0700CA7056 /* Builder.cc in Sources */, 27A0E3E024DCD86900380563 /* ConcurrentArena.cc in Sources */, 275CED521D3EF7BE001DE46C /* FleeceException.cc in Sources */, 278163B51CE69CA800B94E32 /* Fleece.cc in Sources */, @@ -1374,6 +1385,7 @@ 27298E781C01A461000CFBA8 /* PerfTests.cc in Sources */, 2734B8A71F85842300BE5249 /* MTests.mm in Sources */, 272E5A5F1BF91DBE00848580 /* ObjCTests.mm in Sources */, + 27E3CE12263B525E00CA7056 /* BuilderTests.cc in Sources */, 277F45B4208FDA1800A0D159 /* HashTreeTests.cc in Sources */, 27CEE41A20EFE92E00089A85 /* KeyTree.cc in Sources */, ); diff --git a/Fleece/API_Impl/Fleece.cc b/Fleece/API_Impl/Fleece.cc index d5ff9a10..3c99191b 100644 --- a/Fleece/API_Impl/Fleece.cc +++ b/Fleece/API_Impl/Fleece.cc @@ -17,6 +17,7 @@ #include "fleece/Fleece.h" #include "JSON5.hh" #include "ParseDate.hh" +#include "Builder.hh" #include "betterassert.hh" #include @@ -779,6 +780,41 @@ FLSliceResult FLEncoder_Finish(FLEncoder e, FLError * FL_NULLABLE outError) FLAP } +#pragma mark - BUILDER + + + FLValue FLValue_NewWithFormat(const char *format, ...) 
{ + va_list args; + va_start(args, format); + auto result = FLValue_NewWithFormatV(format, args); + va_end(args); + return result; + } + + FLValue FLValue_NewWithFormatV(const char *format, va_list args) { + return std::move(builder::VBuild(format, args)).detach(); + } + + void FLMutableArray_UpdateWithFormat(FLMutableArray array, const char *format, ...) { + va_list args; + va_start(args, format); + FLValue_UpdateWithFormatV(array, format, args); + va_end(args); + } + + void FLMutableDict_UpdateWithFormat(FLMutableDict dict, const char *format, ...) { + va_list args; + va_start(args, format); + FLValue_UpdateWithFormatV(dict, format, args); + va_end(args); + } + + void FLValue_UpdateWithFormatV(FLValue v, const char *format, va_list args) { + assert(FLValue_IsMutable(v)); + builder::VPut(const_cast(v), format, args); + } + + #pragma mark - DOCUMENTS diff --git a/Fleece/Core/Builder.cc b/Fleece/Core/Builder.cc new file mode 100644 index 00000000..06c144e2 --- /dev/null +++ b/Fleece/Core/Builder.cc @@ -0,0 +1,539 @@ +// +// Builder.cc +// +// Copyright © 2021 Couchbase. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "Builder.hh" +#include "FleeceException.hh" +#include "JSON5.hh" +#include "MutableDict.hh" +#include "MutableArray.hh" +#include "NumConversion.hh" +#include "slice_stream.hh" +#include <ctype.h> +#include <stdarg.h> + +#ifdef __APPLE__ +#include "fleece/Fleece+CoreFoundation.h" +#endif + +namespace fleece::impl::builder { + using namespace std; + + + class Builder { + public: + + // Copies `args` so the caller's va_list is left untouched; the copy is released in the destructor. + Builder(slice formatString, va_list args) + :_format(formatString) + ,_in(_format) + { + va_copy(_args, args); + } + + ~Builder() { + va_end(_args); // every va_copy must be paired with a va_end + } + + // Not copyable: a copy would share `_args` and call va_end on it twice. + Builder(const Builder&) = delete; + Builder& operator=(const Builder&) = delete; + + + // Parses the format, interpolates args, and returns a new mutable Array or Dict. + // Fails (throws) if the format doesn't start with '[' or '{', or has trailing non-whitespace. + RetainedConst<Value> buildValue() { + switch (peekToken()) { + case '[': { + Retained<MutableArray> array = MutableArray::newArray(); + _buildInto(array); + finished(); + return array.get(); + } + case '{': { + Retained<MutableDict> dict = MutableDict::newDict(); + _buildInto(dict); + finished(); + return dict.get(); + } + default: + fail("only '{...}' or '[...]' allowed at top level"); + } + } + + + // Parses a '{...}' format and stores its entries into the existing `dict`. + void buildInto(MutableDict *dict) { + if (peekToken() != '{') + fail("expected '{'"); + _buildInto(dict); + } + + + // Parses a '[...]' format and appends its items to the existing `array`. + void buildInto(MutableArray *array) { + if (peekToken() != '[') + fail("expected '['"); + _buildInto(array); + } + + + protected: + // Parses a Fleece value from the input and stores it in the ValueSlot. + // Recognizes a '%' specifier, and calls `putParameter` to read the value from the args. 
+ const bool _buildValue(ValueSlot &inSlot) { + switch (peekToken()) { + case '[': { + Retained array = MutableArray::newArray(); + _buildInto(array); + inSlot.set(array); + break; + } + case '{': { + Retained dict = MutableDict::newDict(); + _buildInto(dict); + inSlot.set(dict); + break; + } + case 'n': + readIdentifier("null"); + inSlot.set(nullValue); + break; + case 't': + readIdentifier("true"); + inSlot.set(true); + break; + case 'f': + readIdentifier("false"); + inSlot.set(false); + break; + case '-': + case '+': + case '.': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + readLiteralNumber(inSlot); + break; + case '"': + case '\'': + inSlot.set(readLiteralString()); + break; + case '%': + get(); + return putParameter(inSlot); + default: + fail("invalid start of value"); + } + return true; + } + + + // Parses a JSON5 object from the input and adds its entries to `dict`. + void _buildInto(MutableDict *dict) { + get(); // skip the opening '{' *without verifying* + char c; + while ('}' != (c = peekToken())) { + // Scan key: + string key; + if (c == '"' || c == '\'') { + // Key string: + key = readLiteralString(); + } else if (isalpha(c) || c == '_' || c == '$') { + // JSON5 unquoted key: + key = string(readIdentifier()); + } else { + fail("expected dict key"); + } + + if (peekToken() != ':') + fail("expected ':' after dict key"); + get(); + + // Value: + if (!_buildValue(dict->setting(key))) + dict->remove(key); + + if (peekToken() == ',') // Note: JSON5 allows trailing `,` before `}` + get(); + else if (peekToken() != '}') + fail("unexpected token after dict item"); + } + get(); // eat close bracket/brace + } + + + // Parses a JSON5 array from the input and adds its entries to `dict`. 
+ void _buildInto(MutableArray *array) { + get(); // skip the opening '[' *without verifying* + while (peekToken() != ']') { + if (!_buildValue(array->appending())) + array->remove(array->count() - 1, 1); + + if (peekToken() == ',') // Note: JSON5 allows trailing `,` before `]` + get(); + else if (peekToken() != ']') + fail("unexpected token after array item"); + } + get(); // eat close bracket/brace + } + + +#pragma mark - PARAMETER SUBSTITUTION: + + + // This is where those crazy printf format specs get parsed. + bool putParameter(ValueSlot &inSlot) { + char c = get(); + // `-` means to skip this arg if it has a default value: + bool skipDefault = (c == '-'); + if (skipDefault) + c = get(); + + // Size specifier: + char size = ' '; + if (c == 'l' || c == 'q' || c == 'z') { + size = c; + c = get(); + if (size == 'l' && c == 'l') { + size = 'q'; + c = get(); + } + } + + switch (c) { + case 'c': case 'b': { + // Bool: + bool param = va_arg(_args, int) != 0; + if (skipDefault && !param) + return false; + inSlot.set(param); + break; + } + case 'd': case 'i': { + // Signed integers: + int64_t param; + if (size == 'q') + param = va_arg(_args, long long); + else if (size == 'z') + param = va_arg(_args, ptrdiff_t); + else if (size == 'l') + param = va_arg(_args, long); + else + param = va_arg(_args, int); + if (skipDefault && param == 0) + return false; + inSlot.set(param); + break; + } + case 'u': { + // Unsigned integers: + uint64_t param; + if (size == 'q') + param = va_arg(_args, unsigned long long); + else if (size == 'z') + param = va_arg(_args, size_t); + else if (size == 'l') + param = va_arg(_args, unsigned long); + else + param = va_arg(_args, unsigned int); + if (skipDefault && param == 0) + return false; + inSlot.set(param); + break; + } + case 'f': { + // Floats: + double param = va_arg(_args, double); + if (skipDefault && param == 0.0) + return false; + inSlot.set(param); + break; + } + case 's': { + // C string: + slice param(va_arg(_args, const char*)); + 
if (!param || (skipDefault && param.empty())) + return false; + inSlot.set(param); + break; + } + case '.': { + // Slice ("%.*s") -- takes 2 args: the start and size (see FMTSLICE() macro) + if (get() != '*' || get() != 's') + fail("'.' qualifier only supported in '%.*s'"); + int len = va_arg(_args, int); + auto str = va_arg(_args, void*); + if (!str || (skipDefault && len == 0)) + return false; + inSlot.set(slice(str, len)); + break; + } + case 'p': { + // "%p" is a Fleece value: + auto param = va_arg(_args, const Value*); + if (!param) + return false; + inSlot.set(param); + break; + } +#if __APPLE__ + case '@': { + // "%@" substitutes an Objective-C or CoreFoundation object. + auto param = va_arg(_args, CFTypeRef); + if (!param) + return false; + FLSlot_SetCFValue(FLSlot(&inSlot), param); + return true; + } +#endif + default: + fail("unknown '%' format specifier"); + } + return true; + } + + +#pragma mark - LITERALS: + + + // Reads a numeric literal, storing it in the ValueSlot. + void readLiteralNumber(ValueSlot &inSlot) { + // Scan to the end of the number: + // (If the NumConversion.hh API used slice_istream I wouldn't have to do the scan) + auto start = _in.next(); + bool isNegative = (peek() == '-'); + if (isNegative || peek() == '+') + get(); + bool isFloat = false; + + char c; + do { + c = get(); + if (c == '.' || c == 'e' || c == 'E') + isFloat = true; + } while (isdigit(c) || c == '.' 
|| c == 'e' || c == 'E' || c == '-' || c == '+'); + unget(); + auto numStr = string(slice(start, _in.next())); + + if (isFloat) { + double n; + if (ParseDouble(numStr.c_str(), n, false)) { + inSlot.set(n); + return; + } + } else if (isNegative) { + int64_t i; + if (ParseInteger(numStr.c_str(), i, false)) { + inSlot.set(i); + return; + } + } else { + uint64_t u; + if (ParseUnsignedInteger(numStr.c_str(), u, false)) { + inSlot.set(u); + return; + } + } + fail(("Invalid numeric literal " + numStr).c_str()); + } + + + // Reads a string literal in JSON5 format, returning its value + string readLiteralString() { + string out; + const char quote = get(); // single or double-quote + char c; + while (quote != (c = get())) { + if (c == '\\') { + switch ((c = get())) { + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\n'; break; + case 'u': fail("Unicode escapes not supported"); + // default is to leave c alone + } + } else if (c < ' ') { + fail("control character in string literal"); + } + out += c; + } + return out; + } + + +#pragma mark - LEXER: + + + // Reads alphanumeric characters, returning the identifier as a string. + // (The 1st char is accepted even if not alphanumeric, on the assumption the caller already + // peeked at and approved it.) + slice readIdentifier() { + auto start = _in.next(); + get(); // consume the char the caller peeked + while (true) { + char c = peek(); + if (isalnum(c) || c == '_') + get(); + else + break; + } + return slice(start, _in.next()); + } + + + // Reads an identifier and fails if it isn't equal to `expected`. + void readIdentifier(slice expected) { + if (readIdentifier() != expected) + fail("unknown identifier"); + } + + + // Reads & ignores a JSON5 comment. 
+ void skipComment() { + char c; + get(); // consume initial '/' + switch (get()) { + case '/': + do { + c = peek(); + if (c) + get(); + } while (c != 0 && c != '\n' && c != '\r'); + break; + case '*': { + bool star; + c = 0; + do { + star = (c == '*'); + c = get(); + } while (!(star && c=='/')); + break; + } + default: + fail("syntax error"); + } + } + + + // Fails if anything remains in the input but whitespace. + void finished() { + if (peekToken() != 0) + fail("unexpected characters after end of spec"); + } + + + // Skips any whitespace and JSON5 comments, then returns a peek at the next character. + char peekToken() { + while (true) { + char c = peek(); + if (c == 0) { + return c; // EOF + } else if (isspace(c)) { + get(); // skip whitespace + } else if (c == '/') { + skipComment(); + } else { + return c; + } + } + } + + + // Returns the next character from the input without consuming it, or 0 at EOF. + char peek() { + return _in.peekByte(); + } + + + // Reads the next character from the input. Fails if input is at EOF. + char get() { + if (_in.eof()) + fail("unexpected end"); + return _in.readByte(); + } + + + void unget() { + _in.unreadByte(); + } + + + // Throws an exception. + [[noreturn]] + void fail(const char *error) { + slice prefix = _format.upTo(_in.next()), suffix = _format.from(_in.next()); + FleeceException::_throw(InvalidData, "Build(): %s in format: %.*s💥%.*s", + error, FMTSLICE(prefix), FMTSLICE(suffix)); + } + + + private: + slice const _format; // The entire format string + slice_istream _in; // Stream for reading _format + va_list _args; // The caller-provided arguments + }; + + +#pragma mark - PUBLIC API: + + + RetainedConst VBuild(const char *format, va_list args) { + return Builder(format, args).buildValue(); + } + + RetainedConst VBuild(slice format, va_list args) { + return Builder(format, args).buildValue(); + } + + + RetainedConst Build(const char *format, ...) 
{ + va_list args; + va_start(args, format); + auto result = VBuild(format, args); + va_end(args); + return result; + } + + +#ifdef __APPLE__ + RetainedConst BuildCF(CFStringRef cfFormat, ...) { + va_list args; + va_start(args, cfFormat); + auto result = VBuild(nsstring_slice(cfFormat), args); + va_end(args); + return result; + } +#endif + + + void Put(MutableArray *array, const char *format, ...) { + va_list args; + va_start(args, format); + Builder(format, args).buildInto(array); + va_end(args); + } + + + void Put(MutableDict *dict, const char *format, ...) { + va_list args; + va_start(args, format); + Builder(format, args).buildInto(dict); + va_end(args); + } + + void VPut(Value *v, const char *format, va_list args) { + Builder builder(format, args); + if (const Dict *dict = v->asDict()) { + MutableDict *mutableDict = dict->asMutable(); + assert(mutableDict); + builder.buildInto(mutableDict); + } else { + MutableArray *mutableArray = v->asArray()->asMutable(); + assert(mutableArray); + builder.buildInto(mutableArray); + } + } + +} diff --git a/Fleece/Core/Builder.hh b/Fleece/Core/Builder.hh new file mode 100644 index 00000000..188a67a4 --- /dev/null +++ b/Fleece/Core/Builder.hh @@ -0,0 +1,68 @@ +// +// Builder.hh +// +// Copyright © 2021 Couchbase. All rights reserved. +// + +#pragma once +#include "FleeceImpl.hh" +#include "slice_stream.hh" +#include + +namespace fleece::impl::builder { + + /** Creates a MutableArray or MutableDict by reading the format string and following arguments. + The format string is basically JSON5, except that any value in it may be a printf-style + '%' specifier instead of a literal, in which case that value will be read from the next + argument. 
The supported format specifiers are: + - Boolean: `%c` (cast the arg to `char` to avoid a compiler warning) + - Integer: `%i` or `%d` (use size specifiers `l`, `ll`, or `z`) + - Unsigned integer: `%u` (use size specifiers `l`, `ll`, or `z`) + - Floating point: `%f` (arg can be `float` or `double`; no size spec needed) + - C string: `%s` + - Ptr+length string: `%.*s` (takes two args, in printf order: an `int` length followed by a `const char*`. See FMTSLICE.) + - Fleece value: `%p` (arg must be a `const Value*` or `FLValue`) + + A `-` can appear after the `%`, indicating that the argument should be ignored if it has + a default value, namely `false`, 0, or an empty string. This means the corresponding item + won't be written (a Dict item will be erased if it previously existed.) + + If a string/value specifier is given a NULL pointer, nothing is written, and any + pre-existing Dict item will be removed. + + \note It's legal for a Dict key to be repeated; later occurrences take precedence, + i.e. each one overwrites the last. + + @param format The format string. The following arguments will be type-checked against + its `%`-specifiers, if the compiler supports that. + @return A new non-null mutable Fleece value, either an array or dict depending on the outer + delimiter of the format string. + @throw A \ref FleeceException with code `InvalidData` if there's a syntax error in the + format string, either in JSON5 or a `%`-specifier. The exception message highlights the + location where the error occurred. */ + RetainedConst Build(const char *format, ...) __printflike(1, 2); + + + /** Variant of \ref Build that takes a pre-existing `va_list`. */ + RetainedConst VBuild(const char *format, va_list args); + RetainedConst VBuild(slice format, va_list args); + + + /** Like \ref Build, except the parsed items are appended to an existing Array. + The format string must describe an array (`[...]`). */ + void Put(MutableArray*, const char *format, ...) __printflike(2, 3); + + /** Like \ref Build, except the properties are stored into an existing Dict. 
+ (Pre-existing properties not appearing in the format string are preserved.) */ + void Put(MutableDict*, const char *format, ...) __printflike(2, 3); + + /** Variant of \ref Put that takes a pre-existing `va_list` in place of variadic arguments. The target is declared as a plain `Value*` here, whereas \ref Put as declared just above takes a `MutableDict*`. */ + void VPut(Value*, const char *format, va_list args); + +#ifdef __APPLE__ + /** Variant of Build that allows `%@` for [Core]Foundation values; the corresponding arg + must be a `CFStringRef`, `CFNumberRef`, `CFArrayRef` or `CFDictionaryRef`. + \note The format string is a `CFStringRef` not a `char*`, because `CF_FORMAT_FUNCTION` + requires that. */ + RetainedConst BuildCF(CFStringRef format, ...) CF_FORMAT_FUNCTION(1, 2); +#endif +} diff --git a/Fleece/Support/slice_stream.hh b/Fleece/Support/slice_stream.hh index da6a2817..13ac42d5 100644 --- a/Fleece/Support/slice_stream.hh +++ b/Fleece/Support/slice_stream.hh @@ -181,6 +181,10 @@ namespace fleece { /// Reads the next byte. If the stream is already at EOF, returns 0. uint8_t readByte() noexcept; + /// Un-does the last call to \ref readByte, i.e. moves back one byte. + /// \warning Not range checked: moving back before the start is undefined behavior. + void unreadByte() noexcept {slice::moveStart(-1);} + /// Returns the next byte, or 0 if at EOF, but does not advance the stream. uint8_t peekByte() const noexcept FLPURE {return (size > 0) ? (*this)[0] : 0;} diff --git a/ObjC/Encoder+ObjC.mm b/ObjC/Encoder+ObjC.mm index 2429066f..77a49b4e 100644 --- a/ObjC/Encoder+ObjC.mm +++ b/ObjC/Encoder+ObjC.mm @@ -14,7 +14,7 @@ #import "Encoder.hh" #import "Fleece+ImplGlue.hh" #import "FleeceException.hh" -#import "Fleece+CoreFoundation.h" +#import "fleece/Fleece+CoreFoundation.h" using namespace fleece::impl; diff --git a/Tests/BuilderTests.cc b/Tests/BuilderTests.cc new file mode 100644 index 00000000..5ef99cf3 --- /dev/null +++ b/Tests/BuilderTests.cc @@ -0,0 +1,169 @@ +// +// BuilderTests.cc +// +// Copyright © 2021 Couchbase. All rights reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "FleeceTests.hh" +#include "Builder.hh" +#include + +#ifdef __APPLE__ +#include +#endif + +using namespace fleece::impl; + + +TEST_CASE("Builder Empty", "[Builder]") { /* Empty dict and array format strings, with and without surrounding whitespace. */ + auto v = builder::Build("{}"); + REQUIRE(v); + CHECK(v->toJSONString() == "{}"); + + v = builder::Build("[]"); + REQUIRE(v); + CHECK(v->toJSONString() == "[]"); + + v = builder::Build(" \t{ \n } "); + REQUIRE(v); + CHECK(v->toJSONString() == "{}"); + + v = builder::Build(" [ ] "); + REQUIRE(v); + CHECK(v->toJSONString() == "[]"); +} + + +TEST_CASE("Builder Literals", "[Builder]") { /* Literal null, booleans and numbers (including explicit '+' signs and an exponent) with no substituted arguments. */ + auto v = builder::Build("[null, false, true, 0, 1, -12, +123, 123.5, -123.5, +123.5, 123e-4]"); + REQUIRE(v); + CHECK(v->toJSONString() == "[null,false,true,0,1,-12,123,123.5,-123.5,123.5,0.0123]"); +} + + +TEST_CASE("Builder String Literals", "[Builder]") { /* Single- and double-quoted string literals with backslash escapes, and unquoted dict keys containing '$' and '_'. The expected JSON lists the keys in a different order than the format string. */ + auto v = builder::Build(R"({a : 'foo\'', $b : "bar\"rab", _c_ : "", _ : "\r\\"})"); + REQUIRE(v); + std::string expected = R"({"$b":"bar\"rab","_":"\r\\","_c_":"","a":"foo'"})"; + CHECK(v->toJSONString() == expected); +} + + +TEST_CASE("Builder Basic Dict", "[Builder]") { /* Substitutes a C string, an int and a double into a dict via %s, %d and %f, then checks both the accessors and the JSON output. */ + auto v = builder::Build("{name:%s, size:%d, weight:%f}", + "Zegpold", 12, 3.14); + auto dict = v->asDict(); + REQUIRE(dict); + CHECK(dict->get("name")->asString() == "Zegpold"); + CHECK(dict->get("size")->asInt() == 12); + CHECK(dict->get("weight")->asDouble() == 3.14); + CHECK(v->toJSONString() ==
R"({"name":"Zegpold","size":12,"weight":3.14})"); +} + + +TEST_CASE("Builder Basic Array", "[Builder]") { /* Substitutes a C string, an int and a double into an array via %s, %d and %f, then checks both the accessors and the JSON output. */ + auto v = builder::Build("[%s, %d, %f]", + "Zegpold", 12, 3.14); + auto array = v->asArray(); + REQUIRE(array); + CHECK(array->get(0)->asString() == "Zegpold"); + CHECK(array->get(1)->asInt() == 12); + CHECK(array->get(2)->asDouble() == 3.14); + CHECK(v->toJSONString() == R"(["Zegpold",12,3.14])"); +} + + +TEST_CASE("Builder Nesting", "[Builder]") { /* An array and a dict nested inside the top-level dict, each taking substituted arguments. */ + auto v = builder::Build("{name:%s, coords:[%d, %d], info:{nickname:%s}}", + "Zegpold", 4, 5, "Zeggy"); + CHECK(v->toJSONString() == R"({"coords":[4,5],"info":{"nickname":"Zeggy"},"name":"Zegpold"})"); +} + + +TEST_CASE("Builder Bool Params", "[Builder]") { /* Passes `bool` values cast to `char` for the %c specifier; expects JSON true/false. */ + bool t = true, f = false; + auto v = builder::Build("[%c,%c]", char(t), char(f)); + CHECK(v->toJSONString() == R"([true,false])"); +} + + +TEST_CASE("Builder Integer Params", "[Builder]") { /* Minimum/maximum values for each integer size specifier (none, l, ll, z); the expected text is chosen per group from sizeof. NOTE(review): the two %zd arguments (p0, p1) are declared `ptrdiff_t` - confirm this is intended. */ + int i0 = INT_MIN, i1 = INT_MAX; + unsigned u = UINT_MAX; + long l0 = LONG_MIN, l1 = LONG_MAX; + unsigned long ul = ULONG_MAX; + long long ll0 = LLONG_MIN, ll1 = LLONG_MAX; + unsigned long long ull = ULLONG_MAX; + ptrdiff_t p0 = PTRDIFF_MIN, p1 = PTRDIFF_MAX; + size_t z1 = SIZE_MAX; + auto v = builder::Build("[[%d, %d, %u], [%ld,%ld,%lu], [%lld,%lld,%llu], [%zd,%zd,%zu]]", + i0, i1, u, l0, l1, ul, ll0, ll1, ull, p0, p1, z1); + std::string expected32 = "[-2147483648,2147483647,4294967295]"; + std::string expected64 = "[-9223372036854775808,9223372036854775807,18446744073709551615]"; + std::string expected = + "[" + (sizeof(int) == 8 ? expected64 : expected32) + "," + + (sizeof(long) == 8 ? expected64 : expected32) + "," + + (sizeof(long long) == 8 ? expected64 : expected32) + "," + + (sizeof(size_t) == 8 ?
expected64 : expected32) + "]"; + CHECK(v->toJSONString() == expected); +} + + +TEST_CASE("Builder Value Params", "[Builder]") { /* Passes the same built array twice via %p (using v1.get()); expects it to appear under both keys. */ + auto v1 = builder::Build("[%s, %d, %f]", + "Zegpold", 12, 3.14); + auto v2 = builder::Build("{v1: %p, v2: %p}", v1.get(), v1.get()); + CHECK(v2->toJSONString() == R"({"v1":["Zegpold",12,3.14],"v2":["Zegpold",12,3.14]})"); +} + + +TEST_CASE("Builder Empty Strings", "[Builder]") { /* Empty but non-null strings passed via %s and %.*s (no '-' flag); the expected JSON contains them as "". */ + const char *str = ""; + slice sl(str); + auto v = builder::Build("{a:%s, b:%.*s, d:[%s, %.*s]}", + str, FMTSLICE(sl), str, FMTSLICE(sl)); + CHECK(v->toJSONString() == R"({"a":"","b":"","d":["",""]})"); +} + + +TEST_CASE("Builder Null Args", "[Builder]") { /* Null `const char*`, null slice and null `Value*` arguments; the expected JSON is {"d":[]}, i.e. none of those items appear. */ + const char *str = nullptr; + slice sl = nullslice; + const Value *val = nullptr; + auto v = builder::Build("{a:%s, b:%.*s, c:%p, d:[%s, %.*s, %p]}", + str, FMTSLICE(sl), val, str, FMTSLICE(sl), val); + CHECK(v->toJSONString() == R"({"d":[]})"); +} + + +TEST_CASE("Builder Default Suppression", "[Builder]") { /* Every specifier carries the '-' flag and every argument is false, 0, 0.0 or an empty string; the expected JSON is an empty array. */ + const char *str = ""; + slice sl(str); + auto v = builder::Build("[%-c, %-d, %-f, %-s, %-.*s]", + char(false), 0, 0.0, str, FMTSLICE(sl)); + CHECK(v->toJSONString() == R"([])"); +} + + +#ifdef __APPLE__ +TEST_CASE("Builder CoreFoundation Params", "[Builder]") { /* Substitutes a CFString and a CFNumber via %@ using BuildCF; compiled only when __APPLE__ is defined. */ + CFStringRef str = CFSTR("Zegpold"); + int i = 12345678; + CFNumberRef n = CFNumberCreate(nullptr, kCFNumberIntType, &i); /* NOTE(review): `n` comes from CFNumberCreate and no CFRelease is visible in this test - confirm whether it should be released. */ + auto v = builder::BuildCF(CFSTR("[%@, %@]"), + str, n); + CHECK(v->toJSONString() == R"(["Zegpold",12345678])"); +} +#endif // __APPLE__ diff --git a/cmake/platform_base.cmake b/cmake/platform_base.cmake index 0cdb463d..f6be43b5 100644 --- a/cmake/platform_base.cmake +++ b/cmake/platform_base.cmake @@ -10,6 +10,7 @@ function(set_source_files_base) Fleece/API_Impl/Fleece.cc Fleece/API_Impl/FLSlice.cc Fleece/Core/Array.cc + Fleece/Core/Builder.cc Fleece/Core/DeepIterator.cc Fleece/Core/Dict.cc Fleece/Core/Doc.cc @@ -70,6 +71,7 @@ function(set_test_source_files_base) Tests/DeltaTests.cc Tests/EncoderTests.cc
Tests/FleeceTests.cc + Tests/BuilderTests.cc Tests/FleeceTestsMain.cc Tests/HashTreeTests.cc Tests/JSON5Tests.cc