From 849cb68dcbaf86e0131f3513fb1748c32ae5449a Mon Sep 17 00:00:00 2001
From: Joao Paulo Magalhaes <dev@jpmag.me>
Date: Wed, 3 Apr 2024 04:29:42 +0100
Subject: [PATCH] Add quickstart example addressing float precision and
 avoiding loss.

Re #417
---
 samples/quickstart.cpp | 144 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 142 insertions(+), 2 deletions(-)
diff --git a/samples/quickstart.cpp b/samples/quickstart.cpp
index 4e953768b..96ff5baae 100644
--- a/samples/quickstart.cpp
+++ b/samples/quickstart.cpp
@@ -70,6 +70,7 @@ void sample_base64();               ///< encode/decode base64
 void sample_user_scalar_types();    ///< serialize/deserialize scalar (leaf/string) types
 void sample_user_container_types(); ///< serialize/deserialize container (map or seq) types
 void sample_std_types();            ///< serialize/deserialize STL containers
+void sample_float_precision();      ///< control precision of serialized floats
 void sample_emit_to_container();    ///< emit to memory, eg a string or vector-like container
 void sample_emit_to_stream();       ///< emit to a stream, eg std::ostream
 void sample_emit_to_file();         ///< emit to a FILE*
@@ -105,6 +106,7 @@ int main()
     sample::sample_base64();
     sample::sample_user_scalar_types();
     sample::sample_user_container_types();
+    sample::sample_float_precision();
     sample::sample_std_types();
     sample::sample_emit_to_container();
     sample::sample_emit_to_stream();
@@ -2169,6 +2171,7 @@ void sample_fundamental_types()
     CHECK(tree.to_arena(c4::fmt::boolalpha(false)) == "false"); CHECK(tree.arena() == "abcde0101234-45-56-67-70x10.1240.23410truefalse");
 
     // write special float values
+    // see also sample_float_precision()
     const float  fnan = std::numeric_limits<float >::quiet_NaN();
     const double dnan = std::numeric_limits<double>::quiet_NaN();
     const float  finf = std::numeric_limits<float >::infinity();
@@ -2181,6 +2184,7 @@ void sample_fundamental_types()
     CHECK(tree.to_arena( dnan) ==  ".nan"); CHECK(tree.arena() == "abcde0101234-45-56-67-70x10.1240.23410truefalse.inf.inf-.inf-.inf.nan.nan");
 
     // read special float values
+    // see also sample_float_precision()
     C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal");
     tree = ryml::parse_in_arena(R"({ninf: -.inf, pinf: .inf, nan: .nan})");
     float f = 0.f;
@@ -2218,6 +2222,8 @@ non_null: [nULL, non_null, non null, null it is not]
     CHECK(tree["literal"].val().str != nullptr);
     CHECK(tree["folded"].val().str != nullptr);
     // likewise, scalar comparison to nullptr has the same results:
+    // (remember that .val() gives you the scalar value, node must
+    // have a val, ie must be a leaf node, not a container)
     CHECK(tree["plain"].val() == nullptr);
     CHECK(tree["squoted"].val() != nullptr);
     CHECK(tree["dquoted"].val() != nullptr);
@@ -2225,7 +2231,7 @@ non_null: [nULL, non_null, non null, null it is not]
     CHECK(tree["folded"].val() != nullptr);
     // the tree and node classes provide the corresponding predicate
     // functions .key_is_val() and .val_is_null():
-    CHECK(tree["plain"].val_is_null());
+    CHECK(tree["plain"].val_is_null()); // requires same preconditions as .val()
     CHECK( ! tree["squoted"].val_is_null());
     CHECK( ! tree["dquoted"].val_is_null());
     CHECK( ! tree["literal"].val_is_null());
@@ -2679,6 +2685,7 @@ void sample_formatting()
         // ------------------------------------------
         // fmt::real(): format floating point numbers
         // ------------------------------------------
+        // see also sample_float_precision()
         CHECK("0"       == cat_sub(buf, fmt::real(0.01f, 0)));
         CHECK("0.0"     == cat_sub(buf, fmt::real(0.01f, 1)));
         CHECK("0.01"    == cat_sub(buf, fmt::real(0.01f, 2)));
@@ -3294,7 +3301,7 @@ void sample_std_types()
     21003: 22003
 )";
     // parse in-place using the std::string above
-    auto tree = ryml::parse_in_place(ryml::to_substr(yml_std_string));
+    ryml::Tree tree = ryml::parse_in_place(ryml::to_substr(yml_std_string));
     // my_type is a container-of-containers type. see above its
     // definition implementation for ryml.
     std::vector<my_type> vmt;
@@ -3306,6 +3313,139 @@ void sample_std_types()
 }
 
 
+//-----------------------------------------------------------------------------
+
+/** control precision of serialized floats: reading keeps full precision,
+ * default serialization may lose digits before C++17, and the number
+ * of digits can be forced with fmt::real() or snprintf(). */
+void sample_float_precision()
+{
+    std::vector<double> reference{1.23234412342131234, 2.12323123143434237, 3.67847983572591234};
+    const size_t num_digits_original = 17;
+    const size_t num_digits_safe = 14;
+    // a safe precision for comparing doubles. may vary depending on compiler flags.
+    // double goes to about 15 digits, so 14 should be safe enough.
+    const double precision_safe = 1.e-14;
+    auto get_num_digits = [](ryml::csubstr number){ return number.sub(2).len; }; // length after the first two chars, eg "1."
+    //
+    // no significant precision is lost when reading
+    // floating point numbers:
+    {
+        ryml::Tree tree = ryml::parse_in_arena(R"([1.23234412342131234, 2.12323123143434237, 3.67847983572591234])");
+        std::vector<double> output;
+        tree.rootref() >> output;
+        CHECK(output.size() == reference.size());
+        for(size_t i = 0; i < reference.size(); ++i)
+        {
+            CHECK(get_num_digits(tree[i].val()) == num_digits_original);
+            CHECK(fabs(output[i] - reference[i]) < precision_safe);
+        }
+    }
+    //
+    // However, depending on the compilation settings, there may be a
+    // significant precision loss when serializing with the default
+    // approach, operator<<(double):
+    {
+        ryml::Tree serialized;
+        serialized.rootref() << reference;
+        std::cout << serialized;
+        //
+        #if C4_CPP >= 17  // when using C++17 and above, the results are eminently equal:
+        CHECK((ryml::emitrs_yaml<std::string>(serialized) == R"(- 1.2323441234213124
+- 2.1232312314343424
+- 3.6784798357259123
+)") || (bool)"this is indicative; the exact results will vary from platform to platform.");
+        size_t pos = 0;
+        for(ryml::ConstNodeRef child : serialized.rootref().children())
+        {
+            CHECK(get_num_digits(child.val()) >= num_digits_safe);
+            double out = {};
+            child >> out;
+            CHECK(fabs(out - reference[pos++]) < precision_safe);
+        }
+        #else // HOWEVER, with C++ < 17:
+        CHECK((ryml::emitrs_yaml<std::string>(serialized) == R"(- 1.23234
+- 2.12323
+- 3.67848
+)") || (bool)"this is indicative; the exact results will vary from platform to platform.");
+        C4_UNUSED(num_digits_safe);
+        #endif
+    }
+    //
+    // The difference is explained by the availability of
+    // fastfloat::from_chars(), std::from_chars() and std::to_chars().
+    //
+    // ryml prefers the fastfloat::from_chars() version. Unfortunately
+    // fastfloat does not have to_chars() (see
+    // https://github.com/fastfloat/fast_float/issues/23).
+    //
+    // When C++17 is used, ryml uses std::to_chars(), which produces
+    // good defaults.
+    //
+    // However, with earlier standards, there's only snprintf()
+    // available. Every other std library function will either disrespect
+    // the string limits, or more precisely, accept no string size
+    // limits. So the implementation of c4core (which ryml uses)
+    // falls back to snprintf("%g"), and that picks by default a (low)
+    // number of digits.
+    //
+    // But all is not lost for C++11/C++14 users!
+    //
+    // To force a particular precision when serializing, you can use
+    // c4::fmt::real() (brought into the ryml:: namespace). Or you can
+    // serialize the number yourself! The small downside is that you
+    // have to build the container.
+    //
+    // First a function to check the result:
+    auto check_precision = [&](ryml::Tree const& serialized){ // read-only: no need to copy the tree
+        std::cout << serialized;
+        // now it works!
+        CHECK((ryml::emitrs_yaml<std::string>(serialized) == R"(- 1.23234412342131239
+- 2.12323123143434245
+- 3.67847983572591231
+)") || (bool)"this is indicative; the exact results will vary from platform to platform.");
+        size_t pos = 0;
+        for(ryml::ConstNodeRef child : serialized.rootref().children())
+        {
+            CHECK(get_num_digits(child.val()) == num_digits_original);
+            double out = {};
+            child >> out;
+            CHECK(fabs(out - reference[pos++]) < precision_safe); // now within bounds!
+        }
+    };
+    //
+    // Serialization example using fmt::real()
+    {
+        ryml::Tree serialized;
+        ryml::NodeRef root = serialized.rootref();
+        root |= ryml::SEQ;
+        for(const double v : reference)
+            root.append_child() << ryml::fmt::real(v, num_digits_original, ryml::FTOA_FLOAT);
+        check_precision(serialized); // OK!
+    }
+    //
+    // Serialization example using snprintf
+    {
+        ryml::Tree serialized;
+        ryml::NodeRef root = serialized.rootref();
+        root |= ryml::SEQ;
+        char tmp[64];
+        for(const double v : reference)
+        {
+            // reuse a buffer to serialize.
+            // add 1 to the significant digits because the %g
+            // specifier counts the integral digits.
+            (void)snprintf(tmp, sizeof(tmp), "%.*g", static_cast<int>(num_digits_original + 1), v);
+            // copy the serialized string to the tree (operator<<
+            // copies to the arena, operator= just assigns the string
+            // pointer and would be wrong in this case):
+            root.append_child() << ryml::to_csubstr((const char*)tmp);
+        }
+        check_precision(serialized); // OK!
+    }
+}
+
+
 //-----------------------------------------------------------------------------
 
 /** demonstrates how to emit to a linear container of char */