From 9943d54379e46dd10974a1a096a3cbae4fc1771a Mon Sep 17 00:00:00 2001
From: Oskars Rubenis <104319900+Okarss@users.noreply.github.com>
Date: Sun, 14 Jan 2024 00:14:57 +0200
Subject: [PATCH] Introduce rounding (#255)

* Introduce rounding

* Fix the sign conversion warning

* Fix the unit_ftoa_rev.cc tests

* Retarget the submodule for testing

* Increase the precision to 9 digits

* Retarget the submodule to main
---
 CONTRIBUTORS.md           |  1 +
 README.md                 |  2 +-
 nanoprintf.h              | 26 +++++++++++++++++++-------
 tests/mpaland-conformance |  2 +-
 tests/unit_fsplit_abs.cc  | 34 +++++++++++++++++-----------------
 tests/unit_ftoa_rev.cc    |  5 ++++-
 6 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index c78f8ae..bffbd43 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -7,3 +7,4 @@
 * [Shreyas Balakrishna](https://github.com/shreyasbharath)
 * [Jim Keener](https://github.com/jimktrains)
 * [Dean T](https://github.com/deanoburrito)
+* [Oskars Rubenis](https://github.com/Okarss)
diff --git a/README.md b/README.md
index 7f78f3f..4845746 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,7 @@ Like `printf`, `nanoprintf` expects a conversion specification string of the fol
 
 ## Floating Point
 
-Floating point conversion is performed by extracting the value into 64:64 fixed-point with an extra field that specifies the number of leading zero fractional digits before the first nonzero digit. No rounding is currently performed; values are simply truncated at the specified precision. This is done for simplicity, speed, and code footprint.
+Floating point conversion is performed by extracting the value into 64:64 fixed-point with an extra field that specifies the number of leading zero fractional digits before the first nonzero digit. This is done for simplicity, speed, and code footprint. The resulting decimal digits are rounded to the requested precision, based on the first discarded digit.
 
 Because the float -> fixed code operates on the raw float value bits, no floating point operations are performed. This allows nanoprintf to efficiently format floats on soft-float architectures like Cortex-M0, and to function identically with or without optimizations like "fast math". Despite `nano` in the name, there's no way to do away with double entirely, since the C language standard says that floats are promoted to double any time they're passed into variadic argument lists. nanoprintf casts all doubles back down to floats before doing any conversions. No other single- or double- precision operations are performed.
 
diff --git a/nanoprintf.h b/nanoprintf.h
index 8273764..cec038b 100644
--- a/nanoprintf.h
+++ b/nanoprintf.h
@@ -272,7 +272,7 @@ static int npf_fsplit_abs(float f,
                           uint64_t *out_int_part,
                           uint64_t *out_frac_part,
                           int *out_frac_base10_neg_e);
-static int npf_ftoa_rev(char *buf, float f, char case_adj, int *out_frac_chars);
+static int npf_ftoa_rev(char *buf, float f, npf_format_spec_t const *spec, int *out_frac_chars);
 #endif
 
 #if NANOPRINTF_USE_BINARY_FORMAT_SPECIFIERS == 1
@@ -498,7 +498,7 @@ enum {
   NPF_EXPONENT_BITS = 8,
   NPF_EXPONENT_BIAS = 127,
   NPF_FRACTION_BIN_DIGITS = 64,
-  NPF_MAX_FRACTION_DEC_DIGITS = 8
+  NPF_MAX_FRACTION_DEC_DIGITS = 9
 };
 
 int npf_fsplit_abs(float f, uint64_t *out_int_part, uint64_t *out_frac_part,
@@ -571,7 +571,7 @@ int npf_fsplit_abs(float f, uint64_t *out_int_part, uint64_t *out_frac_part,
   return 1;
 }
 
-int npf_ftoa_rev(char *buf, float f, char case_adj, int *out_frac_chars) {
+int npf_ftoa_rev(char *buf, float f, npf_format_spec_t const *spec, int *out_frac_chars) {
   uint32_t f_bits; { // union-cast is UB, let compiler optimize byte-copy loop.
     char const *src = (char const *)&f;
     char *dst = (char *)&f_bits;
@@ -580,9 +580,9 @@ int npf_ftoa_rev(char *buf, float f, char case_adj, int *out_frac_chars) {
 
   if ((uint8_t)(f_bits >> 23) == 0xFF) {
     if (f_bits & 0x7fffff) {
-      for (int i = 0; i < 3; ++i) { *buf++ = (char)("NAN"[i] + case_adj); }
+      for (int i = 0; i < 3; ++i) { *buf++ = (char)("NAN"[i] + spec->case_adjust); }
     } else {
-      for (int i = 0; i < 3; ++i) { *buf++ = (char)("FNI"[i] + case_adj); }
+      for (int i = 0; i < 3; ++i) { *buf++ = (char)("FNI"[i] + spec->case_adjust); }
     }
     return -3;
   }
@@ -590,7 +590,7 @@ int npf_ftoa_rev(char *buf, float f, char case_adj, int *out_frac_chars) {
   uint64_t int_part, frac_part;
   int frac_base10_neg_exp;
   if (npf_fsplit_abs(f, &int_part, &frac_part, &frac_base10_neg_exp) == 0) {
-    for (int i = 0; i < 3; ++i) { *buf++ = (char)("ROO"[i] + case_adj); }
+    for (int i = 0; i < 3; ++i) { *buf++ = (char)("ROO"[i] + spec->case_adjust); }
     return -3;
   }
 
@@ -604,6 +604,18 @@ int npf_ftoa_rev(char *buf, float f, char case_adj, int *out_frac_chars) {
   // write the 0 digits between the . and the first fractional digit
   while (frac_base10_neg_exp-- > 0) { *dst++ = '0'; }
   *out_frac_chars = (int)(dst - buf);
+
+  // round the value to the specified precision
+  if (spec->prec < *out_frac_chars) {
+    char *digit = dst - spec->prec - 1;
+    unsigned carry = (*digit >= '5');
+    while (carry && (++digit < dst)) {
+      carry = (*digit == '9');
+      *digit = carry ? '0' : (*digit + 1);
+    }
+    int_part += carry; // overflow is not possible
+  }
+
   *dst++ = '.';
 
   // write the integer digits
@@ -880,7 +892,7 @@ int npf_vpprintf(npf_putc pc, void *pc_ctx, char const *format, va_list args) {
 #if NANOPRINTF_USE_FIELD_WIDTH_FORMAT_SPECIFIERS == 1
         zero = (val == 0.f);
 #endif
-        cbuf_len = npf_ftoa_rev(cbuf, val, fs.case_adjust, &frac_chars);
+        cbuf_len = npf_ftoa_rev(cbuf, val, &fs, &frac_chars);
 
         if (cbuf_len < 0) {
           cbuf_len = -cbuf_len;
diff --git a/tests/mpaland-conformance b/tests/mpaland-conformance
index 550f932..38f318f 160000
--- a/tests/mpaland-conformance
+++ b/tests/mpaland-conformance
@@ -1 +1 @@
-Subproject commit 550f932df0fcf3d41c875fec4b5e57faaaa9ce66
+Subproject commit 38f318ff21fc44a97f81506b2419a03ef90b1413
diff --git a/tests/unit_fsplit_abs.cc b/tests/unit_fsplit_abs.cc
index 18c7d51..cde8fbf 100644
--- a/tests/unit_fsplit_abs.cc
+++ b/tests/unit_fsplit_abs.cc
@@ -40,8 +40,8 @@ TEST_CASE("npf_fsplit_abs") {
   // fractional base-10 negative exponent
   require_fsplit_abs(0.03125f, 0, 3125, 1);
   require_fsplit_abs(0.0078125f, 0, 78125, 2);
-  require_fsplit_abs(2.4414062E-4f, 0, 24414062, 3);
-  require_fsplit_abs(3.8146973E-6f, 0, 38146972, 5);
+  require_fsplit_abs(2.4414062E-4f, 0, 244140625, 3);
+  require_fsplit_abs(3.8146973E-6f, 0, 381469726, 5);
 
   // perfectly-representable fractions, adding 1 bit to mantissa each time.
   require_fsplit_abs(1.5f, 1, 5, 0);
@@ -55,19 +55,19 @@ TEST_CASE("npf_fsplit_abs") {
   require_fsplit_abs(1.9960938f, 1, 99609375, 0); // first truncation divergence.
 
   // truncations, but continue adding mantissa bits
-  require_fsplit_abs(1.9980469f, 1, 99804687, 0); // 1.998046875 is stored.
-  require_fsplit_abs(1.9990234f, 1, 99902343, 0); // 1.9990234375 is stored.
-  require_fsplit_abs(1.9995117f, 1, 99951171, 0); // 1.99951171875 is stored.
-  require_fsplit_abs(1.9997559f, 1, 99975585, 0); // 1.999755859375 is stored.
-  require_fsplit_abs(1.9998779f, 1, 99987792, 0); // 1.9998779296875 is stored.
-  require_fsplit_abs(1.999939f,  1, 99993896, 0); // 1.99993896484375 is stored.
-  require_fsplit_abs(1.9999695f, 1, 99996948, 0); // 1.999969482421875 is stored.
-  require_fsplit_abs(1.9999847f, 1, 99998474, 0); // 1.9999847412109375 is stored.
-  require_fsplit_abs(1.9999924f, 1, 99999237, 0); // 1.99999237060546875 is stored.
-  require_fsplit_abs(1.9999962f, 1, 99999618, 0); // 1.999996185302734375 is stored.
-  require_fsplit_abs(1.9999981f, 1, 99999809, 0); // 1.9999980926513671875 is stored.
-  require_fsplit_abs(1.999999f,  1, 99999904, 0); // 1.99999904632568359375 is stored.
-  require_fsplit_abs(1.9999995f, 1, 99999952, 0); // 1.999999523162841796875 is stored.
-  require_fsplit_abs(1.9999998f, 1, 99999976, 0); // 1.9999997615814208984375 is stored.
-  require_fsplit_abs(1.9999999f, 1, 99999988, 0); // 1.99999988079071044921875 is stored.
+  require_fsplit_abs(1.9980469f, 1, 998046875, 0); // 1.998046875 is stored.
+  require_fsplit_abs(1.9990234f, 1, 999023437, 0); // 1.9990234375 is stored.
+  require_fsplit_abs(1.9995117f, 1, 999511718, 0); // 1.99951171875 is stored.
+  require_fsplit_abs(1.9997559f, 1, 999755859, 0); // 1.999755859375 is stored.
+  require_fsplit_abs(1.9998779f, 1, 999877929, 0); // 1.9998779296875 is stored.
+  require_fsplit_abs(1.999939f,  1, 999938964, 0); // 1.99993896484375 is stored.
+  require_fsplit_abs(1.9999695f, 1, 999969482, 0); // 1.999969482421875 is stored.
+  require_fsplit_abs(1.9999847f, 1, 999984741, 0); // 1.9999847412109375 is stored.
+  require_fsplit_abs(1.9999924f, 1, 999992370, 0); // 1.99999237060546875 is stored.
+  require_fsplit_abs(1.9999962f, 1, 999996185, 0); // 1.999996185302734375 is stored.
+  require_fsplit_abs(1.9999981f, 1, 999998092, 0); // 1.9999980926513671875 is stored.
+  require_fsplit_abs(1.999999f,  1, 999999046, 0); // 1.99999904632568359375 is stored.
+  require_fsplit_abs(1.9999995f, 1, 999999523, 0); // 1.999999523162841796875 is stored.
+  require_fsplit_abs(1.9999998f, 1, 999999761, 0); // 1.9999997615814208984375 is stored.
+  require_fsplit_abs(1.9999999f, 1, 999999880, 0); // 1.99999988079071044921875 is stored.
 }
diff --git a/tests/unit_ftoa_rev.cc b/tests/unit_ftoa_rev.cc
index b20f8c3..f5cc575 100644
--- a/tests/unit_ftoa_rev.cc
+++ b/tests/unit_ftoa_rev.cc
@@ -12,11 +12,14 @@
 
 TEST_CASE("ftoa_rev") {
   char buf[64];
+  npf_format_spec_t spec;
   int frac_bytes;
   memset(buf, 0, sizeof(buf));
+  memset(&spec, 0, sizeof(spec));
+  spec.prec = 1;
 
   SUBCASE("zero") {
-    REQUIRE(npf_ftoa_rev(buf, 0.f, 0, &frac_bytes) == 2);
+    REQUIRE(npf_ftoa_rev(buf, 0.f, &spec, &frac_bytes) == 2);
     REQUIRE(std::string{".0"} == buf);
   }
 }