From 8c9c9e75c9a7da858e8ecf72236ee021c69e2d36 Mon Sep 17 00:00:00 2001 From: Perry Kundert Date: Thu, 20 Jul 2023 14:16:55 -0600 Subject: [PATCH] Support better type deduction o Empty/None values are ignored for deducing the type of a column o Comma-separated numbers are allowed for int and float types --- tabulate/__init__.py | 35 ++++++++++++++++++++++++++++------- test/test_output.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index c349a79..8642c8d 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -933,8 +933,13 @@ def _isbool(string): def _type(string, has_invisible=True, numparse=True): """The least generic type (type(None), int, float, str, unicode). + Treats empty string as missing for the purposes of type deduction, so as to not influence + the type of an otherwise complete column; does *not* result in missingval replacement! + >>> _type(None) is type(None) True + >>> _type("") is type(None) + True >>> _type("foo") is type("") True >>> _type("1") is type(1) @@ -949,15 +954,26 @@ def _type(string, has_invisible=True, numparse=True): if has_invisible and isinstance(string, (str, bytes)): string = _strip_ansi(string) - if string is None: + if string is None or (isinstance(string, (bytes, str)) and not string): return type(None) elif hasattr(string, "isoformat"): # datetime.datetime, date, and time return str elif _isbool(string): return bool - elif _isint(string) and numparse: + elif numparse and ( + _isint(string) or ( + isinstance(string, str) + and _isnumber_with_thousands_separator(string) + and '.' 
not in string + ) + ): return int - elif _isnumber(string) and numparse: + elif numparse and ( + _isnumber(string) or ( + isinstance(string, str) + and _isnumber_with_thousands_separator(string) + ) + ): return float elif isinstance(string, bytes): return bytes @@ -1251,7 +1267,7 @@ def _column_type(strings, has_invisible=True, numparse=True): def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): - """Format a value according to its type. + """Format a value according to its deduced type. Empty values are deemed valid for any type. Unicode is supported: @@ -1264,6 +1280,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): """ # noqa if val is None: return missingval + if isinstance(val, (bytes, str)) and not val: + return "" if valtype is str: return f"{val}" @@ -1298,6 +1316,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True): formatted_val = format(float(raw_val), floatfmt) return val.replace(raw_val, formatted_val) else: + if isinstance(val,str) and ',' in val: + val = val.replace(',', '') # handle thousands-separators return format(float(val), floatfmt) else: return f"{val}" @@ -1592,9 +1612,10 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True): if width is not None: wrapper = _CustomTextWrap(width=width) - # Cast based on our internal type handling - # Any future custom formatting of types (such as datetimes) - # may need to be more explicit than just `str` of the object + # Cast based on our internal type handling. Any future custom + # formatting of types (such as datetimes) may need to be more + # explicit than just `str` of the object. Also doesn't work for + # custom floatfmt/intfmt, nor with any missing/blank cells. 
casted_cell = ( str(cell) if _isnumber(cell) else _type(cell, numparse)(cell) ) diff --git a/test/test_output.py b/test/test_output.py index 68b5e55..5b94e82 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -2824,6 +2824,13 @@ def test_floatfmt(): assert_equal(expected, result) +def test_floatfmt_thousands(): + "Output: floating point format" + result = tabulate([["1.23456789"], [1.0], ["1,234.56"]], floatfmt=".3f", tablefmt="plain") + expected = " 1.235\n 1.000\n1234.560" + assert_equal(expected, result) + + def test_floatfmt_multi(): "Output: floating point format different for each column" result = tabulate( @@ -2964,6 +2971,32 @@ def test_missingval_multi(): assert_equal(expected, result) +def test_column_emptymissing_deduction(): + "Missing or empty/blank values shouldn't change type deduction of rest of column" + from fractions import Fraction + + test_table = [ + [None, "1.23423515351", Fraction(1, 3)], + [Fraction(56789, 1000000), 12345.1, b"abc"], + ["", b"", None], + [Fraction(10000, 3), None, ""], + ] + result = tabulate( + test_table, + floatfmt=",.5g", + missingval="?", + ) + print(f"\n{result}") + expected = """\ +------------ ----------- --- + ? 1.2342 1/3 + 0.056789 12,345 abc + ? +3,333.3 ? +------------ ----------- ---""" + assert_equal(expected, result) + + def test_column_alignment(): "Output: custom alignment for text and numbers" expected = "\n".join(["----- ---", "Alice 1", " Bob 333", "----- ---"])