Skip to content

Commit

Permalink
Support better type deduction
Browse files Browse the repository at this point in the history
o Empty/None values are ignored for deducing the type of a column
o Comma-separated (thousands-separator) numbers are allowed for int and float types
  • Loading branch information
pjkundert committed Oct 7, 2024
1 parent cecb08e commit 8c9c9e7
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 7 deletions.
35 changes: 28 additions & 7 deletions tabulate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -933,8 +933,13 @@ def _isbool(string):
def _type(string, has_invisible=True, numparse=True):
"""The least generic type (type(None), int, float, str, unicode).
Treats empty string as missing for the purposes of type deduction, so as to not influence
the type of an otherwise complete column; does *not* result in missingval replacement!
>>> _type(None) is type(None)
True
>>> _type("") is type(None)
True
>>> _type("foo") is type("")
True
>>> _type("1") is type(1)
Expand All @@ -949,15 +954,26 @@ def _type(string, has_invisible=True, numparse=True):
if has_invisible and isinstance(string, (str, bytes)):
string = _strip_ansi(string)

if string is None:
if string is None or (isinstance(string, (bytes, str)) and not string):
return type(None)
elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
return str
elif _isbool(string):
return bool
elif _isint(string) and numparse:
elif numparse and (
_isint(string) or (
isinstance(string, str)
and _isnumber_with_thousands_separator(string)
and '.' not in string
)
):
return int
elif _isnumber(string) and numparse:
elif numparse and (
_isnumber(string) or (
isinstance(string, str)
and _isnumber_with_thousands_separator(string)
)
):
return float
elif isinstance(string, bytes):
return bytes
Expand Down Expand Up @@ -1251,7 +1267,7 @@ def _column_type(strings, has_invisible=True, numparse=True):


def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
"""Format a value according to its type.
"""Format a value according to its deduced type. Empty values are deemed valid for any type.
Unicode is supported:
Expand All @@ -1264,6 +1280,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
""" # noqa
if val is None:
return missingval
if isinstance(val, (bytes, str)) and not val:
return ""

if valtype is str:
return f"{val}"
Expand Down Expand Up @@ -1298,6 +1316,8 @@ def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
formatted_val = format(float(raw_val), floatfmt)
return val.replace(raw_val, formatted_val)
else:
if isinstance(val,str) and ',' in val:
val = val.replace(',', '') # handle thousands-separators
return format(float(val), floatfmt)
else:
return f"{val}"
Expand Down Expand Up @@ -1592,9 +1612,10 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True):

if width is not None:
wrapper = _CustomTextWrap(width=width)
# Cast based on our internal type handling
# Any future custom formatting of types (such as datetimes)
# may need to be more explicit than just `str` of the object
# Cast based on our internal type handling. Any future custom
# formatting of types (such as datetimes) may need to be more
# explicit than just `str` of the object. Also doesn't work for
# custom floatfmt/intfmt, nor with any missing/blank cells.
casted_cell = (
str(cell) if _isnumber(cell) else _type(cell, numparse)(cell)
)
Expand Down
33 changes: 33 additions & 0 deletions test/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2824,6 +2824,13 @@ def test_floatfmt():
assert_equal(expected, result)


def test_floatfmt_thousands():
    "Output: floating point format, including a string with a thousands separator"
    # One column mixing a numeric string, a real float, and a numeric string
    # that carries a comma thousands-separator.
    rows = [["1.23456789"], [1.0], ["1,234.56"]]
    expected = " 1.235\n 1.000\n1234.560"
    result = tabulate(rows, floatfmt=".3f", tablefmt="plain")
    assert_equal(expected, result)


def test_floatfmt_multi():
"Output: floating point format different for each column"
result = tabulate(
Expand Down Expand Up @@ -2964,6 +2971,32 @@ def test_missingval_multi():
assert_equal(expected, result)


def test_column_emptymissing_deduction():
    "Missing or empty/blank values shouldn't change type deduction of rest of column"
    from fractions import Fraction

    # Every column mixes real values with None, "" or b"" cells; the blank
    # cells must not affect the type deduced for the rest of the column.
    test_table = [
        [None, "1.23423515351", Fraction(1, 3)],
        [Fraction(56789, 1000000), 12345.1, b"abc"],
        ["", b"", None],
        [Fraction(10000, 3), None, ""],
    ]
    result = tabulate(
        test_table,
        floatfmt=",.5g",
        missingval="?",
    )
    # None cells are rendered as missingval ("?"); empty str/bytes cells are
    # rendered as an empty string rather than being replaced.
    # NOTE(review): the padding inside this literal looks collapsed; confirm
    # the exact column spacing against real tabulate output.
    expected = """\
------------ ----------- ---
? 1.2342 1/3
0.056789 12,345 abc
?
3,333.3 ?
------------ ----------- ---"""
    assert_equal(expected, result)


def test_column_alignment():
"Output: custom alignment for text and numbers"
expected = "\n".join(["----- ---", "Alice 1", " Bob 333", "----- ---"])
Expand Down

0 comments on commit 8c9c9e7

Please sign in to comment.