Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto generate excessive long parametrized ids #7254

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
20 changes: 13 additions & 7 deletions src/_pytest/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,19 +1181,25 @@ def _idval(
if hook_id:
return hook_id

modified_val = None # empty strings are valid as val
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a better name for this variable would be id. Because I see val as the input value, and the modified_val here is actually the intended ID string for that value.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks @bluetech will revisit this during this week. - Any other thoughts on a more appropriate name? I wouldnt want to shadow the id() builtin here

if isinstance(val, STRING_TYPES):
return _ascii_escaped_by_config(val, config)
modified_val = _ascii_escaped_by_config(val, config)
elif val is None or isinstance(val, (float, int, bool)):
return str(val)
modified_val = str(val)
elif isinstance(val, REGEX_TYPE):
return ascii_escaped(val.pattern)
modified_val = ascii_escaped(val.pattern)
elif isinstance(val, enum.Enum):
return str(val)
modified_val = str(val)
elif isinstance(getattr(val, "__name__", None), str):
# name of a class, function, module, etc.
name = getattr(val, "__name__") # type: str
return name
return str(argname) + str(idx)
modified_val = getattr(val, "__name__")

# Check if the post-checks value is greater than 100 chars in length and use auto id gen if so
# isinstance checks can return empty strings which is considered valid, so we explicitly check None on modified_val
# for example @pytest.mark.parametrize('x', ('', ' ')) is acceptable
if modified_val is None or len(modified_val) > 100:
return str(argname) + str(idx)
return modified_val


def _idvalset(
Expand Down
40 changes: 37 additions & 3 deletions testing/python/metafunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,8 @@ def test_unicode_idval(self) -> None:
("ação", "a\\xe7\\xe3o"),
("josé@blah.com", "jos\\xe9@blah.com"),
(
"δοκ.ιμή@παράδειγμα.δοκιμή",
"\\u03b4\\u03bf\\u03ba.\\u03b9\\u03bc\\u03ae@\\u03c0\\u03b1\\u03c1\\u03ac\\u03b4\\u03b5\\u03b9\\u03b3"
"\\u03bc\\u03b1.\\u03b4\\u03bf\\u03ba\\u03b9\\u03bc\\u03ae",
"δοκ.ιμή@δοκιμή",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

converted bytes now exceed the 100 limit, unsure of the data set so shortened it, not sure if acceptable

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one makes me wonder if a "character" limit of 100 might be better than a bytes limit of 100

as far as i can guess the source data is a most interesting mail address and the shortened one isnt

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why we even escape ID names to ASCII? Is there a reason not to allow Unicode IDs?

OK found the answer: disable_test_id_escaping_and_forfeit_all_rights_to_community_support hehe.

"\\u03b4\\u03bf\\u03ba.\\u03b9\\u03bc\\u03ae@\\u03b4\\u03bf\\u03ba\\u03b9\\u03bc\\u03ae",
),
]
for val, expected in values:
Expand Down Expand Up @@ -1899,3 +1898,38 @@ def test_converted_to_str(a, b):
"*= 6 passed in *",
]
)


def test_auto_generated_long_parametrized_ids(testdir):
testdir.makepyfile(
"""
import pytest
from datetime import datetime
from enum import Enum

class CustomEnum(Enum):
OVER = "over" * 26
UNDER = "under" * 2

@pytest.mark.parametrize(
"value, expected",
[
("*" * 101, True),
(b"*" * 101, True),
(25, False),
(50.25, False),
("*" * 50, False),
(True, False),
(datetime(2001, 12, 12), True),
(CustomEnum.OVER, False),
(datetime, False),
(dir, False),
])
def test_parametrized_auto_generating_long(request, value, expected):
node_name = request.node.name
was_rewritten = "value" in node_name
assert was_rewritten == expected
"""
)
result = testdir.runpytest()
result.assert_outcomes(passed=10)