diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 07b77d3e04bbe0..857579f758386f 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -9,10 +9,35 @@ _testcapi = None +NULL = None + +class Str(str): + pass + + class CAPITest(unittest.TestCase): - # Test PyUnicode_FromFormat() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromobject(self): + """Test PyUnicode_FromObject()""" + from _testcapi import unicode_fromobject as fromobject + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertEqual(fromobject(s), s) + o = Str(s) + s2 = fromobject(o) + self.assertEqual(s2, s) + self.assertIs(type(s2), str) + self.assertIsNot(s2, s) + + self.assertRaises(TypeError, fromobject, b'abc') + self.assertRaises(TypeError, fromobject, []) + # CRASHES fromobject(NULL) + def test_from_format(self): + """Test PyUnicode_FromFormat()""" import_helper.import_module('ctypes') from ctypes import ( c_char_p, @@ -268,10 +293,10 @@ def check_format(expected, format, *args): self.assertRaisesRegex(SystemError, 'invalid format string', PyUnicode_FromFormat, b'%+i', c_int(10)) - # Test PyUnicode_AsWideChar() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidechar(self): + """Test PyUnicode_AsWideChar()""" from _testcapi import unicode_aswidechar import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -307,10 +332,10 @@ def test_aswidechar(self): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsWideCharString() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_aswidecharstring(self): + """Test PyUnicode_AsWideCharString()""" from _testcapi import unicode_aswidecharstring import_helper.import_module('ctypes') from ctypes import c_wchar, sizeof @@ -332,10 +357,10 @@ def test_aswidecharstring(self): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') - # Test PyUnicode_AsUCS4() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asucs4(self): + """Test PyUnicode_AsUCS4()""" from _testcapi import unicode_asucs4 for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', 'a\ud800b\udfffc', '\ud834\udd1e']: @@ -350,10 +375,10 @@ def test_asucs4(self): self.assertEqual(unicode_asucs4(s, len(s), True), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), False), s+'\uffff') - # Test PyUnicode_AsUTF8() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8(self): + """Test PyUnicode_AsUTF8()""" from _testcapi import unicode_asutf8 bmp = '\u0100' @@ -365,10 +390,10 @@ def test_asutf8(self): self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf') self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc') - # Test PyUnicode_AsUTF8AndSize() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_asutf8andsize(self): + """Test PyUnicode_AsUTF8AndSize()""" from _testcapi import unicode_asutf8andsize bmp = '\u0100' @@ -380,54 +405,275 @@ def test_asutf8andsize(self): self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4)) self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc') - # Test PyUnicode_Count() + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_concat(self): + """Test PyUnicode_Concat()""" + from _testcapi import unicode_concat as concat + + self.assertEqual(concat('abc', 'def'), 'abcdef') + self.assertEqual(concat('abc', 'где'), 'abcгде') + self.assertEqual(concat('абв', 'def'), 'абвdef') + self.assertEqual(concat('абв', 'где'), 'абвгде') + self.assertEqual(concat('a\0b', 'c\0d'), 'a\0bc\0d') + + self.assertRaises(TypeError, concat, b'abc', 'def') + self.assertRaises(TypeError, concat, 'abc', b'def') + self.assertRaises(TypeError, concat, b'abc', b'def') + self.assertRaises(TypeError, concat, [], 'def') + self.assertRaises(TypeError, concat, 'abc', []) + self.assertRaises(TypeError, concat, [], []) + # CRASHES concat(NULL, 'def') + # CRASHES concat('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_split(self): + """Test PyUnicode_Split()""" + from _testcapi import unicode_split as split + + self.assertEqual(split('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(split('a|b|c|d', '|', 2), ['a', 'b', 'c|d']) + self.assertEqual(split('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(split('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(split('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(split('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(split(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(split('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, split, 'a|b|c|d', '') + self.assertRaises(TypeError, split, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, split, [], '|') + # CRASHES split(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rsplit(self): + """Test PyUnicode_RSplit()""" + from _testcapi import unicode_rsplit as rsplit + + self.assertEqual(rsplit('a|b|c|d', '|'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '|', 2), ['a|b', 'c', 'd']) + self.assertEqual(rsplit('a|b|c|d', '\u20ac'), ['a|b|c|d']) + self.assertEqual(rsplit('a||b|c||d', '||'), ['a', 'b|c', 'd']) + self.assertEqual(rsplit('а|б|в|г', '|'), ['а', 'б', 'в', 'г']) + self.assertEqual(rsplit('абабагаламага', 'а'), + ['', 'б', 'б', 'г', 'л', 'м', 'г', '']) + self.assertEqual(rsplit('aжbжcжd', 'ж'), ['a', 'b', 'c', 'd']) + self.assertEqual(rsplit(' a\tb\nc\rd\ve\f', NULL), + ['a', 'b', 'c', 'd', 'e']) + self.assertEqual(rsplit('a\x85b\xa0c\u1680d\u2000e', NULL), + ['a', 'b', 'c', 'd', 'e']) + + self.assertRaises(ValueError, rsplit, 'a|b|c|d', '') + self.assertRaises(TypeError, rsplit, 'a|b|c|d', ord('|')) + self.assertRaises(TypeError, rsplit, [], '|') + # CRASHES rsplit(NULL, '|') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_partition(self): + """Test PyUnicode_Partition()""" + from _testcapi import unicode_partition as partition + + self.assertEqual(partition('a|b|c', '|'), ('a', '|', 'b|c')) + self.assertEqual(partition('a||b||c', '||'), ('a', '||', 'b||c')) + self.assertEqual(partition('а|б|в', '|'), ('а', '|', 'б|в')) + self.assertEqual(partition('кабан', 'а'), ('к', 'а', 'бан')) + self.assertEqual(partition('aжbжc', 'ж'), ('a', 'ж', 'bжc')) + + self.assertRaises(ValueError, partition, 'a|b|c', '') + self.assertRaises(TypeError, partition, b'a|b|c', '|') + self.assertRaises(TypeError, partition, 'a|b|c', b'|') + self.assertRaises(TypeError, partition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, partition, [], '|') + # CRASHES partition(NULL, '|') + # CRASHES partition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_rpartition(self): + """Test PyUnicode_RPartition()""" + from _testcapi import unicode_rpartition as rpartition + + self.assertEqual(rpartition('a|b|c', '|'), ('a|b', '|', 'c')) + self.assertEqual(rpartition('a||b||c', '||'), ('a||b', '||', 'c')) + self.assertEqual(rpartition('а|б|в', '|'), ('а|б', '|', 'в')) + self.assertEqual(rpartition('кабан', 'а'), ('каб', 'а', 'н')) + self.assertEqual(rpartition('aжbжc', 'ж'), ('aжb', 'ж', 'c')) + + self.assertRaises(ValueError, rpartition, 'a|b|c', '') + self.assertRaises(TypeError, rpartition, b'a|b|c', '|') + self.assertRaises(TypeError, rpartition, 'a|b|c', b'|') + self.assertRaises(TypeError, rpartition, 'a|b|c', ord('|')) + self.assertRaises(TypeError, rpartition, [], '|') + # CRASHES rpartition(NULL, '|') + # CRASHES rpartition('a|b|c', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_splitlines(self): + """Test PyUnicode_SplitLines()""" + from _testcapi import unicode_splitlines as splitlines + + self.assertEqual(splitlines('a\nb\rc\r\nd'), ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\nb\rc\r\nd', True), + ['a\n', 'b\r', 'c\r\n', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d'), + ['a', 'b', 'c', 'd']) + self.assertEqual(splitlines('a\x85b\u2028c\u2029d', True), + ['a\x85', 'b\u2028', 'c\u2029', 'd']) + self.assertEqual(splitlines('а\nб\rв\r\nг'), ['а', 'б', 'в', 'г']) + + self.assertRaises(TypeError, splitlines, b'a\nb\rc\r\nd') + # CRASHES splitlines(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_translate(self): + """Test PyUnicode_Translate()""" + from _testcapi import unicode_translate as translate + + self.assertEqual(translate('abcd', {ord('a'): 'A', ord('b'): ord('B'), ord('c'): '<>'}), 'AB<>d') + self.assertEqual(translate('абвг', {ord('а'): 'А', ord('б'): ord('Б'), ord('в'): '<>'}), 'АБ<>г') + self.assertEqual(translate('abc', {}), 'abc') + self.assertEqual(translate('abc', []), 'abc') + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}) + self.assertRaises(UnicodeTranslateError, translate, 'abc', {ord('b'): None}, 'strict') + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + self.assertEqual(translate('abc', {ord('b'): None}, 'ignore'), 'ac') + self.assertEqual(translate('abc', {ord('b'): None}, 'replace'), 'a\ufffdc') + self.assertEqual(translate('abc', {ord('b'): None}, 'backslashreplace'), r'a\x62c') + # XXX Other error handlers do not support UnicodeTranslateError + self.assertRaises(TypeError, translate, b'abc', []) + self.assertRaises(TypeError, translate, 123, []) + self.assertRaises(TypeError, translate, 'abc', {ord('a'): b'A'}) + self.assertRaises(TypeError, translate, 'abc', 123) + self.assertRaises(TypeError, translate, 'abc', NULL) + self.assertRaises(LookupError, translate, 'abc', {ord('b'): None}, 'foo') + # CRASHES translate(NULL, []) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_join(self): + """Test PyUnicode_Join()""" + from _testcapi import unicode_join as join + self.assertEqual(join('|', ['a', 'b', 'c']), 'a|b|c') + self.assertEqual(join('|', ['a', '', 'c']), 'a||c') + self.assertEqual(join('', ['a', 'b', 'c']), 'abc') + self.assertEqual(join(NULL, ['a', 'b', 'c']), 'a b c') + self.assertEqual(join('|', ['а', 'б', 'в']), 'а|б|в') + self.assertEqual(join('ж', ['а', 'б', 'в']), 'ажбжв') + self.assertRaises(TypeError, join, b'|', ['a', 'b', 'c']) + self.assertRaises(TypeError, join, '|', [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, NULL, [b'a', b'b', b'c']) + self.assertRaises(TypeError, join, '|', b'123') + self.assertRaises(TypeError, join, '|', 123) + self.assertRaises(SystemError, join, '|', NULL) + @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_count(self): + """Test PyUnicode_Count()""" from _testcapi import unicode_count - st = 'abcabd' - self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2) - self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1) - self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0) # cyrillic "a" + for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": + for i, ch in enumerate(str): + self.assertEqual(unicode_count(str, ch, 0, len(str)), 1) + + str = "!>_= end + self.assertEqual(unicode_count(str, '!', 0, 0), 0) + self.assertEqual(unicode_count(str, '!', len(str), 0), 0) + # negative + self.assertEqual(unicode_count(str, '!', -len(str), -1), 1) + # bad arguments + self.assertRaises(TypeError, unicode_count, str, b'!', 0, len(str)) + self.assertRaises(TypeError, unicode_count, b"!>__= end - self.assertEqual(unicode_count(st, 'abc', 0, 0), 0) - self.assertEqual(unicode_count(st, 'a', 3, 2), 0) - self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0) + self.assertEqual(find(str, '!', 0, 0, 1), -1) + self.assertEqual(find(str, '!', len(str), 0, 1), -1) # negative - self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2) - self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1) - # wrong args - self.assertRaises(TypeError, unicode_count, 'a', 'a') - self.assertRaises(TypeError, unicode_count, 'a', 'a', 1) - self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1) - self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1) - # empty string - self.assertEqual(unicode_count('abc', '', 0, 3), 4) - self.assertEqual(unicode_count('abc', '', 1, 3), 3) - self.assertEqual(unicode_count('', '', 0, 1), 1) - self.assertEqual(unicode_count('', 'a', 0, 1), 0) - # different unicode kinds - for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1": - for ch in uni: - self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1) - self.assertEqual(unicode_count(st, ch, 0, len(st)), 0) - - # subclasses should still work - class MyStr(str): - pass - - self.assertEqual(unicode_count(MyStr('aab'), 'a', 0, 3), 2) - - # Test PyUnicode_FindChar() + self.assertEqual(find(str, '!', -len(str), -1, 1), 0) + self.assertEqual(find(str, '!', -len(str), -1, -1), 0) + # bad arguments + self.assertRaises(TypeError, find, str, b'!', 0, len(str), 1) + self.assertRaises(TypeError, find, b"!>__'), '<>br<>c<>d<>br<>') + self.assertEqual(replace(str, 'abra', '='), '=cad=') + self.assertEqual(replace(str, 'a', '=', 2), '=br=cadabra') + self.assertEqual(replace(str, 'a', '=', 0), str) + self.assertEqual(replace(str, 'a', '=', sys.maxsize), '=br=c=d=br=') + self.assertEqual(replace(str, 'z', '='), str) + self.assertEqual(replace(str, '', '='), '=a=b=r=a=c=a=d=a=b=r=a=') + self.assertEqual(replace(str, 'a', 'ж'), 'жbrжcжdжbrж') + self.assertEqual(replace('абабагаламага', 'а', '='), '=б=б=г=л=м=г=') + self.assertEqual(replace('Баден-Баден', 'Баден', 'Baden'), 'Baden-Baden') + # bad arguments + self.assertRaises(TypeError, replace, 'a', 'a', b'=') + self.assertRaises(TypeError, replace, 'a', b'a', '=') + self.assertRaises(TypeError, replace, b'a', 'a', '=') + self.assertRaises(TypeError, replace, 'a', 'a', ord('=')) + self.assertRaises(TypeError, replace, 'a', ord('a'), '=') + self.assertRaises(TypeError, replace, [], 'a', '=') + # CRASHES replace('a', 'a', NULL) + # CRASHES replace('a', NULL, '=') + # CRASHES replace(NULL, 'a', '=') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_compare(self): + """Test PyUnicode_Compare()""" + from _testcapi import unicode_compare as compare + + self.assertEqual(compare('abc', 'abc'), 0) + self.assertEqual(compare('abc', 'def'), -1) + self.assertEqual(compare('def', 'abc'), 1) + self.assertEqual(compare('abc', 'abc\0def'), -1) + self.assertEqual(compare('abc\0def', 'abc\0def'), 0) + self.assertEqual(compare('абв', 'abc'), 1) + + self.assertRaises(TypeError, compare, b'abc', 'abc') + self.assertRaises(TypeError, compare, 'abc', b'abc') + self.assertRaises(TypeError, compare, b'abc', b'abc') + self.assertRaises(TypeError, compare, [], 'abc') + self.assertRaises(TypeError, compare, 'abc', []) + self.assertRaises(TypeError, compare, [], []) + # CRASHES compare(NULL, 'abc') + # CRASHES compare('abc', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_comparewithasciistring(self): + """Test PyUnicode_CompareWithASCIIString()""" + from _testcapi import unicode_comparewithasciistring as comparewithasciistring + + self.assertEqual(comparewithasciistring('abc', b'abc'), 0) + self.assertEqual(comparewithasciistring('abc', b'def'), -1) + self.assertEqual(comparewithasciistring('def', b'abc'), 1) + self.assertEqual(comparewithasciistring('abc', b'abc\0def'), 0) + self.assertEqual(comparewithasciistring('abc\0def', b'abc\0def'), 1) + self.assertEqual(comparewithasciistring('абв', b'abc'), 1) + + # CRASHES comparewithasciistring(b'abc', b'abc') + # CRASHES comparewithasciistring([], b'abc') + # CRASHES comparewithasciistring(NULL, b'abc') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_richcompare(self): + """Test PyUnicode_RichCompare()""" + from _testcapi import unicode_richcompare as richcompare + + LT, LE, EQ, NE, GT, GE = range(6) + strings = ('abc', 'абв', '\U0001f600', 'abc\0') + for s1 in strings: + for s2 in strings: + self.assertIs(richcompare(s1, s2, LT), s1 < s2) + self.assertIs(richcompare(s1, s2, LE), s1 <= s2) + self.assertIs(richcompare(s1, s2, EQ), s1 == s2) + self.assertIs(richcompare(s1, s2, NE), s1 != s2) + self.assertIs(richcompare(s1, s2, GT), s1 > s2) + self.assertIs(richcompare(s1, s2, GE), s1 >= s2) + + for op in LT, LE, EQ, NE, GT, GE: + self.assertIs(richcompare(b'abc', 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', b'abc', op), NotImplemented) + self.assertIs(richcompare(b'abc', b'abc', op), NotImplemented) + self.assertIs(richcompare([], 'abc', op), NotImplemented) + self.assertIs(richcompare('abc', [], op), NotImplemented) + self.assertIs(richcompare([], [], op), NotImplemented) + + # CRASHES richcompare(NULL, 'abc', op) + # CRASHES richcompare('abc', NULL, op) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_format(self): + """Test PyUnicode_Format()""" + from _testcapi import unicode_format as format + + self.assertEqual(format('x=%d!', 42), 'x=42!') + self.assertEqual(format('x=%d!', (42,)), 'x=42!') + self.assertEqual(format('x=%d y=%s!', (42, [])), 'x=42 y=[]!') + + self.assertRaises(SystemError, format, 'x=%d!', NULL) + self.assertRaises(SystemError, format, NULL, 42) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_contains(self): + """Test PyUnicode_Contains()""" + from _testcapi import unicode_contains as contains + + self.assertEqual(contains('abcd', ''), 1) + self.assertEqual(contains('abcd', 'b'), 1) + self.assertEqual(contains('abcd', 'x'), 0) + self.assertEqual(contains('abcd', 'ж'), 0) + self.assertEqual(contains('abcd', '\0'), 0) + self.assertEqual(contains('abc\0def', '\0'), 1) + self.assertEqual(contains('abcd', 'bc'), 1) + + self.assertRaises(TypeError, contains, b'abcd', 'b') + self.assertRaises(TypeError, contains, 'abcd', b'b') + self.assertRaises(TypeError, contains, b'abcd', b'b') + self.assertRaises(TypeError, contains, [], 'b') + self.assertRaises(TypeError, contains, 'abcd', ord('b')) + # CRASHES contains(NULL, 'b') + # CRASHES contains('abcd', NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_isidentifier(self): + """Test PyUnicode_IsIdentifier()""" + from _testcapi import unicode_isidentifier as isidentifier + + self.assertEqual(isidentifier("a"), 1) + self.assertEqual(isidentifier("b0"), 1) + self.assertEqual(isidentifier("µ"), 1) + self.assertEqual(isidentifier("𝔘𝔫𝔦𝔠𝔬𝔡𝔢"), 1) + + self.assertEqual(isidentifier(""), 0) + self.assertEqual(isidentifier(" "), 0) + self.assertEqual(isidentifier("["), 0) + self.assertEqual(isidentifier("©"), 0) + self.assertEqual(isidentifier("0"), 0) + self.assertEqual(isidentifier("32M"), 0) + + # CRASHES isidentifier(b"a") + # CRASHES isidentifier([]) + # CRASHES isidentifier(NULL) - # Test PyUnicode_CopyCharacters() @support.cpython_only @unittest.skipIf(_testcapi is None, 'need _testcapi module') def test_copycharacters(self): + """Test PyUnicode_CopyCharacters()""" from _testcapi import unicode_copycharacters strings = [ diff --git a/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst new file mode 100644 index 00000000000000..ec4cda2080323f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2022-11-19-13-34-28.gh-issue-99593.8ZfCkj.rst @@ -0,0 +1 @@ +Cover the Unicode C API with tests. diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index d5c4a9e5b95ec6..4c5049dd406a7c 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include "parts.h" static struct PyModuleDef *_testcapimodule = NULL; // set at initialization @@ -99,6 +100,17 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_RETURN_NONE; } +#define NULLABLE(x) do { if (x == Py_None) x = NULL; } while (0); + +/* Test PyUnicode_FromObject() */ +static PyObject * +unicode_fromobject(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_FromObject(arg); +} + +/* Test PyUnicode_AsWideChar() */ static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) { @@ -130,6 +142,7 @@ unicode_aswidechar(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsWideCharString() */ static PyObject * unicode_aswidecharstring(PyObject *self, PyObject *args) { @@ -151,6 +164,7 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +/* Test PyUnicode_AsUCS4() */ static PyObject * unicode_asucs4(PyObject *self, PyObject *args) { @@ -181,6 +195,7 @@ unicode_asucs4(PyObject *self, PyObject *args) return result; } +/* Test PyUnicode_AsUTF8() */ static PyObject * unicode_asutf8(PyObject *self, PyObject *args) { @@ -199,6 +214,7 @@ unicode_asutf8(PyObject *self, PyObject *args) return PyBytes_FromString(buffer); } +/* Test PyUnicode_AsUTF8AndSize() */ static PyObject * unicode_asutf8andsize(PyObject *self, PyObject *args) { @@ -223,26 +239,194 @@ unicode_asutf8andsize(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, utf8_len); } +/* Test PyUnicode_Concat() */ +static PyObject * +unicode_concat(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_Concat(left, right); +} + +/* Test PyUnicode_Split() */ +static PyObject * +unicode_split(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Split(s, sep, maxsplit); +} + +/* Test PyUnicode_RSplit() */ +static PyObject * +unicode_rsplit(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + Py_ssize_t maxsplit = -1; + + if (!PyArg_ParseTuple(args, "OO|n", &s, &sep, &maxsplit)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RSplit(s, sep, maxsplit); +} + +/* Test PyUnicode_Splitlines() */ +static PyObject * +unicode_splitlines(PyObject *self, PyObject *args) +{ + PyObject *s; + int keepends = 0; + + if (!PyArg_ParseTuple(args, "O|i", &s, &keepends)) + return NULL; + + NULLABLE(s); + return PyUnicode_Splitlines(s, keepends); +} + +/* Test PyUnicode_Partition() */ +static PyObject * +unicode_partition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_Partition(s, sep); +} + +/* Test PyUnicode_RPartition() */ +static PyObject * +unicode_rpartition(PyObject *self, PyObject *args) +{ + PyObject *s; + PyObject *sep; + + if (!PyArg_ParseTuple(args, "OO", &s, &sep)) + return NULL; + + NULLABLE(s); + NULLABLE(sep); + return PyUnicode_RPartition(s, sep); +} + +/* Test PyUnicode_Translate() */ +static PyObject * +unicode_translate(PyObject *self, PyObject *args) +{ + PyObject *obj; + PyObject *table; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "OO|z", &obj, &table, &errors)) + return NULL; + + NULLABLE(obj); + NULLABLE(table); + return PyUnicode_Translate(obj, table, errors); +} + +/* Test PyUnicode_Join() */ +static PyObject * +unicode_join(PyObject *self, PyObject *args) +{ + PyObject *sep; + PyObject *seq; + + if (!PyArg_ParseTuple(args, "OO", &sep, &seq)) + return NULL; + + NULLABLE(sep); + NULLABLE(seq); + return PyUnicode_Join(sep, seq); +} + +/* Test PyUnicode_Count() */ static PyObject * unicode_count(PyObject *self, PyObject *args) { PyObject *str; PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; Py_ssize_t result; - Py_ssize_t start, end; - if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr, - &start, &end)) { + if (!PyArg_ParseTuple(args, "OOnn", &str, &substr, &start, &end)) return NULL; - } + NULLABLE(str); + NULLABLE(substr); result = PyUnicode_Count(str, substr, start, end); if (result == -1) return NULL; - else - return PyLong_FromSsize_t(result); + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_Find() */ +static PyObject * +unicode_find(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Find(str, substr, start, end, direction); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Tailmatch() */ +static PyObject * +unicode_tailmatch(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + Py_ssize_t start; + Py_ssize_t end; + int direction; + Py_ssize_t result; + + if (!PyArg_ParseTuple(args, "OOnni", &str, &substr, &start, &end, &direction)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + result = PyUnicode_Tailmatch(str, substr, start, end, direction); + if (result == -1) + return NULL; + return PyLong_FromSsize_t(result); +} + +/* Test PyUnicode_FindChar() */ static PyObject * unicode_findchar(PyObject *self, PyObject *args) { @@ -264,6 +448,130 @@ unicode_findchar(PyObject *self, PyObject *args) return PyLong_FromSsize_t(result); } +/* Test PyUnicode_Replace() */ +static PyObject * +unicode_replace(PyObject *self, PyObject *args) +{ + PyObject *str; + PyObject *substr; + PyObject *replstr; + Py_ssize_t maxcount = -1; + + if (!PyArg_ParseTuple(args, "OOO|n", &str, &substr, &replstr, &maxcount)) + return NULL; + + NULLABLE(str); + NULLABLE(substr); + NULLABLE(replstr); + return PyUnicode_Replace(str, substr, replstr, maxcount); +} + +/* Test PyUnicode_Compare() */ +static PyObject * +unicode_compare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int result; + + if (!PyArg_ParseTuple(args, "OO", &left, &right)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + result = PyUnicode_Compare(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CompareWithASCIIString() */ +static PyObject * +unicode_comparewithasciistring(PyObject *self, PyObject *args) +{ + PyObject *left; + const char *right = NULL; + Py_ssize_t right_len; + int result; + + if (!PyArg_ParseTuple(args, "O|y#", &left, &right, &right_len)) + return NULL; + + NULLABLE(left); + result = PyUnicode_CompareWithASCIIString(left, right); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_RichCompare() */ +static PyObject * +unicode_richcompare(PyObject *self, PyObject *args) +{ + PyObject *left; + PyObject *right; + int op; + + if (!PyArg_ParseTuple(args, "OOi", &left, &right, &op)) + return NULL; + + NULLABLE(left); + NULLABLE(right); + return PyUnicode_RichCompare(left, right, op); +} + +/* Test PyUnicode_Format() */ +static PyObject * +unicode_format(PyObject *self, PyObject *args) +{ + PyObject *format; + PyObject *fargs; + + if (!PyArg_ParseTuple(args, "OO", &format, &fargs)) + return NULL; + + NULLABLE(format); + NULLABLE(fargs); + return PyUnicode_Format(format, fargs); +} + +/* Test PyUnicode_Contains() */ +static PyObject * +unicode_contains(PyObject *self, PyObject *args) +{ + PyObject *container; + PyObject *element; + int result; + + if (!PyArg_ParseTuple(args, "OO", &container, &element)) + return NULL; + + NULLABLE(container); + NULLABLE(element); + result = PyUnicode_Contains(container, element); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_IsIdentifier() */ +static PyObject * +unicode_isidentifier(PyObject *self, PyObject *arg) +{ + int result; + + NULLABLE(arg); + result = PyUnicode_IsIdentifier(arg); + if (result == -1 && PyErr_Occurred()) { + return NULL; + } + return PyLong_FromLong(result); +} + +/* Test PyUnicode_CopyCharacters() */ static PyObject * unicode_copycharacters(PyObject *self, PyObject *args) { @@ -711,13 +1019,31 @@ static PyMethodDef TestMethods[] = { test_unicode_compare_with_ascii, METH_NOARGS}, {"test_string_from_format", test_string_from_format, METH_NOARGS}, {"test_widechar", test_widechar, METH_NOARGS}, + {"unicode_fromobject", unicode_fromobject, METH_O}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_asutf8", unicode_asutf8, METH_VARARGS}, {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, + {"unicode_concat", unicode_concat, METH_VARARGS}, + {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, + {"unicode_split", unicode_split, METH_VARARGS}, + {"unicode_rsplit", unicode_rsplit, METH_VARARGS}, + {"unicode_partition", unicode_partition, METH_VARARGS}, + {"unicode_rpartition", unicode_rpartition, METH_VARARGS}, + {"unicode_translate", unicode_translate, METH_VARARGS}, + {"unicode_join", unicode_join, METH_VARARGS}, {"unicode_count", unicode_count, METH_VARARGS}, + {"unicode_tailmatch", unicode_tailmatch, METH_VARARGS}, + {"unicode_find", unicode_find, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, + {"unicode_replace", unicode_replace, METH_VARARGS}, + {"unicode_compare", unicode_compare, METH_VARARGS}, + {"unicode_comparewithasciistring",unicode_comparewithasciistring,METH_VARARGS}, + {"unicode_richcompare", unicode_richcompare, METH_VARARGS}, + {"unicode_format", unicode_format, METH_VARARGS}, + {"unicode_contains", unicode_contains, METH_VARARGS}, + {"unicode_isidentifier", unicode_isidentifier, METH_O}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {NULL}, };