diff --git a/Lib/test/test_capi/test_codecs.py b/Lib/test/test_capi/test_codecs.py new file mode 100644 index 00000000000000..7f5d623ec572cc --- /dev/null +++ b/Lib/test/test_capi/test_codecs.py @@ -0,0 +1,582 @@ +import unittest +import sys +from test import support +from test.support import import_helper + +try: + import _testcapi +except ImportError: + _testcapi = None + + +NULL = None + +class CAPITest(unittest.TestCase): + # TODO: Test the following functions: + # + # PyUnicode_FSConverter + # PyUnicode_FSDecoder + # PyUnicode_DecodeMBCS + # PyUnicode_DecodeMBCSStateful + # PyUnicode_DecodeCodePageStateful + # PyUnicode_AsMBCSString + # PyUnicode_EncodeCodePage + # PyUnicode_DecodeLocaleAndSize + # PyUnicode_DecodeLocale + # PyUnicode_EncodeLocale + # PyUnicode_DecodeFSDefault + # PyUnicode_DecodeFSDefaultAndSize + # PyUnicode_EncodeFSDefault + # + # Not tested deprecated functions: + # + # PyUnicode_AsDecodedObject + # PyUnicode_AsDecodedUnicode + # PyUnicode_AsEncodedObject + # PyUnicode_AsEncodedUnicode + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_fromencodedobject(self): + """Test PyUnicode_FromEncodedObject()""" + from _testcapi import unicode_fromencodedobject as fromencodedobject + + self.assertEqual(fromencodedobject(b'abc', NULL), 'abc') + self.assertEqual(fromencodedobject(b'abc', 'ascii'), 'abc') + b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' + s = 'a\xa1\u4f60\U0001f600' + self.assertEqual(fromencodedobject(b, NULL), s) + self.assertEqual(fromencodedobject(b, 'utf-8'), s) + self.assertEqual(fromencodedobject(b, 'latin1'), b.decode('latin1')) + self.assertRaises(UnicodeDecodeError, fromencodedobject, b, 'ascii') + self.assertEqual(fromencodedobject(b, 'ascii', 'replace'), + 'a' + '\ufffd'*9) + self.assertEqual(fromencodedobject(bytearray(b), NULL), s) + self.assertEqual(fromencodedobject(bytearray(b), 'utf-8'), s) + self.assertRaises(LookupError, fromencodedobject, b'abc', 'foo') + self.assertRaises(LookupError, fromencodedobject, b, 'ascii', 'foo') + self.assertRaises(TypeError, fromencodedobject, 'abc', NULL) + self.assertRaises(TypeError, fromencodedobject, 'abc', 'ascii') + self.assertRaises(TypeError, fromencodedobject, [], NULL) + self.assertRaises(TypeError, fromencodedobject, [], 'ascii') + self.assertRaises(SystemError, fromencodedobject, NULL, NULL) + self.assertRaises(SystemError, fromencodedobject, NULL, 'ascii') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asencodedstring(self): + """Test PyUnicode_AsEncodedString()""" + from _testcapi import unicode_asencodedstring as asencodedstring + + self.assertEqual(asencodedstring('abc', NULL), b'abc') + self.assertEqual(asencodedstring('abc', 'ascii'), b'abc') + s = 'a\xa1\u4f60\U0001f600' + b = b'a\xc2\xa1\xe4\xbd\xa0\xf0\x9f\x98\x80' + self.assertEqual(asencodedstring(s, NULL), b) + self.assertEqual(asencodedstring(s, 'utf-8'), b) + self.assertEqual(asencodedstring('\xa1\xa2', 'latin1'), b'\xa1\xa2') + self.assertRaises(UnicodeEncodeError, asencodedstring, '\xa1\xa2', 'ascii') + self.assertEqual(asencodedstring(s, 'ascii', 'replace'), b'a???') + + self.assertRaises(LookupError, asencodedstring, 'abc', 'foo') + self.assertRaises(LookupError, asencodedstring, s, 'ascii', 'foo') + self.assertRaises(TypeError, asencodedstring, b'abc', NULL) + self.assertRaises(TypeError, asencodedstring, b'abc', 'ascii') + self.assertRaises(TypeError, asencodedstring, [], NULL) + self.assertRaises(TypeError, asencodedstring, [], 'ascii') + # CRASHES asencodedstring(NULL, NULL) + # CRASHES asencodedstring(NULL, 'ascii') + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_buildencodingmap(self): + """Test PyUnicode_BuildEncodingMap()""" + from _testcapi import unicode_buildencodingmap as buildencodingmap + + # XXX + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf8(self): + """Test PyUnicode_DecodeUTF8()""" + from _testcapi import unicode_decodeutf8 as decodeutf8 + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8(b), s) + self.assertEqual(decodeutf8(b, 'strict'), s) + + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff') + self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f') + self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd') + self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb') + + self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf8stateful(self): + """Test PyUnicode_DecodeUTF8Stateful()""" + from _testcapi import unicode_decodeutf8stateful as decodeutf8stateful + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-8') + self.assertEqual(decodeutf8stateful(b), (s, len(b))) + self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b))) + + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0') + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1)) + self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1)) + self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb') + self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4)) + + self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo') + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf8string(self): + """Test PyUnicode_AsUTF8String()""" + from _testcapi import unicode_asutf8string as asutf8string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf8string(s), s.encode('utf-8')) + + self.assertRaises(UnicodeEncodeError, asutf8string, '\ud8ff') + self.assertRaises(TypeError, asutf8string, b'abc') + self.assertRaises(TypeError, asutf8string, []) + # CRASHES asutf8string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf16(self): + """Test PyUnicode_DecodeUTF16()""" + from _testcapi import unicode_decodeutf16 as decodeutf16 + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-16') + self.assertEqual(decodeutf16(0, b), (naturalbyteorder, s)) + b = s.encode('utf-16le') + self.assertEqual(decodeutf16(-1, b), (-1, s)) + self.assertEqual(decodeutf16(0, b'\xff\xfe'+b), (-1, s)) + b = s.encode('utf-16be') + self.assertEqual(decodeutf16(1, b), (1, s)) + self.assertEqual(decodeutf16(0, b'\xfe\xff'+b), (1, s)) + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'a') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'a') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xff\xfea') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xfe\xffa') + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x00\xde') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xde\x00') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xde\xde') + self.assertEqual(decodeutf16(-1, b'\x00\xde', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(1, b'\xde\x00', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xde\xde', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd')) + + self.assertRaises(UnicodeDecodeError, decodeutf16, -1, b'\x3d\xd8') + self.assertRaises(UnicodeDecodeError, decodeutf16, 1, b'\xd8\x3d') + self.assertRaises(UnicodeDecodeError, decodeutf16, 0, b'\xd8\xd8') + self.assertEqual(decodeutf16(-1, b'\x3d\xd8', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(1, b'\xd8\x3d', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xd8\xd8', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xff\xfe\x3d\xd8', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf16(0, b'\xfe\xff\xd8\x3d', 'replace'), (1, '\ufffd')) + + self.assertRaises(LookupError, decodeutf16, -1, b'\x00\xde', 'foo') + self.assertRaises(LookupError, decodeutf16, 1, b'\xde\x00', 'foo') + self.assertRaises(LookupError, decodeutf16, 0, b'\xde\xde', 'foo') + # TODO: Test PyUnicode_DecodeUTF16() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf16stateful(self): + """Test PyUnicode_DecodeUTF16Stateful()""" + from _testcapi import unicode_decodeutf16stateful as decodeutf16stateful + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-16') + self.assertEqual(decodeutf16stateful(0, b), (naturalbyteorder, s, len(b))) + b = s.encode('utf-16le') + self.assertEqual(decodeutf16stateful(-1, b), (-1, s, len(b))) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe'+b), (-1, s, len(b)+2)) + b = s.encode('utf-16be') + self.assertEqual(decodeutf16stateful(1, b), (1, s, len(b))) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff'+b), (1, s, len(b)+2)) + + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(-1, b'\x61\x00\x3d\xd8\x00'), (-1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(1, b'\x00\x61\xd8\x3d\xde'), (1, 'a', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x61\x00\x3d\xd8\x00'), (-1, 'a', 4)) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\x00\x61\xd8\x3d\xde'), (1, 'a', 4)) + + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x00\xde') + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xde\x00') + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 0, b'\xde\xde') + self.assertEqual(decodeutf16stateful(-1, b'\x00\xde', 'replace'), (-1, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(1, b'\xde\x00', 'replace'), (1, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xde\xde', 'replace'), (0, '\ufffd', 2)) + self.assertEqual(decodeutf16stateful(0, b'\xff\xfe\x00\xde', 'replace'), (-1, '\ufffd', 4)) + self.assertEqual(decodeutf16stateful(0, b'\xfe\xff\xde\x00', 'replace'), (1, '\ufffd', 4)) + + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, -1, b'\x3d\xd8\x61\x00') + self.assertEqual(decodeutf16stateful(-1, b'\x3d\xd8\x61\x00', 'replace'), (-1, '\ufffda', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf16stateful, 1, b'\xd8\x3d\x00\x61') + self.assertEqual(decodeutf16stateful(1, b'\xd8\x3d\x00\x61', 'replace'), (1, '\ufffda', 4)) + + self.assertRaises(LookupError, decodeutf16stateful, -1, b'\x00\xde', 'foo') + self.assertRaises(LookupError, decodeutf16stateful, 1, b'\xde\x00', 'foo') + self.assertRaises(LookupError, decodeutf16stateful, 0, b'\xde\xde', 'foo') + # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF16Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf16string(self): + """Test PyUnicode_AsUTF16String()""" + from _testcapi import unicode_asutf16string as asutf16string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf16string(s), s.encode('utf-16')) + + self.assertRaises(UnicodeEncodeError, asutf16string, '\ud8ff') + self.assertRaises(TypeError, asutf16string, b'abc') + self.assertRaises(TypeError, asutf16string, []) + # CRASHES asutf16string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf32(self): + """Test PyUnicode_DecodeUTF8()""" + from _testcapi import unicode_decodeutf32 as decodeutf32 + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-32') + self.assertEqual(decodeutf32(0, b), (naturalbyteorder, s)) + b = s.encode('utf-32le') + self.assertEqual(decodeutf32(-1, b), (-1, s)) + self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00'+b), (-1, s)) + b = s.encode('utf-32be') + self.assertEqual(decodeutf32(1, b), (1, s)) + self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff'+b), (1, s)) + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x61\x00\x00\x00\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\x00\x61\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00') + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32, 0, b'\xff\xff\xff\xff') + self.assertEqual(decodeutf32(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf32(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd')) + self.assertEqual(decodeutf32(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd')) + + self.assertRaises(UnicodeDecodeError, decodeutf32, -1, b'\x3d\xd8\x00\x00') + self.assertEqual(decodeutf32(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd')) + self.assertRaises(UnicodeDecodeError, decodeutf32, 1, b'\x00\x00\xd8\x3d') + self.assertEqual(decodeutf32(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd')) + + self.assertRaises(LookupError, decodeutf32, -1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32, 1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32, 0, b'\xff\xff\xff\xff', 'foo') + # TODO: Test PyUnicode_DecodeUTF32() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeutf32stateful(self): + """Test PyUnicode_DecodeUTF32Stateful()""" + from _testcapi import unicode_decodeutf32stateful as decodeutf32stateful + + naturalbyteorder = -1 if sys.byteorder == 'little' else 1 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + b = s.encode('utf-32') + self.assertEqual(decodeutf32stateful(0, b), (naturalbyteorder, s, len(b))) + b = s.encode('utf-32le') + self.assertEqual(decodeutf32stateful(-1, b), (-1, s, len(b))) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, s, len(b)+4)) + b = s.encode('utf-32be') + self.assertEqual(decodeutf32stateful(1, b), (1, s, len(b))) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, s, len(b)+4)) + + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(-1, b'\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\x61\x00\x00\x00\x00\xf6\x01'), (-1, 'a', 8)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\x00\x00\x00\x61\x00\x01\xf6'), (1, 'a', 8)) + + for b in b'\xff', b'\xff\xff', b'\xff\xff\xff': + self.assertEqual(decodeutf32stateful(-1, b), (-1, '', 0)) + self.assertEqual(decodeutf32stateful(1, b), (1, '', 0)) + self.assertEqual(decodeutf32stateful(0, b), (0, '', 0)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00'+b), (-1, '', 4)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff'+b), (1, '', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\xff\xff\xff\xff') + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 0, b'\xff\xff\xff\xff') + self.assertEqual(decodeutf32stateful(-1, b'\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(1, b'\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xff\xff\xff', 'replace'), (0, '\ufffd', 4)) + self.assertEqual(decodeutf32stateful(0, b'\xff\xfe\x00\x00\xff\xff\xff\xff', 'replace'), (-1, '\ufffd', 8)) + self.assertEqual(decodeutf32stateful(0, b'\x00\x00\xfe\xff\xff\xff\xff\xff', 'replace'), (1, '\ufffd', 8)) + + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, -1, b'\x3d\xd8\x00\x00') + self.assertEqual(decodeutf32stateful(-1, b'\x3d\xd8\x00\x00', 'replace'), (-1, '\ufffd', 4)) + self.assertRaises(UnicodeDecodeError, decodeutf32stateful, 1, b'\x00\x00\xd8\x3d') + self.assertEqual(decodeutf32stateful(1, b'\x00\x00\xd8\x3d', 'replace'), (1, '\ufffd', 4)) + + self.assertRaises(LookupError, decodeutf32stateful, -1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32stateful, 1, b'\xff\xff\xff\xff', 'foo') + self.assertRaises(LookupError, decodeutf32stateful, 0, b'\xff\xff\xff\xff', 'foo') + # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as data and + # negative size. + # TODO: Test PyUnicode_DecodeUTF32Stateful() with NULL as the address of + # "consumed". + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asutf32string(self): + """Test PyUnicode_AsUTF32String()""" + from _testcapi import unicode_asutf32string as asutf32string + + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: + self.assertEqual(asutf32string(s), s.encode('utf-32')) + + self.assertRaises(UnicodeEncodeError, asutf32string, '\ud8ff') + self.assertRaises(TypeError, asutf32string, b'abc') + self.assertRaises(TypeError, asutf32string, []) + # CRASHES asutf32string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeunicodeescape(self): + """Test PyUnicode_DecodeUnicodeEscape()""" + from _testcapi import unicode_decodeunicodeescape as decodeunicodeescape + + self.assertEqual(decodeunicodeescape(b'abc'), 'abc') + self.assertEqual(decodeunicodeescape(br'\t\n\r\x0b\x0c\x00\\'), '\t\n\r\v\f\0\\') + self.assertEqual(decodeunicodeescape(b'\t\n\r\x0b\x0c\x00'), '\t\n\r\v\f\0') + self.assertEqual(decodeunicodeescape(br'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodeunicodeescape(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodeunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d') + self.assertEqual(decodeunicodeescape(br'\U0001f600'), '\U0001f600') + with self.assertWarns(DeprecationWarning): + self.assertEqual(decodeunicodeescape(br'\z'), r'\z') + + for b in b'\\', br'\xa', br'\u4f6', br'\U0001f60': + self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b) + self.assertRaises(UnicodeDecodeError, decodeunicodeescape, b, 'strict') + self.assertEqual(decodeunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd') + self.assertEqual(decodeunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy') + + self.assertRaises(LookupError, decodeunicodeescape, b'\\', 'foo') + # TODO: Test PyUnicode_DecodeUnicodeEscape() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asunicodeescapestring(self): + """Test PyUnicode_AsUnicodeEscapeString()""" + from _testcapi import unicode_asunicodeescapestring as asunicodeescapestring + + self.assertEqual(asunicodeescapestring('abc'), b'abc') + self.assertEqual(asunicodeescapestring('\t\n\r\v\f\0\\'), br'\t\n\r\x0b\x0c\x00\\') + self.assertEqual(asunicodeescapestring('\xa1\xa2'), br'\xa1\xa2') + self.assertEqual(asunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d') + self.assertEqual(asunicodeescapestring('\U0001f600'), br'\U0001f600') + + self.assertRaises(TypeError, asunicodeescapestring, b'abc') + self.assertRaises(TypeError, asunicodeescapestring, []) + # CRASHES asunicodeescapestring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decoderawunicodeescape(self): + """Test PyUnicode_DecodeRawUnicodeEscape()""" + from _testcapi import unicode_decoderawunicodeescape as decoderawunicodeescape + + self.assertEqual(decoderawunicodeescape(b'abc'), 'abc') + self.assertEqual(decoderawunicodeescape(b'\t\n\r\v\f\0\\'), '\t\n\r\v\f\0\\') + self.assertEqual(decoderawunicodeescape(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decoderawunicodeescape(br'\u4f60\u597d'), '\u4f60\u597d') + self.assertEqual(decoderawunicodeescape(br'\U0001f600'), '\U0001f600') + self.assertEqual(decoderawunicodeescape(br'\xa1\xa2'), r'\xa1\xa2') + self.assertEqual(decoderawunicodeescape(br'\z'), r'\z') + + for b in br'\u4f6', br'\U0001f60': + self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b) + self.assertRaises(UnicodeDecodeError, decoderawunicodeescape, b, 'strict') + self.assertEqual(decoderawunicodeescape(br'x\U0001f60', 'replace'), 'x\ufffd') + self.assertEqual(decoderawunicodeescape(br'x\U0001f60y', 'replace'), 'x\ufffdy') + + self.assertRaises(LookupError, decoderawunicodeescape, br'\U0001f60', 'foo') + # TODO: Test PyUnicode_DecodeRawUnicodeEscape() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asrawunicodeescapestring(self): + """Test PyUnicode_AsRawUnicodeEscapeString()""" + from _testcapi import unicode_asrawunicodeescapestring as asrawunicodeescapestring + + self.assertEqual(asrawunicodeescapestring('abc'), b'abc') + self.assertEqual(asrawunicodeescapestring('\t\n\r\v\f\0\\'), b'\t\n\r\v\f\0\\') + self.assertEqual(asrawunicodeescapestring('\xa1\xa2'), b'\xa1\xa2') + self.assertEqual(asrawunicodeescapestring('\u4f60\u597d'), br'\u4f60\u597d') + self.assertEqual(asrawunicodeescapestring('\U0001f600'), br'\U0001f600') + + self.assertRaises(TypeError, asrawunicodeescapestring, b'abc') + self.assertRaises(TypeError, asrawunicodeescapestring, []) + # CRASHES asrawunicodeescapestring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodelatin1(self): + """Test PyUnicode_DecodeLatin1()""" + from _testcapi import unicode_decodelatin1 as decodelatin1 + + self.assertEqual(decodelatin1(b'abc'), 'abc') + self.assertEqual(decodelatin1(b'abc', 'strict'), 'abc') + self.assertEqual(decodelatin1(b'\xa1\xa2'), '\xa1\xa2') + self.assertEqual(decodelatin1(b'\xa1\xa2', 'strict'), '\xa1\xa2') + # TODO: Test PyUnicode_DecodeLatin1() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_aslatin1string(self): + """Test PyUnicode_AsLatin1String()""" + from _testcapi import unicode_aslatin1string as aslatin1string + + self.assertEqual(aslatin1string('abc'), b'abc') + self.assertEqual(aslatin1string('\xa1\xa2'), b'\xa1\xa2') + + self.assertRaises(UnicodeEncodeError, aslatin1string, '\u4f60') + self.assertRaises(TypeError, aslatin1string, b'abc') + self.assertRaises(TypeError, aslatin1string, []) + # CRASHES aslatin1string(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodeascii(self): + """Test PyUnicode_DecodeASCII()""" + from _testcapi import unicode_decodeascii as decodeascii + + self.assertEqual(decodeascii(b'abc'), 'abc') + self.assertEqual(decodeascii(b'abc', 'strict'), 'abc') + + self.assertRaises(UnicodeDecodeError, decodeascii, b'\xff') + self.assertEqual(decodeascii(b'a\xff', 'replace'), 'a\ufffd') + self.assertEqual(decodeascii(b'a\xffb', 'replace'), 'a\ufffdb') + + self.assertRaises(LookupError, decodeascii, b'a\xff', 'foo') + # TODO: Test PyUnicode_DecodeASCII() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_asasciistring(self): + """Test PyUnicode_AsASCIIString()""" + from _testcapi import unicode_asasciistring as asasciistring + + self.assertEqual(asasciistring('abc'), b'abc') + + self.assertRaises(UnicodeEncodeError, asasciistring, '\x80') + self.assertRaises(TypeError, asasciistring, b'abc') + self.assertRaises(TypeError, asasciistring, []) + # CRASHES asasciistring(NULL) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decodecharmap(self): + """Test PyUnicode_DecodeCharmap()""" + from _testcapi import unicode_decodecharmap as decodecharmap + + self.assertEqual(decodecharmap(b'\3\0\7', {0: 'a', 3: 'b', 7: 'c'}), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['a', 'b', 'c']), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', 'abc'), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['\xa1', '\xa2', '\xa3']), '\xa2\xa1\xa3') + self.assertEqual(decodecharmap(b'\1\0\2', ['\u4f60', '\u597d', '\u4e16']), '\u597d\u4f60\u4e16') + self.assertEqual(decodecharmap(b'\1\0\2', ['\U0001f600', '\U0001f601', '\U0001f602']), '\U0001f601\U0001f600\U0001f602') + + self.assertEqual(decodecharmap(b'\1\0\2', [97, 98, 99]), 'bac') + self.assertEqual(decodecharmap(b'\1\0\2', ['', 'b', 'cd']), 'bcd') + + self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {}) + self.assertRaises(UnicodeDecodeError, decodecharmap, b'\0', {0: None}) + self.assertEqual(decodecharmap(b'\1\0\2', [None, 'b', 'c'], 'replace'), 'b\ufffdc') + self.assertEqual(decodecharmap(b'\1\0\2\xff', NULL), '\1\0\2\xff') + self.assertRaises(TypeError, decodecharmap, b'\0', 42) + + # TODO: Test PyUnicode_DecodeCharmap() with NULL as data and + # negative size. + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_ascharmapstring(self): + """Test PyUnicode_AsCharmapString()""" + from _testcapi import unicode_ascharmapstring as ascharmapstring + + self.assertEqual(ascharmapstring('abc', {97: 3, 98: 0, 99: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\xa1\xa2\xa3', {0xa1: 3, 0xa2: 0, 0xa3: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\u4f60\u597d\u4e16', {0x4f60: 3, 0x597d: 0, 0x4e16: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('\U0001f600\U0001f601\U0001f602', {0x1f600: 3, 0x1f601: 0, 0x1f602: 7}), b'\3\0\7') + self.assertEqual(ascharmapstring('abc', {97: 3, 98: b'', 99: b'spam'}), b'\3spam') + + self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {}) + self.assertRaises(UnicodeEncodeError, ascharmapstring, 'a', {97: None}) + self.assertRaises(TypeError, ascharmapstring, b'a', {}) + self.assertRaises(TypeError, ascharmapstring, [], {}) + self.assertRaises(TypeError, ascharmapstring, 'a', NULL) + # CRASHES ascharmapstring(NULL, {}) + + @support.cpython_only + @unittest.skipIf(_testcapi is None, 'need _testcapi module') + def test_decode(self): + """Test PyUnicode_Decode()""" + from _testcapi import unicode_decode as decode + + self.assertEqual(decode(b'[\xe2\x82\xac]', 'utf-8'), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', 'iso8859-15'), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', 'iso8859-15', 'strict'), '[\u20ac]') + self.assertRaises(UnicodeDecodeError, decode, b'[\xa4]', 'utf-8') + self.assertEqual(decode(b'[\xa4]', 'utf-8', 'replace'), '[\ufffd]') + + self.assertEqual(decode(b'[\xe2\x82\xac]', NULL), '[\u20ac]') + self.assertEqual(decode(b'[\xa4]', NULL, 'replace'), '[\ufffd]') + + self.assertRaises(LookupError, decode, b'\xa4', 'foo') + self.assertRaises(LookupError, decode, b'\xa4', 'utf-8', 'foo') + # TODO: Test PyUnicode_Decode() with NULL as data and + # negative size. + + +if __name__ == "__main__": + unittest.main() diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index dbeb87c1c797da..7c015c4310b966 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -370,6 +370,22 @@ unicode_readchar(PyObject *self, PyObject *args) return PyLong_FromUnsignedLong(result); } +/* Test PyUnicode_FromEncodedObject() */ +static PyObject * +unicode_fromencodedobject(PyObject *self, PyObject *args) +{ + PyObject *obj; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "Oz|z", &obj, &encoding, &errors)) { + return NULL; + } + + NULLABLE(obj); + return PyUnicode_FromEncodedObject(obj, encoding, errors); +} + /* Test PyUnicode_FromObject() */ static PyObject * unicode_fromobject(PyObject *self, PyObject *arg) @@ -650,6 +666,417 @@ unicode_getdefaultencoding(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyBytes_FromString(s); } +/* Test PyUnicode_Decode() */ +static PyObject * +unicode_decode(PyObject *self, PyObject *args) +{ + const char *s; + Py_ssize_t size; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#z|z", &s, &size, &encoding, &errors)) + return NULL; + + return PyUnicode_Decode(s, size, encoding, errors); +} + +/* Test PyUnicode_AsEncodedString() */ +static PyObject * +unicode_asencodedstring(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *encoding; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "Oz|z", &unicode, &encoding, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_AsEncodedString(unicode, encoding, errors); +} + +/* Test PyUnicode_BuildEncodingMap() */ +static PyObject * +unicode_buildencodingmap(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_BuildEncodingMap(arg); +} + +/* Test PyUnicode_DecodeUTF7() */ +static PyObject * +unicode_decodeutf7(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUTF7(data, size, errors); +} + +/* Test PyUnicode_DecodeUTF7Stateful() */ +static PyObject * +unicode_decodeutf7stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF7Stateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_DecodeUTF8() */ +static PyObject * +unicode_decodeutf8(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUTF8(data, size, errors); +} + +/* Test PyUnicode_DecodeUTF8Stateful() */ +static PyObject * +unicode_decodeutf8stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF8Stateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_AsUTF8String() */ +static PyObject * +unicode_asutf8string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF8String(arg); +} + +/* Test PyUnicode_DecodeUTF32() */ +static PyObject * +unicode_decodeutf32(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF32(data, size, errors, &byteorder); + if (!result) { + return NULL; + } + return Py_BuildValue("(iN)", byteorder, result); +} + +/* Test PyUnicode_DecodeUTF32Stateful() */ +static PyObject * +unicode_decodeutf32stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(iNn)", byteorder, result, consumed); +} + +/* Test PyUnicode_AsUTF32String() */ +static PyObject * +unicode_asutf32string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF32String(arg); +} + +/* Test PyUnicode_DecodeUTF16() */ +static PyObject * +unicode_decodeutf16(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder = 0; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF16(data, size, errors, &byteorder); + if (!result) { + return NULL; + } + return Py_BuildValue("(iN)", byteorder, result); +} + +/* Test PyUnicode_DecodeUTF16Stateful() */ +static PyObject * +unicode_decodeutf16stateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + int byteorder; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &byteorder, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(iNn)", byteorder, result, consumed); +} + +/* Test PyUnicode_AsUTF16String() */ +static PyObject * +unicode_asutf16string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUTF16String(arg); +} + +/* Test PyUnicode_DecodeUnicodeEscape() */ +static PyObject * +unicode_decodeunicodeescape(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeUnicodeEscape(data, size, errors); +} + +/* Test PyUnicode_AsUnicodeEscapeString() */ +static PyObject * +unicode_asunicodeescapestring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsUnicodeEscapeString(arg); +} + +static PyObject * +unicode_decoderawunicodeescape(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeRawUnicodeEscape(data, size, errors); +} + +/* Test PyUnicode_AsRawUnicodeEscapeString() */ +static PyObject * +unicode_asrawunicodeescapestring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsRawUnicodeEscapeString(arg); +} + +static PyObject * +unicode_decodelatin1(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLatin1(data, size, errors); +} + +/* Test PyUnicode_AsLatin1String() */ +static PyObject * +unicode_aslatin1string(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsLatin1String(arg); +} + +/* Test PyUnicode_DecodeASCII() */ +static PyObject * +unicode_decodeascii(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeASCII(data, size, errors); +} + +/* Test PyUnicode_AsASCIIString() */ +static PyObject * +unicode_asasciistring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsASCIIString(arg); +} + +/* Test PyUnicode_DecodeCharmap() */ +static PyObject * +unicode_decodecharmap(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + PyObject *mapping; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#O|z", &data, &size, &mapping, &errors)) + return NULL; + + NULLABLE(mapping); + return PyUnicode_DecodeCharmap(data, size, mapping, errors); +} + +/* Test PyUnicode_AsCharmapString() */ +static PyObject * +unicode_ascharmapstring(PyObject *self, PyObject *args) +{ + PyObject *unicode; + PyObject *mapping; + + if (!PyArg_ParseTuple(args, "OO", &unicode, &mapping)) + return NULL; + + NULLABLE(unicode); + NULLABLE(mapping); + return PyUnicode_AsCharmapString(unicode, mapping); +} + +#ifdef MS_WINDOWS + +/* Test PyUnicode_DecodeMBCS() */ +static PyObject * +unicode_decodembcs(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeMBCS(data, size, errors); +} + +/* Test PyUnicode_DecodeMBCSStateful() */ +static PyObject * +unicode_decodembcsstateful(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeMBCSStateful(data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_DecodeCodePageStateful() */ +static PyObject * +unicode_decodecodepagestateful(PyObject *self, PyObject *args) +{ + int code_page; + const char *data; + Py_ssize_t size; + const char *errors = NULL; + Py_ssize_t consumed; + PyObject *result; + + if (!PyArg_ParseTuple(args, "iy#|z", &code_page, &data, &size, &errors)) + return NULL; + + result = PyUnicode_DecodeCodePageStateful(code_page, data, size, errors, &consumed); + if (!result) { + return NULL; + } + return Py_BuildValue("(Nn)", result, consumed); +} + +/* Test PyUnicode_AsMBCSString() */ +static PyObject * +unicode_asmbcsstring(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_AsMBCSString(arg); +} + +/* Test PyUnicode_EncodeCodePage() */ +static PyObject * +unicode_encodecodepage(PyObject *self, PyObject *args) +{ + int code_page; + PyObject *unicode; + const char *errors; + + if (!PyArg_ParseTuple(args, "iO|z", &code_page, &unicode, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_EncodeCodePage(code_page, unicode, errors); +} + +#endif /* MS_WINDOWS */ + /* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */ static PyObject * unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) @@ -658,6 +1085,82 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } +/* Test PyUnicode_DecodeLocaleAndSize() */ +static PyObject * +unicode_decodelocaleandsize(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLocaleAndSize(data, size, errors); +} + +/* Test PyUnicode_DecodeLocale() */ +static PyObject * +unicode_decodelocale(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + const char *errors; + + if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) + return NULL; + + return PyUnicode_DecodeLocale(data, errors); +} + +/* Test PyUnicode_EncodeLocale() */ +static PyObject * +unicode_encodelocale(PyObject *self, PyObject *args) +{ + PyObject *unicode; + const char *errors; + + if (!PyArg_ParseTuple(args, "O|z", &unicode, &errors)) + return NULL; + + NULLABLE(unicode); + return PyUnicode_EncodeLocale(unicode, errors); +} + +/* Test PyUnicode_DecodeFSDefault() */ +static PyObject * +unicode_decodefsdefault(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "y#", &data, &size)) + return NULL; + + return PyUnicode_DecodeFSDefault(data); +} + +/* Test PyUnicode_DecodeFSDefaultAndSize() */ +static PyObject * +unicode_decodefsdefaultandsize(PyObject *self, PyObject *args) +{ + const char *data; + Py_ssize_t size; + + if (!PyArg_ParseTuple(args, "y#|n", &data, &size, &size)) + return NULL; + + return PyUnicode_DecodeFSDefaultAndSize(data, size); +} + +/* Test PyUnicode_EncodeFSDefault() */ +static PyObject * +unicode_encodefsdefault(PyObject *self, PyObject *arg) +{ + NULLABLE(arg); + return PyUnicode_EncodeFSDefault(arg); +} + /* Test PyUnicode_Concat() */ static PyObject * unicode_concat(PyObject *self, PyObject *args) @@ -1452,6 +1955,7 @@ static PyMethodDef TestMethods[] = { {"unicode_substring", unicode_substring, METH_VARARGS}, {"unicode_getlength", unicode_getlength, METH_O}, {"unicode_readchar", unicode_readchar, METH_VARARGS}, + {"unicode_fromencodedobject",unicode_fromencodedobject, METH_VARARGS}, {"unicode_fromobject", unicode_fromobject, METH_O}, {"unicode_interninplace", unicode_interninplace, METH_O}, {"unicode_internfromstring", unicode_internfromstring, METH_O}, @@ -1467,6 +1971,45 @@ static PyMethodDef TestMethods[] = { {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, {"unicode_asutf8andsize_null",unicode_asutf8andsize_null, METH_VARARGS}, {"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS}, + {"unicode_decode", unicode_decode, METH_VARARGS}, + {"unicode_asencodedstring", unicode_asencodedstring, METH_VARARGS}, + {"unicode_buildencodingmap", unicode_buildencodingmap, METH_O}, + {"unicode_decodeutf7", unicode_decodeutf7, METH_VARARGS}, + {"unicode_decodeutf7stateful",unicode_decodeutf7stateful, METH_VARARGS}, + {"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS}, + {"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS}, + {"unicode_asutf8string", unicode_asutf8string, METH_O}, + {"unicode_decodeutf16", unicode_decodeutf16, METH_VARARGS}, + {"unicode_decodeutf16stateful",unicode_decodeutf16stateful, METH_VARARGS}, + {"unicode_asutf16string", unicode_asutf16string, METH_O}, + {"unicode_decodeutf32", unicode_decodeutf32, METH_VARARGS}, + {"unicode_decodeutf32stateful",unicode_decodeutf32stateful, METH_VARARGS}, + {"unicode_asutf32string", unicode_asutf32string, METH_O}, + {"unicode_decodeunicodeescape",unicode_decodeunicodeescape, METH_VARARGS}, + {"unicode_asunicodeescapestring",unicode_asunicodeescapestring,METH_O}, + {"unicode_decoderawunicodeescape",unicode_decoderawunicodeescape,METH_VARARGS}, + {"unicode_asrawunicodeescapestring",unicode_asrawunicodeescapestring,METH_O}, + {"unicode_decodelatin1", unicode_decodelatin1, METH_VARARGS}, + {"unicode_aslatin1string", unicode_aslatin1string, METH_O}, + {"unicode_decodeascii", unicode_decodeascii, METH_VARARGS}, + {"unicode_asasciistring", unicode_asasciistring, METH_O}, + {"unicode_decodecharmap", unicode_decodecharmap, METH_VARARGS}, + {"unicode_ascharmapstring", unicode_ascharmapstring, METH_VARARGS}, +#ifdef MS_WINDOWS + {"unicode_decodembcs", unicode_decodembcs, METH_VARARGS}, + {"unicode_decodembcsstateful",unicode_decodembcsstateful, METH_VARARGS}, + {"unicode_decodecodepagestateful",unicode_decodecodepagestateful,METH_VARARGS}, + {"unicode_asmbcsstring", unicode_asmbcsstring, METH_O}, + {"unicode_encodecodepage", unicode_encodecodepage, METH_VARARGS}, +#endif /* MS_WINDOWS */ + + {"unicode_decodelocaleandsize",unicode_decodelocaleandsize, METH_VARARGS}, + {"unicode_decodelocale", unicode_decodelocale, METH_VARARGS}, + {"unicode_encodelocale", unicode_encodelocale, METH_VARARGS}, + {"unicode_decodefsdefault", unicode_decodefsdefault, METH_VARARGS}, + {"unicode_decodefsdefaultandsize",unicode_decodefsdefaultandsize,METH_VARARGS}, + {"unicode_encodefsdefault", unicode_encodefsdefault, METH_O}, + {"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O}, {"unicode_concat", unicode_concat, METH_VARARGS}, {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b1acfc71379cd5..34e08f2ac1abc0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4530,6 +4530,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, } s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); if (s == end) { + if (consumed) { + *consumed = size; + } return u; }