diff --git a/Lib/test/clinic.test.c b/Lib/test/clinic.test.c index 4729708efd3acb..171570588e7a2b 100644 --- a/Lib/test/clinic.test.c +++ b/Lib/test/clinic.test.c @@ -530,19 +530,19 @@ test_char_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; char a = 'A'; - char b = '\x07'; - char c = '\x08'; + char b = '\a'; + char c = '\b'; char d = '\t'; char e = '\n'; - char f = '\x0b'; - char g = '\x0c'; + char f = '\v'; + char g = '\f'; char h = '\r'; char i = '"'; char j = '\''; char k = '?'; char l = '\\'; - char m = '\x00'; - char n = '\xff'; + char m = '\0'; + char n = '\377'; if (!_PyArg_CheckPositional("test_char_converter", nargs, 0, 14)) { goto exit; @@ -936,7 +936,7 @@ static PyObject * test_char_converter_impl(PyObject *module, char a, char b, char c, char d, char e, char f, char g, char h, char i, char j, char k, char l, char m, char n) -/*[clinic end generated code: output=ff11e203248582df input=e42330417a44feac]*/ +/*[clinic end generated code: output=6503d15448e1d4c4 input=e42330417a44feac]*/ /*[clinic input] @@ -1192,14 +1192,14 @@ test_int_converter a: int = 12 b: int(accept={int}) = 34 - c: int(accept={str}) = 45 + c: int(accept={str}) = '-' d: int(type='myenum') = 67 / [clinic start generated code]*/ PyDoc_STRVAR(test_int_converter__doc__, -"test_int_converter($module, a=12, b=34, c=45, d=67, /)\n" +"test_int_converter($module, a=12, b=34, c=\'-\', d=67, /)\n" "--\n" "\n"); @@ -1215,7 +1215,7 @@ test_int_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyObject *return_value = NULL; int a = 12; int b = 34; - int c = 45; + int c = '-'; myenum d = 67; if (!_PyArg_CheckPositional("test_int_converter", nargs, 0, 4)) { @@ -1266,7 +1266,7 @@ test_int_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) static PyObject * test_int_converter_impl(PyObject *module, int a, int b, int c, myenum d) -/*[clinic end generated code: output=fbcfb7554688663d input=d20541fc1ca0553e]*/ +/*[clinic end generated code: output=d5357b563bdb8789 input=5d8f4eb5899b24de]*/ /*[clinic input] diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index e71f9fc181bb43..93c284e58764f4 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1081,6 +1081,187 @@ def test_param_with_continuations(self): p = function.parameters['follow_symlinks'] self.assertEqual(True, p.default) + def test_param_default_none(self): + function = self.parse_function(r""" + module test + test.func + obj: object = None + str: str(accept={str, NoneType}) = None + buf: Py_buffer(accept={str, buffer, NoneType}) = None + """) + p = function.parameters['obj'] + self.assertIs(p.default, None) + self.assertEqual(p.converter.py_default, 'None') + self.assertEqual(p.converter.c_default, 'Py_None') + + p = function.parameters['str'] + self.assertIs(p.default, None) + self.assertEqual(p.converter.py_default, 'None') + self.assertEqual(p.converter.c_default, 'NULL') + + p = function.parameters['buf'] + self.assertIs(p.default, None) + self.assertEqual(p.converter.py_default, 'None') + self.assertEqual(p.converter.c_default, '{NULL, NULL}') + + def test_param_default_null(self): + function = self.parse_function(r""" + module test + test.func + obj: object = NULL + str: str = NULL + buf: Py_buffer = NULL + fsencoded: unicode_fs_encoded = NULL + fsdecoded: unicode_fs_decoded = NULL + """) + p = function.parameters['obj'] + self.assertIs(p.default, NULL) + self.assertEqual(p.converter.py_default, '') + self.assertEqual(p.converter.c_default, 'NULL') + + p = function.parameters['str'] + self.assertIs(p.default, NULL) + self.assertEqual(p.converter.py_default, '') + self.assertEqual(p.converter.c_default, 'NULL') + + p = function.parameters['buf'] + self.assertIs(p.default, NULL) + self.assertEqual(p.converter.py_default, '') + self.assertEqual(p.converter.c_default, '{NULL, NULL}') + + p = function.parameters['fsencoded'] + self.assertIs(p.default, NULL) + self.assertEqual(p.converter.py_default, '') + self.assertEqual(p.converter.c_default, 'NULL') + + p = function.parameters['fsdecoded'] + self.assertIs(p.default, NULL) + self.assertEqual(p.converter.py_default, '') + self.assertEqual(p.converter.c_default, 'NULL') + + def test_param_default_str_literal(self): + function = self.parse_function(r""" + module test + test.func + str: str = ' \t\n\r\v\f\xa0' + buf: Py_buffer(accept={str, buffer}) = ' \t\n\r\v\f\xa0' + """) + p = function.parameters['str'] + self.assertEqual(p.default, ' \t\n\r\v\f\xa0') + self.assertEqual(p.converter.py_default, r"' \t\n\r\x0b\x0c\xa0'") + self.assertEqual(p.converter.c_default, r'" \t\n\r\v\f\u00a0"') + + p = function.parameters['buf'] + self.assertEqual(p.default, ' \t\n\r\v\f\xa0') + self.assertEqual(p.converter.py_default, r"' \t\n\r\x0b\x0c\xa0'") + self.assertEqual(p.converter.c_default, + r'{.buf = " \t\n\r\v\f\302\240", .obj = NULL, .len = 8}') + + def test_param_default_bytes_literal(self): + function = self.parse_function(r""" + module test + test.func + str: str(accept={robuffer}) = b' \t\n\r\v\f\xa0' + buf: Py_buffer = b' \t\n\r\v\f\xa0' + """) + p = function.parameters['str'] + self.assertEqual(p.default, b' \t\n\r\v\f\xa0') + self.assertEqual(p.converter.py_default, r"b' \t\n\r\x0b\x0c\xa0'") + self.assertEqual(p.converter.c_default, r'" \t\n\r\v\f\240"') + + p = function.parameters['buf'] + self.assertEqual(p.default, b' \t\n\r\v\f\xa0') + self.assertEqual(p.converter.py_default, r"b' \t\n\r\x0b\x0c\xa0'") + self.assertEqual(p.converter.c_default, + r'{.buf = " \t\n\r\v\f\240", .obj = NULL, .len = 7}') + + def test_param_default_byte_literal(self): + function = self.parse_function(r""" + module test + test.func + zero: char = b'\0' + one: char = b'\1' + lf: char = b'\n' + nbsp: char = b'\xa0' + """) + p = function.parameters['zero'] + self.assertEqual(p.default, b'\0') + self.assertEqual(p.converter.py_default, r"b'\x00'") + self.assertEqual(p.converter.c_default, r"'\0'") + + p = function.parameters['one'] + self.assertEqual(p.default, b'\1') + self.assertEqual(p.converter.py_default, r"b'\x01'") + self.assertEqual(p.converter.c_default, r"'\001'") + + p = function.parameters['lf'] + self.assertEqual(p.default, b'\n') + self.assertEqual(p.converter.py_default, r"b'\n'") + self.assertEqual(p.converter.c_default, r"'\n'") + + p = function.parameters['nbsp'] + self.assertEqual(p.default, b'\xa0') + self.assertEqual(p.converter.py_default, r"b'\xa0'") + self.assertEqual(p.converter.c_default, r"'\240'") + + def test_param_default_unicode_char(self): + function = self.parse_function(r""" + module test + test.func + zero: int(accept={str}) = '\0' + one: int(accept={str}) = '\1' + lf: int(accept={str}) = '\n' + nbsp: int(accept={str}) = '\xa0' + snake: int(accept={str}) = '\U0001f40d' + """) + p = function.parameters['zero'] + self.assertEqual(p.default, '\0') + self.assertEqual(p.converter.py_default, r"'\x00'") + self.assertEqual(p.converter.c_default, '0') + + p = function.parameters['one'] + self.assertEqual(p.default, '\1') + self.assertEqual(p.converter.py_default, r"'\x01'") + self.assertEqual(p.converter.c_default, '0x01') + + p = function.parameters['lf'] + self.assertEqual(p.default, '\n') + self.assertEqual(p.converter.py_default, r"'\n'") + self.assertEqual(p.converter.c_default, r"'\n'") + + p = function.parameters['nbsp'] + self.assertEqual(p.default, '\xa0') + self.assertEqual(p.converter.py_default, r"'\xa0'") + self.assertEqual(p.converter.c_default, '0xa0') + + p = function.parameters['snake'] + self.assertEqual(p.default, '\U0001f40d') + self.assertEqual(p.converter.py_default, "'\U0001f40d'") + self.assertEqual(p.converter.c_default, '0x1f40d') + + def test_param_default_bool(self): + function = self.parse_function(r""" + module test + test.func + bool: bool = True + intbool: bool(accept={int}) = True + intbool2: bool(accept={int}) = 2 + """) + p = function.parameters['bool'] + self.assertIs(p.default, True) + self.assertEqual(p.converter.py_default, 'True') + self.assertEqual(p.converter.c_default, '1') + + p = function.parameters['intbool'] + self.assertIs(p.default, True) + self.assertEqual(p.converter.py_default, 'True') + self.assertEqual(p.converter.c_default, '1') + + p = function.parameters['intbool2'] + self.assertEqual(p.default, 2) + self.assertEqual(p.converter.py_default, '2') + self.assertEqual(p.converter.c_default, '2') + def test_param_default_expr_named_constant(self): function = self.parse_function(""" module os @@ -4432,6 +4613,56 @@ def test_format_escape(self): out = libclinic.format_escape(line) self.assertEqual(out, expected) + def test_c_bytes_repr(self): + c_bytes_repr = libclinic.c_bytes_repr + self.assertEqual(c_bytes_repr(b''), '""') + self.assertEqual(c_bytes_repr(b'abc'), '"abc"') + self.assertEqual(c_bytes_repr(b'\a\b\f\n\r\t\v'), r'"\a\b\f\n\r\t\v"') + self.assertEqual(c_bytes_repr(b' \0\x7f'), r'" \000\177"') + self.assertEqual(c_bytes_repr(b'"'), r'"\""') + self.assertEqual(c_bytes_repr(b"'"), r'''"'"''') + self.assertEqual(c_bytes_repr(b'\\'), r'"\\"') + self.assertEqual(c_bytes_repr(b'??/'), r'"?\?/"') + self.assertEqual(c_bytes_repr(b'???/'), r'"?\?\?/"') + self.assertEqual(c_bytes_repr(b'/*****/ /*/ */*'), r'"/\*****\/ /\*\/ *\/\*"') + self.assertEqual(c_bytes_repr(b'\xa0'), r'"\240"') + self.assertEqual(c_bytes_repr(b'\xff'), r'"\377"') + + def test_c_str_repr(self): + c_str_repr = libclinic.c_str_repr + self.assertEqual(c_str_repr(''), '""') + self.assertEqual(c_str_repr('abc'), '"abc"') + self.assertEqual(c_str_repr('\a\b\f\n\r\t\v'), r'"\a\b\f\n\r\t\v"') + self.assertEqual(c_str_repr(' \0\x7f'), r'" \000\177"') + self.assertEqual(c_str_repr('"'), r'"\""') + self.assertEqual(c_str_repr("'"), r'''"'"''') + self.assertEqual(c_str_repr('\\'), r'"\\"') + self.assertEqual(c_str_repr('??/'), r'"?\?/"') + self.assertEqual(c_str_repr('???/'), r'"?\?\?/"') + self.assertEqual(c_str_repr('/*****/ /*/ */*'), r'"/\*****\/ /\*\/ *\/\*"') + self.assertEqual(c_str_repr('\xa0'), r'"\u00a0"') + self.assertEqual(c_str_repr('\xff'), r'"\u00ff"') + self.assertEqual(c_str_repr('\u20ac'), r'"\u20ac"') + self.assertEqual(c_str_repr('\U0001f40d'), r'"\U0001f40d"') + + def test_c_unichar_repr(self): + c_unichar_repr = libclinic.c_unichar_repr + self.assertEqual(c_unichar_repr('a'), "'a'") + self.assertEqual(c_unichar_repr('\n'), r"'\n'") + self.assertEqual(c_unichar_repr('\b'), r"'\b'") + self.assertEqual(c_unichar_repr('\0'), '0') + self.assertEqual(c_unichar_repr('\1'), '0x01') + self.assertEqual(c_unichar_repr('\x7f'), '0x7f') + self.assertEqual(c_unichar_repr(' '), "' '") + self.assertEqual(c_unichar_repr('"'), """'"'""") + self.assertEqual(c_unichar_repr("'"), r"'\''") + self.assertEqual(c_unichar_repr('\\'), r"'\\'") + self.assertEqual(c_unichar_repr('?'), "'?'") + self.assertEqual(c_unichar_repr('\xa0'), '0xa0') + self.assertEqual(c_unichar_repr('\xff'), '0xff') + self.assertEqual(c_unichar_repr('\u20ac'), '0x20ac') + self.assertEqual(c_unichar_repr('\U0001f40d'), '0x1f40d') + def test_indent_all_lines(self): # Blank lines are expected to be unchanged. self.assertEqual(libclinic.indent_all_lines("", prefix="bar"), "") diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 6b23a5c637a308..9d803dc94b64c7 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -6395,7 +6395,7 @@ datetime_str(PyObject *op) /*[clinic input] datetime.datetime.isoformat - sep: int(accept={str}, c_default="'T'", py_default="'T'") = ord('T') + sep: int(accept={str}) = 'T' timespec: str(c_default="NULL") = 'auto' Return the time formatted according to ISO. @@ -6417,7 +6417,7 @@ terms of the time to include. Valid options are 'auto', 'hours', static PyObject * datetime_datetime_isoformat_impl(PyDateTime_DateTime *self, int sep, const char *timespec) -/*[clinic end generated code: output=9b6ce1383189b0bf input=2fa2512172ccf5d5]*/ +/*[clinic end generated code: output=9b6ce1383189b0bf input=db935a57fa697c5e]*/ { char buffer[100]; diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c index 890f2201b46bc0..66a375589ba38e 100644 --- a/Modules/_testclinic.c +++ b/Modules/_testclinic.c @@ -334,14 +334,14 @@ int_converter a: int = 12 b: int(accept={int}) = 34 - c: int(accept={str}) = 45 + c: int(accept={str}) = '-' / [clinic start generated code]*/ static PyObject * int_converter_impl(PyObject *module, int a, int b, int c) -/*[clinic end generated code: output=8e56b59be7d0c306 input=a1dbc6344853db7a]*/ +/*[clinic end generated code: output=8e56b59be7d0c306 input=9a306d4dc907e339]*/ { RETURN_PACKED_ARGS(3, PyLong_FromLong, long, a, b, c); } @@ -1365,6 +1365,7 @@ clone_f2_impl(PyObject *module, const char *path) class custom_t_converter(CConverter): type = 'custom_t' converter = 'custom_converter' + c_init_default = "" # overridden in pre_render(() def pre_render(self): self.c_default = f'''{{ @@ -1372,7 +1373,7 @@ class custom_t_converter(CConverter): }}''' [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=b2fb801e99a06bf6]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=78fe84e5ecc0481b]*/ /*[clinic input] diff --git a/Modules/binascii.c b/Modules/binascii.c index c076b12fb149b2..2d1d0445ef958a 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -576,7 +576,7 @@ binascii.a2b_base64 When set to true, bytes that are not part of the base64 standard are not allowed. The same applies to excess data after padding (= / ==). Set to True by default if ignorechars is specified, False otherwise. - ignorechars: Py_buffer(py_default="") = None + ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when strict_mode is true. @@ -586,7 +586,7 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, Py_buffer *ignorechars) -/*[clinic end generated code: output=eab37aea4cfa6daa input=3be4937d72943835]*/ +/*[clinic end generated code: output=eab37aea4cfa6daa input=d2d238abf13822ed]*/ { assert(data->len >= 0); @@ -852,7 +852,7 @@ binascii.a2b_ascii85 Allow 'y' as a short form encoding four spaces. adobe: bool = False Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. - ignorechars: Py_buffer(c_default="NULL", py_default="b''") = None + ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. Decode Ascii85 data. @@ -861,7 +861,7 @@ Decode Ascii85 data. static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, int adobe, Py_buffer *ignorechars) -/*[clinic end generated code: output=599aa3e41095a651 input=20796c9b23cec213]*/ +/*[clinic end generated code: output=599aa3e41095a651 input=f39abd11eab4bac0]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -893,9 +893,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } ignorecache_t ignorecache; - if (ignorechars != NULL) { - memset(ignorecache, 0, sizeof(ignorecache)); - } + memset(ignorecache, 0, sizeof(ignorecache)); /* Allocate output buffer. */ size_t bin_len = ascii_len; diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 89b0ebd516f693..bb3b934be69dd7 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -670,9 +670,9 @@ _blake2.blake2b.__new__ as py_blake2b_new data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = _blake2.blake2b.MAX_DIGEST_SIZE - key: Py_buffer(c_default="NULL", py_default="b''") = None - salt: Py_buffer(c_default="NULL", py_default="b''") = None - person: Py_buffer(c_default="NULL", py_default="b''") = None + key: Py_buffer = b'' + salt: Py_buffer = b'' + person: Py_buffer = b'' fanout: int = 1 depth: int = 1 leaf_size: unsigned_long = 0 @@ -693,7 +693,7 @@ py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, unsigned long long node_offset, int node_depth, int inner_size, int last_node, int usedforsecurity, PyObject *string) -/*[clinic end generated code: output=de64bd850606b6a0 input=78cf60a2922d2f90]*/ +/*[clinic end generated code: output=de64bd850606b6a0 input=32832fb37d13c03d]*/ { PyObject *data; if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { @@ -710,9 +710,9 @@ _blake2.blake2s.__new__ as py_blake2s_new data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = _blake2.blake2s.MAX_DIGEST_SIZE - key: Py_buffer(c_default="NULL", py_default="b''") = None - salt: Py_buffer(c_default="NULL", py_default="b''") = None - person: Py_buffer(c_default="NULL", py_default="b''") = None + key: Py_buffer = b'' + salt: Py_buffer = b'' + person: Py_buffer = b'' fanout: int = 1 depth: int = 1 leaf_size: unsigned_long = 0 @@ -733,7 +733,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, unsigned long long node_offset, int node_depth, int inner_size, int last_node, int usedforsecurity, PyObject *string) -/*[clinic end generated code: output=582a0c4295cc3a3c input=6843d6332eefd295]*/ +/*[clinic end generated code: output=582a0c4295cc3a3c input=da467fc9dae646bb]*/ { PyObject *data; if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index 9bcd0eeb008142..05615c1fdd81b9 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -273,19 +273,19 @@ char_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; char a = 'A'; - char b = '\x07'; - char c = '\x08'; + char b = '\a'; + char c = '\b'; char d = '\t'; char e = '\n'; - char f = '\x0b'; - char g = '\x0c'; + char f = '\v'; + char g = '\f'; char h = '\r'; char i = '"'; char j = '\''; char k = '?'; char l = '\\'; - char m = '\x00'; - char n = '\xff'; + char m = '\0'; + char n = '\377'; if (!_PyArg_CheckPositional("char_converter", nargs, 0, 14)) { goto exit; @@ -879,7 +879,7 @@ unsigned_short_converter(PyObject *module, PyObject *const *args, Py_ssize_t nar } PyDoc_STRVAR(int_converter__doc__, -"int_converter($module, a=12, b=34, c=45, /)\n" +"int_converter($module, a=12, b=34, c=\'-\', /)\n" "--\n" "\n"); @@ -895,7 +895,7 @@ int_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) PyObject *return_value = NULL; int a = 12; int b = 34; - int c = 45; + int c = '-'; if (!_PyArg_CheckPositional("int_converter", nargs, 0, 3)) { goto exit; @@ -4600,4 +4600,4 @@ _testclinic_TestClass_posonly_poskw_varpos_array_no_fastcall(PyObject *type, PyO exit: return return_value; } -/*[clinic end generated code: output=290d2e346ea7bfa1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9971dbbc5f62b8d2 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 68ab9c999b2894..513e54b71a0065 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -354,7 +354,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py_buffer data = {NULL, NULL}; int foldspaces = 0; int adobe = 0; - Py_buffer ignorechars = {NULL, NULL}; + Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1281,4 +1281,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=28de2d0774a0a4d7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5c53e7e185700742 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index 97d010d03a4b23..dc70abfaf05543 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -63,9 +63,9 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES; - Py_buffer key = {NULL, NULL}; - Py_buffer salt = {NULL, NULL}; - Py_buffer person = {NULL, NULL}; + Py_buffer key = {.buf = "", .obj = NULL, .len = 0}; + Py_buffer salt = {.buf = "", .obj = NULL, .len = 0}; + Py_buffer person = {.buf = "", .obj = NULL, .len = 0}; int fanout = 1; int depth = 1; unsigned long leaf_size = 0; @@ -272,9 +272,9 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES; - Py_buffer key = {NULL, NULL}; - Py_buffer salt = {NULL, NULL}; - Py_buffer person = {NULL, NULL}; + Py_buffer key = {.buf = "", .obj = NULL, .len = 0}; + Py_buffer salt = {.buf = "", .obj = NULL, .len = 0}; + Py_buffer person = {.buf = "", .obj = NULL, .len = 0}; int fanout = 1; int depth = 1; unsigned long leaf_size = 0; @@ -506,4 +506,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=60a4abbcb8950fe5 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5f76c18211cd7f8f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 6fba75339b3206..f8fd111754a894 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -210,7 +210,7 @@ zlib_decompress(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(zlib_compressobj__doc__, "compressobj($module, /, level=Z_DEFAULT_COMPRESSION, method=DEFLATED,\n" " wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL,\n" -" strategy=Z_DEFAULT_STRATEGY, zdict=None)\n" +" strategy=Z_DEFAULT_STRATEGY, zdict=)\n" "--\n" "\n" "Return a compressor object.\n" @@ -1402,4 +1402,4 @@ zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=fa5fc356f3090cce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=13627e14206d3552 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 8c360ce3b79b8a..157965195e1fa0 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -3113,25 +3113,22 @@ class path_t_converter(CConverter): type = "path_t" impl_by_reference = True parse_by_reference = True + default_type = () + c_init_default = "" # overridden in pre_render(() converter = 'path_converter' def converter_init(self, *, allow_fd=False, make_wide=None, nonstrict=False, nullable=False, suppress_value_error=False): - # right now path_t doesn't support default values. - # to support a default value, you'll need to override initialize(). - if self.default not in (unspecified, None): - fail("Can't specify a default to the path_t converter!") - - if self.c_default not in (None, 'Py_None'): - raise RuntimeError("Can't specify a c_default to the path_t converter!") self.nullable = nullable self.nonstrict = nonstrict self.make_wide = make_wide self.suppress_value_error = suppress_value_error self.allow_fd = allow_fd + if nullable: + self.default_type = NoneType def pre_render(self): def strify(value): @@ -3166,6 +3163,8 @@ class path_t_converter(CConverter): class dir_fd_converter(CConverter): type = 'int' + default_type = NoneType + c_init_default = 'DEFAULT_DIR_FD' def converter_init(self, requires=None): if self.default in (unspecified, None): @@ -3175,6 +3174,9 @@ class dir_fd_converter(CConverter): else: self.converter = 'dir_fd_converter' + def c_default_init(self): + self.c_default = 'DEFAULT_DIR_FD' + class uid_t_converter(CConverter): type = "uid_t" converter = '_Py_Uid_Converter' @@ -3255,7 +3257,7 @@ class confname_converter(CConverter): """, argname=argname, converter=self.converter, table=self.table) [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=d2759f2332cd39b3]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=d58f18bdf3bd3565]*/ /*[clinic input] @@ -5668,7 +5670,7 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot - path: path_t, + path: path_t / Removes everything after the root on Win32. @@ -5676,7 +5678,7 @@ Removes everything after the root on Win32. static PyObject * os__path_splitroot_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=ab7f1a88b654581c input=42831e41f8458f6d]*/ +/*[clinic end generated code: output=ab7f1a88b654581c input=d356de1edb6050a2]*/ { wchar_t *buffer; wchar_t *end; @@ -5978,7 +5980,7 @@ os__path_lexists_impl(PyObject *module, path_t *path) /*[clinic input] os._path_isdir -> bool - path: path_t(allow_fd=True, suppress_value_error=True), + path: path_t(allow_fd=True, suppress_value_error=True) / Return true if the pathname refers to an existing directory. @@ -5987,7 +5989,7 @@ Return true if the pathname refers to an existing directory. static int os__path_isdir_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=d5786196f9e2fa7a input=0d3fd790564d244b]*/ +/*[clinic end generated code: output=d5786196f9e2fa7a input=b15f9b697a7a759f]*/ { return _testFileType(path, PY_IFDIR); } @@ -6056,7 +6058,7 @@ os__path_isjunction_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitroot_ex - path: path_t(make_wide=True, nonstrict=True), + path: path_t(make_wide=True, nonstrict=True) / Split a pathname into drive, root and tail. @@ -6066,7 +6068,7 @@ The tail contains anything after the root. static PyObject * os__path_splitroot_ex_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=4b0072b6cdf4b611 input=4ac47b394d68bd21]*/ +/*[clinic end generated code: output=4b0072b6cdf4b611 input=012fbfad14888b2b]*/ { Py_ssize_t drvsize, rootsize; PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index f546f3ff1cb864..7a8ed979bbe65d 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -550,7 +550,7 @@ zlib.compressobj strategy: int(c_default="Z_DEFAULT_STRATEGY") = Z_DEFAULT_STRATEGY Used to tune the compression algorithm. Possible values are Z_DEFAULT_STRATEGY, Z_FILTERED, and Z_HUFFMAN_ONLY. - zdict: Py_buffer = None + zdict: Py_buffer = NULL The predefined compression dictionary - a sequence of bytes containing subsequences that are likely to occur in the input data. @@ -560,7 +560,7 @@ Return a compressor object. static PyObject * zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, int memLevel, int strategy, Py_buffer *zdict) -/*[clinic end generated code: output=8b5bed9c8fc3814d input=2fa3d026f90ab8d5]*/ +/*[clinic end generated code: output=8b5bed9c8fc3814d input=1a6f61d8a8885c0d]*/ { zlibstate *state = get_zlib_state(module); if (zdict->buf != NULL && (size_t)zdict->len > UINT_MAX) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a65c43874e876c..dd2482ca653fb1 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -86,14 +86,12 @@ class Py_UCS4_converter(CConverter): type = 'Py_UCS4' converter = 'convert_uc' - def converter_init(self): - if self.default is not unspecified: - self.c_default = ascii(self.default) - if len(self.c_default) > 4 or self.c_default[0] != "'": - self.c_default = hex(ord(self.default)) + def c_default_init(self): + import libclinic + self.c_default = libclinic.c_unichar_repr(self.default) [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=88f5dd06cd8e7a61]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=22f057b68fd9a65a]*/ /* --- Globals ------------------------------------------------------------ diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py index f3f09107aefce6..5b4b2b134584ad 100644 --- a/Tools/c-analyzer/c_parser/parser/__init__.py +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -219,7 +219,7 @@ def _iter_source(lines, *, maxtext=11_000, maxlines=200, showtext=False): msg = f''' too much text, try to increase MAX_SIZES[MAXTEXT] in cpython/_parser.py {filename} starting at line {lno_from} to {lno_to} - has code with length {len(text)} greater than {maxtext}: + has code with length {len(srcinfo.text)} greater than {maxtext}: {text} ''' raise RuntimeError(textwrap.dedent(msg)) diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 890b0eb0bd7079..a251a045b91144 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -344,6 +344,7 @@ def format_tsv_lines(lines): _abs('Modules/_ssl_data_300.h'): (80_000, 10_000), _abs('Modules/_ssl_data_111.h'): (80_000, 10_000), _abs('Modules/cjkcodecs/mappings_*.h'): (160_000, 2_000), + _abs('Modules/clinic/_testclinic.c.h'): (120_000, 5_000), _abs('Modules/unicodedata_db.h'): (180_000, 3_000), _abs('Modules/unicodename_db.h'): (1_200_000, 15_000), _abs('Objects/unicodetype_db.h'): (240_000, 3_000), diff --git a/Tools/clinic/libclinic/__init__.py b/Tools/clinic/libclinic/__init__.py index 9e9bdeadcc0fe1..5ee165d0c138a8 100644 --- a/Tools/clinic/libclinic/__init__.py +++ b/Tools/clinic/libclinic/__init__.py @@ -7,7 +7,9 @@ ) from .formatting import ( SIG_END_MARKER, - c_repr, + c_str_repr, + c_bytes_repr, + c_unichar_repr, docstring_for_c_string, format_escape, indent_all_lines, @@ -26,7 +28,7 @@ from .utils import ( FormatCounterFormatter, NULL, - Null, + NullType, Sentinels, VersionTuple, compute_checksum, @@ -45,7 +47,9 @@ # Formatting helpers "SIG_END_MARKER", - "c_repr", + "c_str_repr", + "c_bytes_repr", + "c_unichar_repr", "docstring_for_c_string", "format_escape", "indent_all_lines", @@ -64,7 +68,7 @@ # Utility functions "FormatCounterFormatter", "NULL", - "Null", + "NullType", "Sentinels", "VersionTuple", "compute_checksum", diff --git a/Tools/clinic/libclinic/clanguage.py b/Tools/clinic/libclinic/clanguage.py index 341667d2f0bff9..7f02c7790f015a 100644 --- a/Tools/clinic/libclinic/clanguage.py +++ b/Tools/clinic/libclinic/clanguage.py @@ -101,7 +101,7 @@ def compiler_deprecated_warning( code = self.COMPILER_DEPRECATION_WARNING_PROTOTYPE.format( major=minversion[0], minor=minversion[1], - message=libclinic.c_repr(message), + message=libclinic.c_str_repr(message), ) return libclinic.normalize_snippet(code) diff --git a/Tools/clinic/libclinic/converter.py b/Tools/clinic/libclinic/converter.py index ac66e79f93b735..c10235237d4b71 100644 --- a/Tools/clinic/libclinic/converter.py +++ b/Tools/clinic/libclinic/converter.py @@ -6,7 +6,7 @@ import libclinic from libclinic import fail -from libclinic import Sentinels, unspecified, unknown +from libclinic import Sentinels, unspecified, unknown, NULL from libclinic.codegen import CRenderData, Include, TemplateDict from libclinic.function import Function, Parameter @@ -83,9 +83,9 @@ class CConverter(metaclass=CConverterAutoRegister): # at runtime). default: object = unspecified - # If not None, default must be isinstance() of this type. + # default must be isinstance() of this type. # (You can also specify a tuple of types.) - default_type: bltns.type[object] | tuple[bltns.type[object], ...] | None = None + default_type: bltns.type[object] | tuple[bltns.type[object], ...] = object # "default" converted into a C value, as a string. # Or None if there is no default. @@ -95,6 +95,13 @@ class CConverter(metaclass=CConverterAutoRegister): # Or None if there is no default. py_default: str | None = None + # The default value used to initialize the C variable when + # there is no default. + # + # Every non-abstract subclass with non-trivial cleanup() should supply + # a valid value. + c_init_default: str = '' + # The default value used to initialize the C variable when # there is no default, but not specifying a default may # result in an "uninitialized variable" warning. This can @@ -105,7 +112,7 @@ class CConverter(metaclass=CConverterAutoRegister): # # This value is specified as a string. # Every non-abstract subclass should supply a valid value. - c_ignored_default: str = 'NULL' + c_ignored_default: str = '' # If true, wrap with Py_UNUSED. unused = False @@ -182,9 +189,25 @@ def __init__(self, self.unused = unused self._includes: list[Include] = [] + if c_default: + self.c_default = c_default + if py_default: + self.py_default = py_default + + if annotation is not unspecified: + fail("The 'annotation' parameter is not currently permitted.") + + # Make sure not to set self.function until after converter_init() has been called. + # This prevents you from caching information + # about the function in converter_init(). + # (That breaks if we get cloned.) + self.converter_init(**kwargs) + if default is not unspecified: - if (self.default_type - and default is not unknown + if self.default_type == (): + conv_name = self.__class__.__name__.removesuffix('_converter') + fail(f"A '{conv_name}' parameter cannot be marked optional.") + if (default is not unknown and not isinstance(default, self.default_type) ): if isinstance(self.default_type, type): @@ -197,19 +220,38 @@ def __init__(self, f"{name!r} is not of type {types_str!r}") self.default = default - if c_default: - self.c_default = c_default - if py_default: - self.py_default = py_default - - if annotation is not unspecified: - fail("The 'annotation' parameter is not currently permitted.") + if not self.c_default: + if default is unspecified: + if self.c_init_default: + self.c_default = self.c_init_default + elif default is NULL: + self.c_default = self.c_ignored_default or self.c_init_default + if not self.c_default: + cls_name = self.__class__.__name__ + fail(f"{cls_name}: c_default is required for " + f"default value NULL") + else: + assert default is not unknown + self.c_default_init() + if not self.c_default: + if default is None: + self.c_default = self.c_init_default + if not self.c_default: + cls_name = self.__class__.__name__ + fail(f"{cls_name}: c_default is required for " + f"default value None") + elif isinstance(default, str): + self.c_default = libclinic.c_str_repr(default) + elif isinstance(default, bytes): + self.c_default = libclinic.c_bytes_repr(default) + elif isinstance(default, (int, float)): + self.c_default = repr(default) + else: + cls_name = self.__class__.__name__ + fail(f"{cls_name}: c_default is required for " + f"default value {default!r}") + fail(f"Unsupported default value {default!r}.") - # Make sure not to set self.function until after converter_init() has been called. - # This prevents you from caching information - # about the function in converter_init(). - # (That breaks if we get cloned.) - self.converter_init(**kwargs) self.function = function # Add a custom __getattr__ method to improve the error message @@ -233,6 +275,9 @@ def __getattr__(self, attr): def converter_init(self) -> None: pass + def c_default_init(self) -> None: + return + def is_optional(self) -> bool: return (self.default is not unspecified) @@ -324,7 +369,7 @@ def parse_argument(self, args: list[str]) -> None: args.append(self.converter) if self.encoding: - args.append(libclinic.c_repr(self.encoding)) + args.append(libclinic.c_str_repr(self.encoding)) elif self.subclass_of: args.append(self.subclass_of) @@ -371,7 +416,7 @@ def declaration(self, *, in_parser: bool = False) -> str: declaration = [self.simple_declaration(in_parser=True)] default = self.c_default if not default and self.parameter.group: - default = self.c_ignored_default + default = self.c_ignored_default or self.c_init_default if default: declaration.append(" = ") declaration.append(default) diff --git a/Tools/clinic/libclinic/converters.py b/Tools/clinic/libclinic/converters.py index bc21ae84e1c332..76091a9eedc1bf 100644 --- a/Tools/clinic/libclinic/converters.py +++ b/Tools/clinic/libclinic/converters.py @@ -4,7 +4,7 @@ from types import NoneType from typing import Any -from libclinic import fail, Null, unspecified, unknown +from libclinic import fail, NullType, unspecified, NULL, c_bytes_repr, c_unichar_repr from libclinic.function import ( Function, Parameter, CALLABLE, STATIC_METHOD, CLASS_METHOD, METHOD_INIT, METHOD_NEW, @@ -19,6 +19,8 @@ class BaseUnsignedIntConverter(CConverter): bitwise = False + default_type = int + c_ignored_default = '0' def use_converter(self) -> None: if self.converter: @@ -107,12 +109,13 @@ class bool_converter(CConverter): def converter_init(self, *, accept: TypeSet = {object}) -> None: if accept == {int}: self.format_unit = 'i' + self.default_type = int # type: ignore[assignment] elif accept != {object}: fail(f"bool_converter: illegal 'accept' argument {accept!r}") - if self.default is not unspecified and self.default is not unknown: - self.default = bool(self.default) - if self.c_default in {'Py_True', 'Py_False'}: - self.c_default = str(int(self.default)) + + def c_default_init(self) -> None: + assert isinstance(self.default, int) + self.c_default = str(int(self.default)) def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'i': @@ -140,6 +143,7 @@ class defining_class_converter(CConverter): this is the default converter used for the defining class. """ type = 'PyTypeObject *' + default_type = () format_unit = '' show_in_signature = False specified_type: str | None = None @@ -156,7 +160,7 @@ def set_template_dict(self, template_dict: TemplateDict) -> None: class char_converter(CConverter): type = 'char' - default_type = (bytes, bytearray) + default_type = bytes format_unit = 'c' c_ignored_default = "'\0'" @@ -165,9 +169,18 @@ def converter_init(self) -> None: if len(self.default) != 1: fail(f"char_converter: illegal default value {self.default!r}") - self.c_default = repr(bytes(self.default))[1:] - if self.c_default == '"\'"': - self.c_default = r"'\''" + def c_default_init(self) -> None: + default = self.default + assert isinstance(default, bytes) + if default == b"'": + self.c_default = r"'\''" + elif default == b'"': + self.c_default = r"""'"'""" + elif default == b'\0': + self.c_default = r"'\0'" + else: + r = c_bytes_repr(default)[1:-1] + self.c_default = "'" + r + "'" def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'c': @@ -207,7 +220,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st @add_legacy_c_converter('B', bitwise=True) class unsigned_char_converter(BaseUnsignedIntConverter): type = 'unsigned char' - default_type = int format_unit = 'b' c_ignored_default = "'\0'" @@ -282,8 +294,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class unsigned_short_converter(BaseUnsignedIntConverter): type = 'unsigned short' - default_type = int - c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: self.bitwise = bitwise @@ -305,11 +315,19 @@ def converter_init( ) -> None: if accept == {str}: self.format_unit = 'C' + self.default_type = str # type: ignore[assignment] + if isinstance(self.default, str): + if len(self.default) != 1: + fail(f"int_converter: illegal default value {self.default!r}") elif accept != {int}: fail(f"int_converter: illegal 'accept' argument {accept!r}") if type is not None: self.type = type + def c_default_init(self) -> None: + if isinstance(self.default, str): + self.c_default = c_unichar_repr(self.default) + def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'i': return self.format_code(""" @@ -343,8 +361,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class unsigned_int_converter(BaseUnsignedIntConverter): type = 'unsigned int' - default_type = int - c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: self.bitwise = bitwise @@ -374,8 +390,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class unsigned_long_converter(BaseUnsignedIntConverter): type = 'unsigned long' - default_type = int - c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: self.bitwise = bitwise @@ -405,8 +419,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class unsigned_long_long_converter(BaseUnsignedIntConverter): type = 'unsigned long long' - default_type = int - c_ignored_default = "0" def converter_init(self, *, bitwise: bool = False) -> None: self.bitwise = bitwise @@ -418,6 +430,7 @@ def converter_init(self, *, bitwise: bool = False) -> None: class Py_ssize_t_converter(CConverter): type = 'Py_ssize_t' + default_type = (int, NoneType) c_ignored_default = "0" def converter_init(self, *, accept: TypeSet = {int}, @@ -425,7 +438,7 @@ def converter_init(self, *, accept: TypeSet = {int}, self.allow_negative = allow_negative if accept == {int}: self.format_unit = 'n' - self.default_type = int + self.default_type = int # type: ignore[assignment] elif accept == {int, NoneType}: if self.allow_negative: self.converter = '_Py_convert_optional_to_ssize_t' @@ -501,10 +514,13 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class slice_index_converter(CConverter): type = 'Py_ssize_t' + default_type = (int, NoneType) + c_ignored_default = "0" def converter_init(self, *, accept: TypeSet = {int, NoneType}) -> None: if accept == {int}: self.converter = '_PyEval_SliceIndexNotNone' + self.default_type = int # type: ignore[assignment] self.nullable = False elif accept == {int, NoneType}: self.converter = '_PyEval_SliceIndex' @@ -554,7 +570,6 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class size_t_converter(BaseUnsignedIntConverter): type = 'size_t' converter = '_PyLong_Size_t_Converter' - c_ignored_default = "0" def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'n': @@ -673,6 +688,7 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class object_converter(CConverter): type = 'PyObject *' format_unit = 'O' + c_ignored_default = 'NULL' def converter_init( self, *, @@ -692,6 +708,10 @@ def converter_init( if type is not None: self.type = type + def c_default_init(self) -> None: + default = self.default + if default is None or isinstance(default, bool): + self.c_default = "Py_" + repr(default) # # We define three conventions for buffer types in the 'accept' argument: @@ -721,8 +741,9 @@ def str_converter_key( class str_converter(CConverter): type = 'const char *' - default_type = (str, Null, NoneType) + default_type = (str, bytes, NullType, NoneType) format_unit = 's' + c_ignored_default = 'NULL' def converter_init( self, @@ -740,14 +761,16 @@ def converter_init( self.format_unit = format_unit self.length = bool(zeroes) if encoding: - if self.default not in (Null, None, unspecified): + if self.default not in (NULL, None, unspecified): fail("str_converter: Argument Clinic doesn't support default values for encoded strings") self.encoding = encoding self.type = 'char *' # sorry, clinic can't support preallocated buffers # for es# and et# self.c_default = "NULL" - if NoneType in accept and self.c_default == "Py_None": + + def c_default_init(self) -> None: + if self.default is None: self.c_default = "NULL" def post_parsing(self) -> str: @@ -860,6 +883,7 @@ class PyBytesObject_converter(CConverter): type = 'PyBytesObject *' format_unit = 'S' # accept = {bytes} + c_ignored_default = 'NULL' def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'S': @@ -880,6 +904,7 @@ class PyByteArrayObject_converter(CConverter): type = 'PyByteArrayObject *' format_unit = 'Y' # accept = {bytearray} + c_ignored_default = 'NULL' def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'Y': @@ -898,8 +923,9 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class unicode_converter(CConverter): type = 'PyObject *' - default_type = (str, Null, NoneType) + default_type = (str, NullType, NoneType) format_unit = 'U' + c_ignored_default = 'NULL' def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: if self.format_unit == 'U': @@ -918,11 +944,11 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st class _unicode_fs_converter_base(CConverter): type = 'PyObject *' + default_type = NullType + c_init_default = 'NULL' - def converter_init(self) -> None: - if self.default is not unspecified: - fail(f"{self.__class__.__name__} does not support default values") - self.c_default = 'NULL' + def c_default_init(self) -> None: + fail(f"{self.__class__.__name__} does not support default values") def cleanup(self) -> str: return f"Py_XDECREF({self.parser_name});" @@ -942,7 +968,8 @@ class unicode_fs_decoded_converter(_unicode_fs_converter_base): @add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True) class Py_UNICODE_converter(CConverter): type = 'const wchar_t *' - default_type = (str, Null, NoneType) + default_type = (str, NullType, NoneType) + c_ignored_default = 'NULL' def converter_init( self, *, @@ -958,6 +985,7 @@ def converter_init( self.accept = accept if accept == {str}: self.converter = '_PyUnicode_WideCharString_Converter' + self.default_type = (str, NullType) # type: ignore[assignment] elif accept == {str, NoneType}: self.converter = '_PyUnicode_WideCharString_Opt_Converter' else: @@ -1013,28 +1041,34 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st @add_legacy_c_converter('w*', accept={rwbuffer}) class Py_buffer_converter(CConverter): type = 'Py_buffer' + default_type = (str, bytes, NullType, NoneType) format_unit = 'y*' impl_by_reference = True - c_ignored_default = "{NULL, NULL}" + c_init_default = "{NULL, NULL}" def converter_init(self, *, accept: TypeSet = {buffer}) -> None: - if self.default not in (unspecified, None): - fail("The only legal default value for Py_buffer is None.") - - self.c_default = self.c_ignored_default - if accept == {str, buffer, NoneType}: - format_unit = 'z*' + self.format_unit = 'z*' + self.default_type = (str, bytes, NullType, NoneType) elif accept == {str, buffer}: - format_unit = 's*' + self.format_unit = 's*' + self.default_type = (str, bytes, NullType) # type: ignore[assignment] elif accept == {buffer}: - format_unit = 'y*' + self.format_unit = 'y*' + self.default_type = (bytes, NullType) # type: ignore[assignment] elif accept == {rwbuffer}: - format_unit = 'w*' + self.format_unit = 'w*' + self.default_type = NullType # type: ignore[assignment] else: fail("Py_buffer_converter: illegal combination of arguments") - self.format_unit = format_unit + def c_default_init(self) -> None: + default = self.default + if isinstance(default, bytes): + self.c_default = f'{{.buf = {c_bytes_repr(default)}, .obj = NULL, .len = {len(default)}}}' + elif isinstance(default, str): + default = default.encode() + self.c_default = f'{{.buf = {c_bytes_repr(default)}, .obj = NULL, .len = {len(default)}}}' def cleanup(self) -> str: name = self.name @@ -1115,6 +1149,7 @@ class self_converter(CConverter): this is the default converter used for "self". """ type: str | None = None + default_type = () format_unit = '' specified_type: str | None = None @@ -1229,6 +1264,7 @@ def use_pyobject_self(self, func: Function) -> bool: # Converters for var-positional parameter. class VarPosCConverter(CConverter): + default_type = () format_unit = '' def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: @@ -1241,8 +1277,7 @@ def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int, class varpos_tuple_converter(VarPosCConverter): type = 'PyObject *' - format_unit = '' - c_default = 'NULL' + c_init_default = 'NULL' def cleanup(self) -> str: return f"""Py_XDECREF({self.parser_name});\n""" @@ -1299,7 +1334,6 @@ def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int, class varpos_array_converter(VarPosCConverter): type = 'PyObject * const *' length = True - c_ignored_default = '' def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int, fastcall: bool, limited_capi: bool) -> str: @@ -1324,6 +1358,7 @@ def parse_vararg(self, *, pos_only: int, min_pos: int, max_pos: int, # Converters for var-keyword parameters. class VarKeywordCConverter(CConverter): + default_type = () format_unit = '' def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> str | None: @@ -1335,7 +1370,7 @@ def parse_var_keyword(self) -> str: class var_keyword_dict_converter(VarKeywordCConverter): type = 'PyObject *' - c_default = 'NULL' + c_init_default = 'NULL' def cleanup(self) -> str: return f'Py_XDECREF({self.parser_name});\n' diff --git a/Tools/clinic/libclinic/dsl_parser.py b/Tools/clinic/libclinic/dsl_parser.py index 0d83baeba9e508..90e2e0d3d9c928 100644 --- a/Tools/clinic/libclinic/dsl_parser.py +++ b/Tools/clinic/libclinic/dsl_parser.py @@ -7,7 +7,7 @@ import shlex import sys from collections.abc import Callable -from types import FunctionType, NoneType +from types import FunctionType from typing import TYPE_CHECKING, Any, NamedTuple import libclinic @@ -947,16 +947,17 @@ def parse_parameter(self, line: str) -> None: name = f'var_keyword_{name}' value: object + has_c_default = 'c_default' in kwargs if not function_args.defaults: - if is_vararg or is_var_keyword: - value = NULL - else: - if self.parameter_state is ParamState.OPTIONAL: - fail(f"Can't have a parameter without a default ({parameter_name!r}) " - "after a parameter with a default!") - value = unspecified + value = unspecified + if (not is_vararg and not is_var_keyword + and self.parameter_state is ParamState.OPTIONAL): + fail(f"Can't have a parameter without a default ({parameter_name!r}) " + "after a parameter with a default!") if 'py_default' in kwargs: fail("You can't specify py_default without specifying a default value!") + if has_c_default: + fail("You can't specify c_default without specifying a default value!") else: expr = function_args.defaults[0] default = ast_input[expr.col_offset: expr.end_col_offset].strip() @@ -965,7 +966,7 @@ def parse_parameter(self, line: str) -> None: self.parameter_state = ParamState.OPTIONAL bad = False try: - if 'c_default' not in kwargs: + if not has_c_default: # we can only represent very simple data values in C. # detect whether default is okay, via a denylist # of disallowed ast nodes. @@ -1011,18 +1012,15 @@ def bad_node(self, node: ast.AST) -> None: fail(f"Unsupported expression as default value: {default!r}") # mild hack: explicitly support NULL as a default value - c_default: str | None if isinstance(expr, ast.Name) and expr.id == 'NULL': value = NULL py_default = '' - c_default = "NULL" elif (isinstance(expr, ast.BinOp) or (isinstance(expr, ast.UnaryOp) and not (isinstance(expr.operand, ast.Constant) and type(expr.operand.value) in {int, float, complex}) )): - c_default = kwargs.get("c_default") - if not (isinstance(c_default, str) and c_default): + if not has_c_default: fail(f"When you specify an expression ({default!r}) " f"as your default value, " f"you MUST specify a valid c_default.", @@ -1041,8 +1039,7 @@ def bad_node(self, node: ast.AST) -> None: a.append(n.id) py_default = ".".join(reversed(a)) - c_default = kwargs.get("c_default") - if not (isinstance(c_default, str) and c_default): + if not has_c_default: fail(f"When you specify a named constant ({py_default!r}) " "as your default value, " "you MUST specify a valid c_default.") @@ -1054,23 +1051,15 @@ def bad_node(self, node: ast.AST) -> None: else: value = ast.literal_eval(expr) py_default = repr(value) - if isinstance(value, (bool, NoneType)): - c_default = "Py_" + py_default - elif isinstance(value, str): - c_default = libclinic.c_repr(value) - else: - c_default = py_default except (ValueError, AttributeError): value = unknown - c_default = kwargs.get("c_default") py_default = default - if not (isinstance(c_default, str) and c_default): + if not has_c_default: fail("When you specify a named constant " f"({py_default!r}) as your default value, " "you MUST specify a valid c_default.") - kwargs.setdefault('c_default', c_default) kwargs.setdefault('py_default', py_default) dict = legacy_converters if legacy else converters @@ -1093,12 +1082,10 @@ def bad_node(self, node: ast.AST) -> None: if isinstance(converter, self_converter): if len(self.function.parameters) == 1: - if self.parameter_state is not ParamState.REQUIRED: - fail("A 'self' parameter cannot be marked optional.") - if value is not unspecified: - fail("A 'self' parameter cannot have a default value.") if self.group: fail("A 'self' parameter cannot be in an optional group.") + assert self.parameter_state is ParamState.REQUIRED + assert value is unspecified kind = inspect.Parameter.POSITIONAL_ONLY self.parameter_state = ParamState.START self.function.parameters.clear() @@ -1109,14 +1096,12 @@ def bad_node(self, node: ast.AST) -> None: if isinstance(converter, defining_class_converter): _lp = len(self.function.parameters) if _lp == 1: - if self.parameter_state is not ParamState.REQUIRED: - fail("A 'defining_class' parameter cannot be marked optional.") - if value is not unspecified: - fail("A 'defining_class' parameter cannot have a default value.") if self.group: fail("A 'defining_class' parameter cannot be in an optional group.") if self.function.cls is None: fail("A 'defining_class' parameter cannot be defined at module level.") + assert self.parameter_state is ParamState.REQUIRED + assert value is unspecified kind = inspect.Parameter.POSITIONAL_ONLY else: fail("A 'defining_class' parameter, if specified, must either " diff --git a/Tools/clinic/libclinic/formatting.py b/Tools/clinic/libclinic/formatting.py index 873ece6210017a..264327818c1d19 100644 --- a/Tools/clinic/libclinic/formatting.py +++ b/Tools/clinic/libclinic/formatting.py @@ -39,8 +39,55 @@ def _quoted_for_c_string(text: str) -> str: return text -def c_repr(text: str) -> str: - return '"' + text + '"' +# Use octals, because \x... in C has arbitrary number of hexadecimal digits. +_c_repr = [chr(i) if 32 <= i < 127 else fr'\{i:03o}' for i in range(256)] +_c_repr[ord('"')] = r'\"' +_c_repr[ord('\\')] = r'\\' +_c_repr[ord('\a')] = r'\a' +_c_repr[ord('\b')] = r'\b' +_c_repr[ord('\f')] = r'\f' +_c_repr[ord('\n')] = r'\n' +_c_repr[ord('\r')] = r'\r' +_c_repr[ord('\t')] = r'\t' +_c_repr[ord('\v')] = r'\v' + +def _break_trigraphs(s: str) -> str: + # Prevent trigraphs from being interpreted inside string literals. + if '??' in s: + s = s.replace('??', r'?\?') + s = s.replace(r'\??', r'\?\?') + # Also Argument Clinic does not like comment-like sequences + # in string literals. + s = s.replace(r'/*', r'/\*') + s = s.replace(r'*/', r'*\/') + return s + +def c_bytes_repr(data: bytes) -> str: + r = ''.join(_c_repr[i] for i in data) + r = _break_trigraphs(r) + return '"' + r + '"' + +def c_str_repr(text: str) -> str: + r = ''.join(_c_repr[i] if i < 0x80 + else fr'\u{i:04x}' if i < 0x10000 + else fr'\U{i:08x}' + for i in map(ord, text)) + r = _break_trigraphs(r) + return '"' + r + '"' + +def c_unichar_repr(char: str) -> str: + if char == "'": + return r"'\''" + if char == '"': + return """'"'""" + if char == '\0': + return '0' + i = ord(char) + if i < 0x80: + r = _c_repr[i] + if not r.startswith((r'\0', r'\1')): + return "'" + r + "'" + return f'0x{i:02x}' def wrapped_c_string_literal( @@ -58,8 +105,8 @@ def wrapped_c_string_literal( drop_whitespace=False, break_on_hyphens=False, ) - separator = c_repr(suffix + "\n" + subsequent_indent * " ") - return initial_indent * " " + c_repr(separator.join(wrapped)) + separator = '"' + suffix + "\n" + subsequent_indent * " " + '"' + return initial_indent * " " + '"' + separator.join(wrapped) + '"' def _add_prefix_and_suffix(text: str, *, prefix: str = "", suffix: str = "") -> str: diff --git a/Tools/clinic/libclinic/utils.py b/Tools/clinic/libclinic/utils.py index 17e8f35be73bf4..3df64f270dd074 100644 --- a/Tools/clinic/libclinic/utils.py +++ b/Tools/clinic/libclinic/utils.py @@ -85,9 +85,9 @@ def __repr__(self) -> str: # This one needs to be a distinct class, unlike the other two -class Null: +class NullType: def __repr__(self) -> str: return '' -NULL = Null() +NULL = NullType()