Skip to content

Commit

Permalink
pythongh-85858: Remove PyUnicode_InternImmortal() function (python#92579
Browse files Browse the repository at this point in the history
)

Remove the PyUnicode_InternImmortal() function and the
SSTATE_INTERNED_IMMORTAL macro.

The PyUnicode_InternImmortal() function is still exported in the
stable ABI. The function is removed from the API.

PyASCIIObject.state.interned size is now a single bit, rather than 2
bits.

Keep SSTATE_NOT_INTERNED and SSTATE_INTERNED_MORTAL macros for
backward compatibility, but no longer use them internally since the
interned member is now a single bit and so can only have two values
(interned or not interned).

Update stats of _PyUnicode_ClearInterned().
  • Loading branch information
vstinner committed May 13, 2022
1 parent f62ad4f commit 059b5ba
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 67 deletions.
1 change: 0 additions & 1 deletion Doc/data/stable_abi.dat

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,7 @@ Removed
* :c:func:`PyUnicode_GET_SIZE`
* :c:func:`PyUnicode_GetSize`
* :c:func:`PyUnicode_GET_DATA_SIZE`

* Remove the ``PyUnicode_InternImmortal()`` function and the
``SSTATE_INTERNED_IMMORTAL`` macro.
(Contributed by Victor Stinner in :gh:`85858`.)
13 changes: 3 additions & 10 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,9 @@ typedef struct {
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
struct {
/*
SSTATE_NOT_INTERNED (0)
SSTATE_INTERNED_MORTAL (1)
SSTATE_INTERNED_IMMORTAL (2)
If interned != SSTATE_NOT_INTERNED, the two references from the
dictionary to this object are *not* counted in ob_refcnt.
*/
unsigned int interned:2;
/* If interned is set, the two references from the
dictionary to this object are *not* counted in ob_refcnt. */
unsigned int interned:1;
/* Character size:
- PyUnicode_1BYTE_KIND (1):
Expand Down Expand Up @@ -189,7 +183,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
/* Interning state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

/* Use only if you know it's a string */
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
Expand Down
4 changes: 0 additions & 4 deletions Include/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,6 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
const char *u /* UTF-8 encoded string */
);

// PyUnicode_InternImmortal() is deprecated since Python 3.10
// and will be removed in Python 3.12. Use PyUnicode_InternInPlace() instead.
Py_DEPRECATED(3.10) PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);

/* --- wchar_t support for platforms which support it --------------------- */

#ifdef HAVE_WCHAR_H
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Remove the ``PyUnicode_InternImmortal()`` function and the
``SSTATE_INTERNED_IMMORTAL`` macro. Patch by Victor Stinner.
1 change: 1 addition & 0 deletions Misc/stable_abi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1563,6 +1563,7 @@
added = '3.2'
[function.PyUnicode_InternImmortal]
added = '3.2'
abi_only = true
[function.PyUnicode_InternInPlace]
added = '3.2'
[data.PyUnicode_Type]
Expand Down
69 changes: 17 additions & 52 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1516,13 +1516,8 @@ unicode_dealloc(PyObject *unicode)
}
#endif

switch (PyUnicode_CHECK_INTERNED(unicode)) {
case SSTATE_NOT_INTERNED:
break;

case SSTATE_INTERNED_MORTAL:
{
#ifdef INTERNED_STRINGS
if (PyUnicode_CHECK_INTERNED(unicode)) {
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
Expand All @@ -1536,17 +1531,8 @@ unicode_dealloc(PyObject *unicode)
}
assert(Py_REFCNT(unicode) == 1);
Py_SET_REFCNT(unicode, 0);
#endif
break;
}

case SSTATE_INTERNED_IMMORTAL:
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
break;

default:
Py_UNREACHABLE();
}
#endif

if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyObject_Free(_PyUnicode_UTF8(unicode));
Expand Down Expand Up @@ -14674,31 +14660,22 @@ PyUnicode_InternInPlace(PyObject **p)
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
_PyUnicode_STATE(s).interned = 1;
#else
// PyDict expects that interned strings have their hash
// (PyASCIIObject.hash) already computed.
(void)unicode_hash(s);
#endif
}

// Function kept for the stable ABI.
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
void
PyUnicode_InternImmortal(PyObject **p)
{
if (PyErr_WarnEx(PyExc_DeprecationWarning,
"PyUnicode_InternImmortal() is deprecated; "
"use PyUnicode_InternInPlace() instead", 1) < 0)
{
// The function has no return value, the exception cannot
// be reported to the caller, so just log it.
PyErr_WriteUnraisable(NULL);
}

PyUnicode_InternInPlace(p);
if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
Py_INCREF(*p);
}
// Leak a reference on purpose
Py_INCREF(*p);
}

PyObject *
Expand Down Expand Up @@ -14733,37 +14710,25 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));

Py_ssize_t immortal_size = 0, mortal_size = 0;
Py_ssize_t total_length = 0;
#endif
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
#ifdef INTERNED_STATS
immortal_size += PyUnicode_GET_LENGTH(s);
#endif
break;
case SSTATE_INTERNED_MORTAL:
// Restore the two references (key and value) ignored
// by PyUnicode_InternInPlace().
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
assert(PyUnicode_CHECK_INTERNED(s));
// Restore the two references (key and value) ignored
// by PyUnicode_InternInPlace().
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
#ifdef INTERNED_STATS
mortal_size += PyUnicode_GET_LENGTH(s);
total_length += PyUnicode_GET_LENGTH(s);
#endif
break;
case SSTATE_NOT_INTERNED:
/* fall through */
default:
Py_UNREACHABLE();
}
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;

_PyUnicode_STATE(s).interned = 0;
}
#ifdef INTERNED_STATS
fprintf(stderr,
"total size of all interned strings: %zd/%zd mortal/immortal\n",
mortal_size, immortal_size);
"total length of all interned strings: %zd characters\n",
total_length);
#endif

PyDict_Clear(interned);
Expand Down

0 comments on commit 059b5ba

Please sign in to comment.