Skip to content

Commit

Permalink
bpo-44582: Accelerate mimetypes.init on Windows with a native acceler…
Browse files Browse the repository at this point in the history
…ator (pythonGH-27059)
  • Loading branch information
zooba committed Jul 8, 2021
1 parent af4a2dc commit bbf2fb6
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 7 deletions.
27 changes: 22 additions & 5 deletions Lib/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
import sys
import posixpath
import urllib.parse

try:
from _winapi import _mimetypes_read_windows_registry
except ImportError:
_mimetypes_read_windows_registry = None

try:
import winreg as _winreg
except ImportError:
Expand Down Expand Up @@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True):
types.
"""

# Windows only
if not _winreg:
if not _mimetypes_read_windows_registry and not _winreg:
return

add_type = self.add_type
if strict:
add_type = lambda type, ext: self.add_type(type, ext, True)

# Accelerated function if it is available
if _mimetypes_read_windows_registry:
_mimetypes_read_windows_registry(add_type)
elif _winreg:
self._read_windows_registry(add_type)

@classmethod
def _read_windows_registry(cls, add_type):
def enum_types(mimedb):
i = 0
while True:
Expand All @@ -265,7 +282,7 @@ def enum_types(mimedb):
subkey, 'Content Type')
if datatype != _winreg.REG_SZ:
continue
self.add_type(mimetype, subkeyname, strict)
add_type(mimetype, subkeyname)
except OSError:
continue

Expand Down Expand Up @@ -349,8 +366,8 @@ def init(files=None):

if files is None or _db is None:
db = MimeTypes()
if _winreg:
db.read_windows_registry()
# Quick return if not supported
db.read_windows_registry()

if files is None:
files = knownfiles
Expand Down
21 changes: 20 additions & 1 deletion Lib/test/test_mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
from test.support import os_helper
from platform import win32_edition

try:
import _winapi
except ImportError:
_winapi = None


def setUpModule():
global knownfiles
Expand Down Expand Up @@ -235,6 +240,21 @@ def test_registry_parsing(self):
eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
eq(self.db.guess_type("image.png"), ("image/png", None))

@unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"),
                 "read_windows_registry accelerator unavailable")
def test_registry_accelerator(self):
    """Check the native registry scan against the pure-Python one.

    Both implementations are driven with a callback that groups the
    reported MIME types by extension; the resulting mappings must list
    the same extensions in the same order, with the same set of types
    for each extension.
    """
    def make_collector(mapping):
        # Callbacks are invoked positionally as (type, ext).
        def on_type_read(mime_type, extension):
            mapping.setdefault(extension, set()).add(mime_type)
        return on_type_read

    accel_types = {}
    python_types = {}
    _winapi._mimetypes_read_windows_registry(make_collector(accel_types))
    mimetypes.MimeTypes._read_windows_registry(make_collector(python_types))

    # Compare the key order first, then the types found per extension.
    self.assertEqual(list(python_types), list(accel_types))
    for extension, expected in python_types.items():
        self.assertEqual(expected, accel_types[extension])


class MiscTestCase(unittest.TestCase):
def test__all__(self):
Expand Down Expand Up @@ -288,6 +308,5 @@ def test_guess_type(self):
type_info = self.mimetypes_cmd("foo.pic")
eq(type_info, "I don't know anything about type foo.pic")


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up :mod:`mimetypes` initialization on Windows by using a native
implementation of the registry scan.
108 changes: 108 additions & 0 deletions Modules/_winapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
return result;
}

/*[clinic input]
_winapi._mimetypes_read_windows_registry
on_type_read: object
Optimized function for reading all known MIME types from the registry.
*on_type_read* is a callable taking *type* and *ext* arguments, as for
MimeTypes.add_type.
[clinic start generated code]*/

static PyObject *
_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
                                              PyObject *on_type_read)
/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/
{
    /* Enumerate the subkeys of HKEY_CLASSES_ROOT whose names start with '.',
       read each one's "Content Type" string value, and report every pair by
       calling on_type_read(type, ext).

       The registry is scanned with the GIL released.  Results are buffered
       in a fixed-size array on the stack and handed to the callback in
       batches, so the GIL is only re-acquired once per full batch (and once
       more at the end for the remainder).

       Returns None on success.  Returns NULL with an exception set if the
       callback raises, or if the scan stops on a Windows error other than
       ERROR_NO_MORE_ITEMS. */
#define CCH_EXT 128
#define CB_TYPE 510
    struct {
        wchar_t ext[CCH_EXT];
        /* One extra element so a terminator can always be appended */
        wchar_t type[CB_TYPE / sizeof(wchar_t) + 1];
    } entries[64];
    int entry = 0;
    HKEY hkcr = NULL;
    LRESULT err;

    Py_BEGIN_ALLOW_THREADS
    err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr);
    /* ERROR_MORE_DATA only means one name/value did not fit our buffers;
       that entry is skipped and the enumeration carries on. */
    for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) {
        LPWSTR ext = entries[entry].ext;
        LPWSTR type = entries[entry].type;
        DWORD cchExt = CCH_EXT;
        DWORD cbType = CB_TYPE;
        HKEY subkey;
        DWORD regType;

        err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL);
        if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) {
            continue;
        }

        err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey);
        if (err == ERROR_FILE_NOT_FOUND || err == ERROR_ACCESS_DENIED) {
            /* A key that vanished or that we may not read must not abort
               the whole scan; skip it, as the pure-Python implementation
               does when it ignores OSError for an individual key. */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        }

        err = RegQueryValueExW(subkey, L"Content Type", NULL,
                               &regType, (LPBYTE)type, &cbType);
        RegCloseKey(subkey);
        if (err == ERROR_FILE_NOT_FOUND) {
            /* No "Content Type" value for this extension */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        } else if (regType != REG_SZ || !cbType) {
            continue;
        }
        /* The stored string is not guaranteed to be terminated */
        type[cbType / sizeof(wchar_t)] = L'\0';

        entry += 1;

        /* Flush our cached entries if we are full */
        if (entry == sizeof(entries) / sizeof(entries[0])) {
            Py_BLOCK_THREADS
            for (int j = 0; j < entry; ++j) {
                PyObject *r = PyObject_CallFunction(
                    on_type_read, "uu", entries[j].type, entries[j].ext
                );
                if (!r) {
                    /* We blocked threads, so safe to return from here */
                    RegCloseKey(hkcr);
                    return NULL;
                }
                Py_DECREF(r);
            }
            Py_UNBLOCK_THREADS
            entry = 0;
        }
    }
    if (hkcr) {
        RegCloseKey(hkcr);
    }
    Py_END_ALLOW_THREADS

    if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) {
        PyErr_SetFromWindowsErr((int)err);
        return NULL;
    }

    /* Report whatever is left over from the last, partially filled batch */
    for (int j = 0; j < entry; ++j) {
        PyObject *r = PyObject_CallFunction(
            on_type_read, "uu", entries[j].type, entries[j].ext
        );
        if (!r) {
            return NULL;
        }
        Py_DECREF(r);
    }

    Py_RETURN_NONE;
#undef CCH_EXT
#undef CB_TYPE
}


static PyMethodDef winapi_functions[] = {
_WINAPI_CLOSEHANDLE_METHODDEF
Expand Down Expand Up @@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = {
_WINAPI_WRITEFILE_METHODDEF
_WINAPI_GETACP_METHODDEF
_WINAPI_GETFILETYPE_METHODDEF
_WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
{NULL, NULL}
};

Expand Down
38 changes: 37 additions & 1 deletion Modules/clinic/_winapi.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bbf2fb6

Please sign in to comment.