diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py index cf4e32d0d8..b7c0a8c4c3 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py @@ -15,6 +15,7 @@ from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor # Mirrors WinBase.h (unfortunately not defined already elsewhere) +WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008 WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 @@ -47,6 +48,17 @@ kernel32.AddDllDirectory.argtypes = [ctypes.wintypes.LPCWSTR] kernel32.AddDllDirectory.restype = ctypes.c_void_p # DLL_DIRECTORY_COOKIE +# SearchPathW - find a file by name; callers below pass NULL for lpPath to use the standard search +kernel32.SearchPathW.argtypes = [ + ctypes.wintypes.LPCWSTR, # lpPath (NULL to use standard search) + ctypes.wintypes.LPCWSTR, # lpFileName + ctypes.wintypes.LPCWSTR, # lpExtension + ctypes.wintypes.DWORD, # nBufferLength + ctypes.wintypes.LPWSTR, # lpBuffer + ctypes.POINTER(ctypes.wintypes.LPWSTR), # lpFilePart +] +kernel32.SearchPathW.restype = ctypes.wintypes.DWORD + def ctypes_handle_to_unsigned_int(handle: ctypes.wintypes.HMODULE) -> int: """Convert ctypes HMODULE to unsigned int.""" @@ -115,21 +127,50 @@ def check_if_already_loaded_from_elsewhere(desc: LibDescriptor, have_abs_path: b return None +def _search_path_for_dll(dll_name: str) -> str | None: + """Look up a DLL by file name using the Windows SearchPathW API. 
+ + Args: + dll_name: File name of the DLL to find (passed to SearchPathW as lpFileName) + + Returns: + The path that SearchPathW wrote to the buffer if the DLL was found, None if SearchPathW returned 0 + """ + buffer = ctypes.create_unicode_buffer(260) # MAX_PATH + length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) + + if length == 0: + return None + + # A result longer than the buffer is taken as the required size: retry once with a buffer of that length + if length > len(buffer): + buffer = ctypes.create_unicode_buffer(length) + length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) + if length == 0: + return None + + return buffer.value + + def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None: """Try to load a DLL using system search paths. Args: - libname: The name of the library to load + desc: Descriptor for the library to load; its windows_dlls names are tried in reverse order Returns: A LoadedDL object if successful, None if the library cannot be loaded """ # Reverse tabulated names to achieve new -> old search order. for dll_name in reversed(desc.windows_dlls): - handle = kernel32.LoadLibraryExW(dll_name, None, 0) - if handle: - abs_path = abs_path_for_dynamic_library(desc.name, handle) - return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") + # SearchPathW bypasses Python 3.8+'s SetDefaultDllDirectories restriction. + found_path = _search_path_for_dll(dll_name) + if found_path: + # LOAD_WITH_ALTERED_SEARCH_PATH additionally ensures dependencies + # are resolved from the DLL's directory. 
+ handle = kernel32.LoadLibraryExW(found_path, None, WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH) + if handle: + return LoadedDL(found_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") return None diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 016acfd25d..34ab208c43 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -3,9 +3,12 @@ import os import platform +import subprocess import pytest from child_load_nvidia_dynamic_lib_helper import ( + LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD, + LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, build_child_process_failed_for_libname_message, run_load_nvidia_dynamic_lib_in_subprocess, ) @@ -14,7 +17,11 @@ from cuda.pathfinder import DynamicLibNotAvailableError, DynamicLibUnknownError, load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module from cuda.pathfinder._dynamic_libs import supported_nvidia_libs -from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload +from cuda.pathfinder._dynamic_libs.subprocess_protocol import ( + STATUS_NOT_FOUND, + build_dynamic_lib_subprocess_command, + parse_dynamic_lib_subprocess_payload, +) from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") @@ -134,3 +141,85 @@ def raise_child_process_failed(): assert abs_path is not None info_summary_append(f"abs_path={quote_for_shell(abs_path)}") assert os.path.isfile(abs_path) # double-check the abs_path + + +def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append): + """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH. 
+ + On Windows, Python 3.8+ calls SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS) + at startup, which excludes PATH from the LoadLibraryExW search order. The fix uses + SearchPathW (unaffected by SetDefaultDllDirectories) to locate the DLL via PATH, then + loads it by absolute path with LOAD_WITH_ALTERED_SEARCH_PATH. + + This test loads nvrtc twice in fresh subprocesses: once with the normal environment, + once with CUDA_HOME and CUDA_PATH stripped. If the normal load finds nvrtc in a + directory on PATH, the stripped load must also succeed — otherwise the system search + is broken. + """ + timeout = 120 if IS_WINDOWS else 30 + + # Phase 1: load nvrtc with normal environment. + normal_result = run_load_nvidia_dynamic_lib_in_subprocess("nvrtc", timeout=timeout) + if normal_result.returncode != 0: + raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", normal_result)) + assert not normal_result.stderr + normal_payload = parse_dynamic_lib_subprocess_payload( + normal_result.stdout, + libname="nvrtc", + error_label="Load subprocess child process (normal env)", + ) + if normal_payload.status == STATUS_NOT_FOUND: + info_summary_append("nvrtc not found (normal env)") + pytest.skip("nvrtc not available in this environment") + normal_abs_path = normal_payload.abs_path + assert normal_abs_path is not None + assert os.path.isfile(normal_abs_path) + info_summary_append(f"nvrtc (normal env): abs_path={quote_for_shell(normal_abs_path)}") + + # Phase 2: load nvrtc without CUDA_HOME/CUDA_PATH. 
+ env = os.environ.copy() + env.pop("CUDA_HOME", None) + env.pop("CUDA_PATH", None) + command = build_dynamic_lib_subprocess_command(LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, "nvrtc") + stripped_result = subprocess.run( # noqa: S603 + command, + capture_output=True, + text=True, + timeout=timeout, + check=False, + env=env, + cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD, + ) + if stripped_result.returncode != 0: + raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", stripped_result)) + assert not stripped_result.stderr + stripped_payload = parse_dynamic_lib_subprocess_payload( + stripped_result.stdout, + libname="nvrtc", + error_label="Load subprocess child process (no CUDA_HOME/CUDA_PATH)", + ) + + # Phase 3: evaluate. + if stripped_payload.status != STATUS_NOT_FOUND: + stripped_abs_path = stripped_payload.abs_path + assert stripped_abs_path is not None + assert os.path.isfile(stripped_abs_path) + info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(stripped_abs_path)}") + return + + # nvrtc was found normally but not without CUDA_HOME/CUDA_PATH. + # If the DLL's directory is on PATH, the system search should have found it. + dll_dir = os.path.normcase(os.path.normpath(os.path.dirname(normal_abs_path))) + on_path = any( + os.path.normcase(os.path.normpath(d)) == dll_dir for d in os.environ.get("PATH", "").split(os.pathsep) if d + ) + if on_path: + pytest.fail( + f"nvrtc was found at {normal_abs_path!r} (directory is on PATH) " + f"but could not be loaded without CUDA_HOME/CUDA_PATH. " + f"System search should find DLLs in PATH directories." + ) + info_summary_append( + f"nvrtc (no CUDA_HOME/CUDA_PATH): not found " + f"(normal-env directory not on PATH: {quote_for_shell(os.path.dirname(normal_abs_path))})" + )