From 0a425e1ab43924dd5a9f1ec99fe8e3abf97cd3db Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 15 Jan 2026 22:37:26 -0800 Subject: [PATCH 1/5] pathfinder: Use LOAD_WITH_ALTERED_SEARCH_PATH for system DLL search on Windows When loading CUDA DLLs via system search on Windows, the previous approach using LoadLibraryExW with flags=0 would find the DLL on PATH but fail to locate its co-located dependencies (error 126). This fix uses SearchPathW to first find the DLL's full path, then loads it with LOAD_WITH_ALTERED_SEARCH_PATH so Windows searches for dependencies starting from the DLL's directory. --- .../_dynamic_libs/load_dl_windows.py | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py index b9f15ea50b..247cc4d467 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py @@ -13,6 +13,7 @@ ) # Mirrors WinBase.h (unfortunately not defined already elsewhere) +WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008 WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 @@ -45,6 +46,17 @@ kernel32.AddDllDirectory.argtypes = [ctypes.wintypes.LPCWSTR] kernel32.AddDllDirectory.restype = ctypes.c_void_p # DLL_DIRECTORY_COOKIE +# SearchPathW - find a file in the system search path +kernel32.SearchPathW.argtypes = [ + ctypes.wintypes.LPCWSTR, # lpPath (NULL to use standard search) + ctypes.wintypes.LPCWSTR, # lpFileName + ctypes.wintypes.LPCWSTR, # lpExtension + ctypes.wintypes.DWORD, # nBufferLength + ctypes.wintypes.LPWSTR, # lpBuffer + ctypes.POINTER(ctypes.wintypes.LPWSTR), # lpFilePart +] +kernel32.SearchPathW.restype = ctypes.wintypes.DWORD + def ctypes_handle_to_unsigned_int(handle: ctypes.wintypes.HMODULE) -> int: """Convert ctypes HMODULE to unsigned int.""" @@ -113,6 +125,31 @@ def check_if_already_loaded_from_elsewhere(libname: str, have_abs_path: bool) -> return None +def _search_path_for_dll(dll_name: str) -> str | None: + """Search for a DLL using Windows SearchPathW. + + Args: + dll_name: The name of the DLL to find + + Returns: + The absolute path to the DLL if found, None otherwise + """ + buffer = ctypes.create_unicode_buffer(260) # MAX_PATH + length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) + + if length == 0: + return None + + # If buffer was too small, try with larger buffer + if length > len(buffer): + buffer = ctypes.create_unicode_buffer(length) + length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) + if length == 0: + return None + + return buffer.value + + def load_with_system_search(libname: str) -> LoadedDL | None: """Try to load a DLL using system search paths. @@ -124,10 +161,15 @@ def load_with_system_search(libname: str) -> LoadedDL | None: """ # Reverse tabulated names to achieve new → old search order. for dll_name in reversed(SUPPORTED_WINDOWS_DLLS.get(libname, ())): - handle = kernel32.LoadLibraryExW(dll_name, None, 0) - if handle: - abs_path = abs_path_for_dynamic_library(libname, handle) - return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") + # First, find the DLL's full path using SearchPathW + found_path = _search_path_for_dll(dll_name) + if found_path: + # Load with LOAD_WITH_ALTERED_SEARCH_PATH so Windows searches for + # dependencies from the DLL's directory (required for CUDA DLLs + # whose dependencies are co-located) + handle = kernel32.LoadLibraryExW(found_path, None, WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH) + if handle: + return LoadedDL(found_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") return None From 2f024a57e24ee0cdfe336546b08d4304d54a1b2c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Wed, 18 Mar 2026 17:30:25 -0700 Subject: [PATCH 2/5] test: add regression test for #1781 (nvrtc loading without CUDA_HOME/CUDA_PATH) Loads nvrtc in a subprocess with CUDA_HOME and CUDA_PATH stripped from the environment. On Windows CI where nvrtc is only reachable via PATH, this exercises the LOAD_WITH_ALTERED_SEARCH_PATH fix; on other platforms the test passes harmlessly via whatever search path finds nvrtc first. Made-with: Cursor --- .../tests/test_load_nvidia_dynamic_lib.py | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 016acfd25d..abc0aa0ca9 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -3,9 +3,12 @@ import os import platform +import subprocess import pytest from child_load_nvidia_dynamic_lib_helper import ( + LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD, + LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, build_child_process_failed_for_libname_message, run_load_nvidia_dynamic_lib_in_subprocess, ) @@ -14,7 +17,11 @@ from cuda.pathfinder import DynamicLibNotAvailableError, DynamicLibUnknownError, load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module from cuda.pathfinder._dynamic_libs import supported_nvidia_libs -from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload +from cuda.pathfinder._dynamic_libs.subprocess_protocol import ( + STATUS_NOT_FOUND, + build_dynamic_lib_subprocess_command, + parse_dynamic_lib_subprocess_payload, +) from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") @@ -134,3 +141,54 @@ def raise_child_process_failed(): assert abs_path is not None info_summary_append(f"abs_path={quote_for_shell(abs_path)}") assert os.path.isfile(abs_path) # double-check the abs_path + + +def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append): + """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH. + + On Windows, when CUDA DLLs are discovered via PATH (system search), the + previous LoadLibraryExW(flags=0) call would find the DLL but fail to + resolve co-located dependencies like nvrtc-builtins (error 126). + + The fix uses SearchPathW to resolve the full path, then loads with + LOAD_WITH_ALTERED_SEARCH_PATH so dependency search starts from the + DLL's directory. + + This test strips CUDA_HOME and CUDA_PATH, then loads nvrtc in a fresh + subprocess. In CI environments where nvrtc is only available via system + search, this exercises the exact code path that was broken. + """ + env = os.environ.copy() + env.pop("CUDA_HOME", None) + env.pop("CUDA_PATH", None) + + timeout = 120 if IS_WINDOWS else 30 + command = build_dynamic_lib_subprocess_command(LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, "nvrtc") + result = subprocess.run( # noqa: S603 + command, + capture_output=True, + text=True, + timeout=timeout, + check=False, + env=env, + cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD, + ) + + if result.returncode != 0: + raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", result)) + assert not result.stderr + + payload = parse_dynamic_lib_subprocess_payload( + result.stdout, + libname="nvrtc", + error_label="Load subprocess child process (no CUDA_HOME/CUDA_PATH)", + ) + + if payload.status == STATUS_NOT_FOUND: + info_summary_append("nvrtc not found without CUDA_HOME/CUDA_PATH") + pytest.skip("nvrtc not available without CUDA_HOME/CUDA_PATH") + + abs_path = payload.abs_path + assert abs_path is not None + info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(abs_path)}") + assert os.path.isfile(abs_path) From b2aabfe7eb29f477f33e1816801cdda6792ae891 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 19 Mar 2026 14:42:15 -0700 Subject: [PATCH 3/5] test: harden nvrtc regression test with PATH-based failure detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also load nvrtc with the normal environment first. If the normal load finds nvrtc in a directory on PATH but the CUDA_HOME/CUDA_PATH-stripped load fails, the test now fails instead of skipping — directly catching the Windows bug where SetDefaultDllDirectories excludes PATH from LoadLibraryExW. Made-with: Cursor --- .../tests/test_load_nvidia_dynamic_lib.py | 89 +++++++++++++------ 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index abc0aa0ca9..34ab208c43 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -146,25 +146,42 @@ def raise_child_process_failed(): def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append): """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH. - On Windows, when CUDA DLLs are discovered via PATH (system search), the - previous LoadLibraryExW(flags=0) call would find the DLL but fail to - resolve co-located dependencies like nvrtc-builtins (error 126). - - The fix uses SearchPathW to resolve the full path, then loads with - LOAD_WITH_ALTERED_SEARCH_PATH so dependency search starts from the - DLL's directory. - - This test strips CUDA_HOME and CUDA_PATH, then loads nvrtc in a fresh - subprocess. In CI environments where nvrtc is only available via system - search, this exercises the exact code path that was broken. + On Windows, Python 3.8+ calls SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS) + at startup, which excludes PATH from the LoadLibraryExW search order. The fix uses + SearchPathW (unaffected by SetDefaultDllDirectories) to locate the DLL via PATH, then + loads it by absolute path with LOAD_WITH_ALTERED_SEARCH_PATH. + + This test loads nvrtc twice in fresh subprocesses: once with the normal environment, + once with CUDA_HOME and CUDA_PATH stripped. If the normal load finds nvrtc in a + directory on PATH, the stripped load must also succeed — otherwise the system search + is broken. """ + timeout = 120 if IS_WINDOWS else 30 + + # Phase 1: load nvrtc with normal environment. + normal_result = run_load_nvidia_dynamic_lib_in_subprocess("nvrtc", timeout=timeout) + if normal_result.returncode != 0: + raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", normal_result)) + assert not normal_result.stderr + normal_payload = parse_dynamic_lib_subprocess_payload( + normal_result.stdout, + libname="nvrtc", + error_label="Load subprocess child process (normal env)", + ) + if normal_payload.status == STATUS_NOT_FOUND: + info_summary_append("nvrtc not found (normal env)") + pytest.skip("nvrtc not available in this environment") + normal_abs_path = normal_payload.abs_path + assert normal_abs_path is not None + assert os.path.isfile(normal_abs_path) + info_summary_append(f"nvrtc (normal env): abs_path={quote_for_shell(normal_abs_path)}") + + # Phase 2: load nvrtc without CUDA_HOME/CUDA_PATH. env = os.environ.copy() env.pop("CUDA_HOME", None) env.pop("CUDA_PATH", None) - - timeout = 120 if IS_WINDOWS else 30 command = build_dynamic_lib_subprocess_command(LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, "nvrtc") - result = subprocess.run( # noqa: S603 + stripped_result = subprocess.run( # noqa: S603 command, capture_output=True, text=True, @@ -173,22 +190,36 @@ def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append): env=env, cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD, ) - - if result.returncode != 0: - raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", result)) - assert not result.stderr - - payload = parse_dynamic_lib_subprocess_payload( - result.stdout, + if stripped_result.returncode != 0: + raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", stripped_result)) + assert not stripped_result.stderr + stripped_payload = parse_dynamic_lib_subprocess_payload( + stripped_result.stdout, libname="nvrtc", error_label="Load subprocess child process (no CUDA_HOME/CUDA_PATH)", ) - if payload.status == STATUS_NOT_FOUND: - info_summary_append("nvrtc not found without CUDA_HOME/CUDA_PATH") - pytest.skip("nvrtc not available without CUDA_HOME/CUDA_PATH") - - abs_path = payload.abs_path - assert abs_path is not None - info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(abs_path)}") - assert os.path.isfile(abs_path) + # Phase 3: evaluate. + if stripped_payload.status != STATUS_NOT_FOUND: + stripped_abs_path = stripped_payload.abs_path + assert stripped_abs_path is not None + assert os.path.isfile(stripped_abs_path) + info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(stripped_abs_path)}") + return + + # nvrtc was found normally but not without CUDA_HOME/CUDA_PATH. + # If the DLL's directory is on PATH, the system search should have found it. + dll_dir = os.path.normcase(os.path.normpath(os.path.dirname(normal_abs_path))) + on_path = any( + os.path.normcase(os.path.normpath(d)) == dll_dir for d in os.environ.get("PATH", "").split(os.pathsep) if d + ) + if on_path: + pytest.fail( + f"nvrtc was found at {normal_abs_path!r} (directory is on PATH) " + f"but could not be loaded without CUDA_HOME/CUDA_PATH. " + f"System search should find DLLs in PATH directories." + ) + info_summary_append( + f"nvrtc (no CUDA_HOME/CUDA_PATH): not found " + f"(normal-env directory not on PATH: {quote_for_shell(os.path.dirname(normal_abs_path))})" + ) From c3620f41ea3e059f48a016e1a1c34160b24e74cd Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 19 Mar 2026 15:06:53 -0700 Subject: [PATCH 4/5] fix: update comments in load_with_system_search to reflect correct diagnosis SearchPathW is the primary fix (bypasses Python 3.8+'s SetDefaultDllDirectories restriction); LOAD_WITH_ALTERED_SEARCH_PATH is a secondary benefit for dependency resolution. Made-with: Cursor --- .../cuda/pathfinder/_dynamic_libs/load_dl_windows.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py index 7ee443d450..b7c0a8c4c3 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py @@ -163,12 +163,11 @@ def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None: """ # Reverse tabulated names to achieve new -> old search order. for dll_name in reversed(desc.windows_dlls): - # First, find the DLL's full path using SearchPathW + # SearchPathW bypasses Python 3.8+'s SetDefaultDllDirectories restriction. found_path = _search_path_for_dll(dll_name) if found_path: - # Load with LOAD_WITH_ALTERED_SEARCH_PATH so Windows searches for - # dependencies from the DLL's directory (required for CUDA DLLs - # whose dependencies are co-located) + # LOAD_WITH_ALTERED_SEARCH_PATH additionally ensures dependencies + # are resolved from the DLL's directory. handle = kernel32.LoadLibraryExW(found_path, None, WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH) if handle: return LoadedDL(found_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") From a7ac1dcda066a7fdba0aafd9c9266ef69caa1f5e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 19 Mar 2026 16:15:52 -0700 Subject: [PATCH 5/5] fix: preserve Windows DLL search behavior in pathfinder Keep the native Windows process DLL search path for user-dir and driver-library behavior, then add an explicit PATH-only fallback for CTK libraries so PATH-based installs still load without broadening lookup semantics. Made-with: Cursor --- .../_dynamic_libs/load_dl_windows.py | 104 +++++++++++------- .../test_load_dl_windows_using_mocker.py | 90 +++++++++++++++ .../tests/test_load_nvidia_dynamic_lib.py | 7 +- 3 files changed, 158 insertions(+), 43 deletions(-) create mode 100644 cuda_pathfinder/tests/test_load_dl_windows_using_mocker.py diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py index b7c0a8c4c3..e1623ca457 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py @@ -7,6 +7,7 @@ import ctypes.wintypes import os import struct +from collections.abc import Iterator from typing import TYPE_CHECKING from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL @@ -15,7 +16,6 @@ from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor # Mirrors WinBase.h (unfortunately not defined already elsewhere) -WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008 WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 @@ -48,17 +48,6 @@ kernel32.AddDllDirectory.argtypes = [ctypes.wintypes.LPCWSTR] kernel32.AddDllDirectory.restype = ctypes.c_void_p # DLL_DIRECTORY_COOKIE -# SearchPathW - find a file in the system search path -kernel32.SearchPathW.argtypes = [ - ctypes.wintypes.LPCWSTR, # lpPath (NULL to use standard search) - ctypes.wintypes.LPCWSTR, # lpFileName - ctypes.wintypes.LPCWSTR, # lpExtension - ctypes.wintypes.DWORD, # nBufferLength - ctypes.wintypes.LPWSTR, # lpBuffer - ctypes.POINTER(ctypes.wintypes.LPWSTR), # lpFilePart -] -kernel32.SearchPathW.restype = ctypes.wintypes.DWORD - def ctypes_handle_to_unsigned_int(handle: ctypes.wintypes.HMODULE) -> int: """Convert ctypes HMODULE to unsigned int.""" @@ -127,29 +116,55 @@ def check_if_already_loaded_from_elsewhere(desc: LibDescriptor, have_abs_path: b return None -def _search_path_for_dll(dll_name: str) -> str | None: - """Search for a DLL using Windows SearchPathW. - - Args: - dll_name: The name of the DLL to find +def _iter_env_path_directories(path_value: str | None) -> Iterator[str]: + """Yield normalized directories from PATH without consulting the current directory.""" + seen: set[str] = set() + if not path_value: + return + + for raw_entry in path_value.split(os.pathsep): + entry = os.path.expandvars(raw_entry.strip().strip('"')) + if not entry: + continue + if not os.path.isabs(entry): + # Relative PATH entries would implicitly consult the current + # directory, which we explicitly avoid for DLL lookup. + continue + if not os.path.isdir(entry): + continue + + normalized_entry = os.path.normcase(os.path.normpath(entry)) + if normalized_entry in seen: + continue + seen.add(normalized_entry) + yield entry + + +def _find_dll_on_env_path(dll_name: str) -> str | None: + """Locate a DLL by scanning PATH entries explicitly.""" + for dirpath in _iter_env_path_directories(os.environ.get("PATH")): + candidate = os.path.join(dirpath, dll_name) + if os.path.isfile(candidate): + return candidate + return None - Returns: - The absolute path to the DLL if found, None otherwise - """ - buffer = ctypes.create_unicode_buffer(260) # MAX_PATH - length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) - if length == 0: +def _try_load_with_process_dll_search(desc: LibDescriptor, dll_name: str) -> LoadedDL | None: + """Try the process DLL search path configured by CPython/Windows.""" + handle = kernel32.LoadLibraryExW(dll_name, None, 0) + if not handle: return None - # If buffer was too small, try with larger buffer - if length > len(buffer): - buffer = ctypes.create_unicode_buffer(length) - length = kernel32.SearchPathW(None, dll_name, None, len(buffer), buffer, None) - if length == 0: - return None + abs_path = abs_path_for_dynamic_library(desc.name, handle) + return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") - return buffer.value + +def _try_load_with_env_path_fallback(desc: LibDescriptor, dll_name: str) -> LoadedDL | None: + """Fallback for CTK-style installs exposed only via PATH.""" + found_path = _find_dll_on_env_path(dll_name) + if found_path is None: + return None + return load_with_abs_path(desc, found_path, "system-search") def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None: @@ -161,16 +176,25 @@ def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None: Returns: A LoadedDL object if successful, None if the library cannot be loaded """ - # Reverse tabulated names to achieve new -> old search order. - for dll_name in reversed(desc.windows_dlls): - # SearchPathW bypasses Python 3.8+'s SetDefaultDllDirectories restriction. - found_path = _search_path_for_dll(dll_name) - if found_path: - # LOAD_WITH_ALTERED_SEARCH_PATH additionally ensures dependencies - # are resolved from the DLL's directory. - handle = kernel32.LoadLibraryExW(found_path, None, WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH) - if handle: - return LoadedDL(found_path, False, ctypes_handle_to_unsigned_int(handle), "system-search") + dll_names = tuple(reversed(desc.windows_dlls)) + + # Phase 1: preserve the native process DLL search path (application dir, + # system32, AddDllDirectory user dirs, loaded-module list). + for dll_name in dll_names: + loaded = _try_load_with_process_dll_search(desc, dll_name) + if loaded is not None: + return loaded + + if desc.packaged_with == "driver": + return None + + # Phase 2: explicit PATH fallback for CTK-style installs only. Avoid + # SearchPathW because its search semantics differ from LoadLibraryExW and + # can consult the current directory. + for dll_name in dll_names: + loaded = _try_load_with_env_path_fallback(desc, dll_name) + if loaded is not None: + return loaded return None diff --git a/cuda_pathfinder/tests/test_load_dl_windows_using_mocker.py b/cuda_pathfinder/tests/test_load_dl_windows_using_mocker.py new file mode 100644 index 0000000000..c6a6ee1c6f --- /dev/null +++ b/cuda_pathfinder/tests/test_load_dl_windows_using_mocker.py @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import os + +import pytest + +from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS +from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL +from cuda.pathfinder._utils.platform_aware import IS_WINDOWS + + +def _load_windows_module(): + if not IS_WINDOWS: + pytest.skip("Windows-specific loader tests") + from cuda.pathfinder._dynamic_libs import load_dl_windows as mod + + return mod + + +def _make_loaded_dl(path, found_via="system-search"): + return LoadedDL(path, False, 0xDEAD, found_via) + + +def test_find_dll_on_env_path_ignores_current_directory(tmp_path, monkeypatch): + mod = _load_windows_module() + + cwd_dir = tmp_path / "cwd" + cwd_dir.mkdir() + path_dir = tmp_path / "path_dir" + path_dir.mkdir() + + dll_name = "fakecuda.dll" + (cwd_dir / dll_name).write_bytes(b"cwd-copy") + expected = path_dir / dll_name + expected.write_bytes(b"path-copy") + + monkeypatch.chdir(cwd_dir) + monkeypatch.setenv("PATH", os.pathsep.join((".", f'"{path_dir}"'))) + + assert mod._find_dll_on_env_path(dll_name) == str(expected) + + +def test_env_path_fallback_uses_load_with_abs_path(tmp_path, monkeypatch, mocker): + mod = _load_windows_module() + desc = LIB_DESCRIPTORS["nvrtc"] + dll_name = desc.windows_dlls[-1] + + path_dir = tmp_path / "bin" + path_dir.mkdir() + dll_path = path_dir / dll_name + dll_path.write_bytes(b"fake-dll") + + monkeypatch.setenv("PATH", str(path_dir)) + expected = _make_loaded_dl(str(dll_path)) + load_with_abs_path = mocker.patch.object(mod, "load_with_abs_path", return_value=expected) + + result = mod._try_load_with_env_path_fallback(desc, dll_name) + + assert result is expected + load_with_abs_path.assert_called_once_with(desc, str(dll_path), "system-search") + + +def test_load_with_system_search_prefers_process_dll_search_over_env_path(mocker): + mod = _load_windows_module() + desc = LIB_DESCRIPTORS["nvrtc"] + expected = _make_loaded_dl(r"C:\CUDA\bin\nvrtc64_130_0.dll") + + process_search = mocker.patch.object(mod, "_try_load_with_process_dll_search", return_value=expected) + env_path = mocker.patch.object(mod, "_try_load_with_env_path_fallback") + + result = mod.load_with_system_search(desc) + + assert result is expected + process_search.assert_called_once_with(desc, desc.windows_dlls[-1]) + env_path.assert_not_called() + + +def test_load_with_system_search_skips_env_path_fallback_for_driver_libs(mocker): + mod = _load_windows_module() + desc = LIB_DESCRIPTORS["cuda"] + + process_search = mocker.patch.object(mod, "_try_load_with_process_dll_search", return_value=None) + env_path = mocker.patch.object(mod, "_try_load_with_env_path_fallback") + + result = mod.load_with_system_search(desc) + + assert result is None + assert process_search.call_count == len(desc.windows_dlls) + env_path.assert_not_called() diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 34ab208c43..2c568da752 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -147,9 +147,10 @@ def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append): """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH. On Windows, Python 3.8+ calls SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS) - at startup, which excludes PATH from the LoadLibraryExW search order. The fix uses - SearchPathW (unaffected by SetDefaultDllDirectories) to locate the DLL via PATH, then - loads it by absolute path with LOAD_WITH_ALTERED_SEARCH_PATH. + at startup, which excludes PATH from the LoadLibraryExW search order. The fix keeps + the native process DLL search first, then explicitly scans PATH for CTK DLLs and + loads any match by absolute path using LOAD_LIBRARY_SEARCH_DEFAULT_DIRS together with + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR. This test loads nvrtc twice in fresh subprocesses: once with the normal environment, once with CUDA_HOME and CUDA_PATH stripped. If the normal load finds nvrtc in a