Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 46 additions & 5 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor

# Mirrors WinBase.h (unfortunately not defined already elsewhere)
# These are bit flags; in this file WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH is passed
# as the flags argument of kernel32.LoadLibraryExW when loading a DLL by absolute
# path, so that the DLL's dependencies are resolved from the DLL's own directory.
WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH = 0x00000008
# NOTE(review): the two flags below are not used in the portion of the file visible
# here; presumably they are also LoadLibraryExW flags — confirm against their call sites.
WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
WINBASE_LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000

Expand Down Expand Up @@ -47,6 +48,17 @@
# AddDllDirectory - takes a single wide-string directory path.
kernel32.AddDllDirectory.argtypes = [ctypes.wintypes.LPCWSTR]
kernel32.AddDllDirectory.restype = ctypes.c_void_p  # DLL_DIRECTORY_COOKIE

# SearchPathW - find a file in the system search path
# Declaring argtypes/restype explicitly lets ctypes convert Python str/None/int
# arguments to the right C types instead of guessing.
kernel32.SearchPathW.argtypes = [
    ctypes.wintypes.LPCWSTR,  # lpPath (NULL to use standard search)
    ctypes.wintypes.LPCWSTR,  # lpFileName
    ctypes.wintypes.LPCWSTR,  # lpExtension
    ctypes.wintypes.DWORD,  # nBufferLength
    ctypes.wintypes.LPWSTR,  # lpBuffer
    ctypes.POINTER(ctypes.wintypes.LPWSTR),  # lpFilePart
]
# The DWORD result is interpreted by _search_path_for_dll: 0 is treated as
# "not found", and a value larger than the supplied buffer length is treated
# as the buffer size to retry with.
kernel32.SearchPathW.restype = ctypes.wintypes.DWORD


def ctypes_handle_to_unsigned_int(handle: ctypes.wintypes.HMODULE) -> int:
"""Convert ctypes HMODULE to unsigned int."""
Expand Down Expand Up @@ -115,21 +127,50 @@ def check_if_already_loaded_from_elsewhere(desc: LibDescriptor, have_abs_path: b
return None


def _search_path_for_dll(dll_name: str) -> str | None:
    """Resolve a DLL name to an absolute path with the Win32 ``SearchPathW`` call.

    The lookup passes NULL for ``lpPath`` (standard search), ``lpExtension`` and
    ``lpFilePart``. It starts with a MAX_PATH-sized buffer and retries once with
    the size reported by ``SearchPathW`` if that first buffer is too small.

    Args:
        dll_name: The name of the DLL to find

    Returns:
        The absolute path to the DLL if found, None otherwise
    """
    path_buf = ctypes.create_unicode_buffer(260)  # MAX_PATH
    reported = kernel32.SearchPathW(None, dll_name, None, len(path_buf), path_buf, None)

    # A result larger than the buffer means the path did not fit; the result is
    # then used as the size of the replacement buffer for a second attempt.
    if reported > len(path_buf):
        path_buf = ctypes.create_unicode_buffer(reported)
        reported = kernel32.SearchPathW(None, dll_name, None, len(path_buf), path_buf, None)

    # Zero (from either attempt) signals that the file was not found.
    return None if reported == 0 else path_buf.value


def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None:
    """Try to load a DLL using system search paths.

    Each candidate name in ``desc.windows_dlls`` is tried in reverse order. The
    first candidate that yields a non-null module handle is returned.

    Args:
        desc: Descriptor for the library to load

    Returns:
        A LoadedDL object if successful, None if the library cannot be loaded
    """
    # Reverse tabulated names to achieve new -> old search order.
    for dll_name in reversed(desc.windows_dlls):
        # NOTE(review): both this load-by-name attempt and the SearchPathW-based
        # attempt below are present in the text under review. If that text is a
        # merged diff view, confirm whether the load-by-name attempt is meant to stay.
        handle = kernel32.LoadLibraryExW(dll_name, None, 0)
        if handle:
            # Loaded by bare name, so the absolute path is looked up from the handle.
            abs_path = abs_path_for_dynamic_library(desc.name, handle)
            return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle), "system-search")
        # SearchPathW bypasses Python 3.8+'s SetDefaultDllDirectories restriction.
        found_path = _search_path_for_dll(dll_name)
        if found_path:
            # LOAD_WITH_ALTERED_SEARCH_PATH additionally ensures dependencies
            # are resolved from the DLL's directory.
            handle = kernel32.LoadLibraryExW(found_path, None, WINBASE_LOAD_WITH_ALTERED_SEARCH_PATH)
            if handle:
                # The path returned by SearchPathW is already absolute and is reported as-is.
                return LoadedDL(found_path, False, ctypes_handle_to_unsigned_int(handle), "system-search")

    # No candidate name could be loaded.
    return None

Expand Down
91 changes: 90 additions & 1 deletion cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

import os
import platform
import subprocess

import pytest
from child_load_nvidia_dynamic_lib_helper import (
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD,
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE,
build_child_process_failed_for_libname_message,
run_load_nvidia_dynamic_lib_in_subprocess,
)
Expand All @@ -14,7 +17,11 @@
from cuda.pathfinder import DynamicLibNotAvailableError, DynamicLibUnknownError, load_nvidia_dynamic_lib
from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module
from cuda.pathfinder._dynamic_libs import supported_nvidia_libs
from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload
from cuda.pathfinder._dynamic_libs.subprocess_protocol import (
STATUS_NOT_FOUND,
build_dynamic_lib_subprocess_command,
parse_dynamic_lib_subprocess_payload,
)
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell

# Strictness setting for these tests, read once at import time from the environment;
# falls back to "see_what_works" when the variable is not set.
STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works")
Expand Down Expand Up @@ -134,3 +141,85 @@ def raise_child_process_failed():
assert abs_path is not None
info_summary_append(f"abs_path={quote_for_shell(abs_path)}")
assert os.path.isfile(abs_path) # double-check the abs_path


def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append):
    """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH.

    Background: on Windows, Python 3.8+ calls
    SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS) at startup, which
    removes PATH from the LoadLibraryExW search order. The loader therefore
    locates the DLL with SearchPathW (unaffected by SetDefaultDllDirectories) and
    then loads it by absolute path with LOAD_WITH_ALTERED_SEARCH_PATH.

    Procedure: nvrtc is loaded in two fresh subprocesses, first with the
    unmodified environment and then with CUDA_HOME and CUDA_PATH removed. When
    the first load found nvrtc in a directory that is listed on PATH, the second
    load is required to succeed as well; a failure there means the system search
    is broken.
    """
    timeout = 30
    if IS_WINDOWS:
        timeout = 120

    def _canonical_dir(path):
        # Normalize separators/case so directory comparisons are robust.
        return os.path.normcase(os.path.normpath(path))

    # --- Step 1: baseline load with the environment left untouched. ---
    baseline_proc = run_load_nvidia_dynamic_lib_in_subprocess("nvrtc", timeout=timeout)
    if baseline_proc.returncode != 0:
        raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", baseline_proc))
    assert not baseline_proc.stderr
    baseline = parse_dynamic_lib_subprocess_payload(
        baseline_proc.stdout,
        libname="nvrtc",
        error_label="Load subprocess child process (normal env)",
    )
    if baseline.status == STATUS_NOT_FOUND:
        # Nothing to compare against if nvrtc is not installed at all.
        info_summary_append("nvrtc not found (normal env)")
        pytest.skip("nvrtc not available in this environment")
    baseline_path = baseline.abs_path
    assert baseline_path is not None
    assert os.path.isfile(baseline_path)
    info_summary_append(f"nvrtc (normal env): abs_path={quote_for_shell(baseline_path)}")

    # --- Step 2: same load, but with CUDA_HOME/CUDA_PATH removed. ---
    scrubbed_env = os.environ.copy()
    for var_name in ("CUDA_HOME", "CUDA_PATH"):
        scrubbed_env.pop(var_name, None)
    child_cmd = build_dynamic_lib_subprocess_command(LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, "nvrtc")
    scrubbed_proc = subprocess.run(  # noqa: S603
        child_cmd,
        capture_output=True,
        text=True,
        timeout=timeout,
        check=False,
        env=scrubbed_env,
        cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD,
    )
    if scrubbed_proc.returncode != 0:
        raise RuntimeError(build_child_process_failed_for_libname_message("nvrtc", scrubbed_proc))
    assert not scrubbed_proc.stderr
    scrubbed = parse_dynamic_lib_subprocess_payload(
        scrubbed_proc.stdout,
        libname="nvrtc",
        error_label="Load subprocess child process (no CUDA_HOME/CUDA_PATH)",
    )

    # --- Step 3: judge the outcome. ---
    if scrubbed.status != STATUS_NOT_FOUND:
        scrubbed_path = scrubbed.abs_path
        assert scrubbed_path is not None
        assert os.path.isfile(scrubbed_path)
        info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(scrubbed_path)}")
        return

    # nvrtc loaded in the baseline run but not in the scrubbed run. That is only
    # a failure when the baseline directory is reachable through PATH, because
    # then the system search had everything it needed to find the DLL.
    baseline_dir = os.path.dirname(baseline_path)
    wanted_dir = _canonical_dir(baseline_dir)
    on_path = False
    for path_entry in os.environ.get("PATH", "").split(os.pathsep):
        if path_entry and _canonical_dir(path_entry) == wanted_dir:
            on_path = True
            break
    if on_path:
        pytest.fail(
            f"nvrtc was found at {baseline_path!r} (directory is on PATH) "
            f"but could not be loaded without CUDA_HOME/CUDA_PATH. "
            f"System search should find DLLs in PATH directories."
        )
    info_summary_append(
        f"nvrtc (no CUDA_HOME/CUDA_PATH): not found "
        f"(normal-env directory not on PATH: {quote_for_shell(baseline_dir)})"
    )
Loading