Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 72 additions & 7 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ctypes.wintypes
import os
import struct
from collections.abc import Iterator
from typing import TYPE_CHECKING

from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
Expand Down Expand Up @@ -115,21 +116,85 @@ def check_if_already_loaded_from_elsewhere(desc: LibDescriptor, have_abs_path: b
return None


def _iter_env_path_directories(path_value: str | None) -> Iterator[str]:
"""Yield normalized directories from PATH without consulting the current directory."""
seen: set[str] = set()
if not path_value:
return

for raw_entry in path_value.split(os.pathsep):
entry = os.path.expandvars(raw_entry.strip().strip('"'))
if not entry:
continue
if not os.path.isabs(entry):
# Relative PATH entries would implicitly consult the current
# directory, which we explicitly avoid for DLL lookup.
continue
if not os.path.isdir(entry):
continue

normalized_entry = os.path.normcase(os.path.normpath(entry))
if normalized_entry in seen:
continue
seen.add(normalized_entry)
yield entry


def _find_dll_on_env_path(dll_name: str) -> str | None:
    """Return the first ``<PATH dir>/<dll_name>`` that is an existing file.

    Directories come from ``_iter_env_path_directories`` applied to the
    current ``PATH`` environment variable and are probed in that order.
    Returns None when no directory contains the file.
    """
    search_dirs = _iter_env_path_directories(os.environ.get("PATH"))
    candidates = (os.path.join(directory, dll_name) for directory in search_dirs)
    # Lazy evaluation: probing stops at the first hit.
    return next((path for path in candidates if os.path.isfile(path)), None)


def _try_load_with_process_dll_search(desc: LibDescriptor, dll_name: str) -> LoadedDL | None:
    """Attempt a bare-name load via the process DLL search path.

    Calls ``LoadLibraryExW`` with the bare DLL name and flags ``0``. When a
    handle is returned, its absolute path is resolved and wrapped in a
    ``LoadedDL`` tagged ``"system-search"``; otherwise None is returned.
    """
    handle = kernel32.LoadLibraryExW(dll_name, None, 0)
    if handle:
        resolved_path = abs_path_for_dynamic_library(desc.name, handle)
        handle_uint = ctypes_handle_to_unsigned_int(handle)
        return LoadedDL(resolved_path, False, handle_uint, "system-search")
    return None


def _try_load_with_env_path_fallback(desc: LibDescriptor, dll_name: str) -> LoadedDL | None:
    """Load ``dll_name`` by absolute path if an explicit PATH scan finds it.

    Intended as a fallback for CTK-style installs exposed only via PATH.
    Returns None when the scan finds no match; otherwise delegates to
    ``load_with_abs_path`` with the ``"system-search"`` label.
    """
    located = _find_dll_on_env_path(dll_name)
    return None if located is None else load_with_abs_path(desc, located, "system-search")


def load_with_system_search(desc: LibDescriptor) -> LoadedDL | None:
    """Try to load a DLL using system search paths.

    The tabulated DLL names are tried in new -> old order. Every name is first
    offered to the native process DLL search; only if none of them loads, and
    the library is not packaged with the driver, is PATH scanned explicitly.

    Args:
        desc: Descriptor for the library to load

    Returns:
        A LoadedDL object if successful, None if the library cannot be loaded
    """
    # Reverse tabulated names to achieve new -> old search order.
    dll_names = tuple(reversed(desc.windows_dlls))

    # Phase 1: preserve the native process DLL search path (application dir,
    # system32, AddDllDirectory user dirs, loaded-module list).
    for dll_name in dll_names:
        loaded = _try_load_with_process_dll_search(desc, dll_name)
        if loaded is not None:
            return loaded

    # Driver-packaged libraries never use the PATH fallback.
    if desc.packaged_with == "driver":
        return None

    # Phase 2: explicit PATH fallback for CTK-style installs only. Avoid
    # SearchPathW because its search semantics differ from LoadLibraryExW and
    # can consult the current directory.
    for dll_name in dll_names:
        loaded = _try_load_with_env_path_fallback(desc, dll_name)
        if loaded is not None:
            return loaded

    return None

Expand Down
90 changes: 90 additions & 0 deletions cuda_pathfinder/tests/test_load_dl_windows_using_mocker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import os

import pytest

from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS


def _load_windows_module():
    """Return the ``load_dl_windows`` module, skipping the test off Windows.

    The import is deferred into the function so it only runs when
    ``IS_WINDOWS`` is true.
    """
    if IS_WINDOWS:
        from cuda.pathfinder._dynamic_libs import load_dl_windows

        return load_dl_windows
    # pytest.skip() raises, so nothing is returned on other platforms.
    pytest.skip("Windows-specific loader tests")


def _make_loaded_dl(path, found_via="system-search"):
    """Build a ``LoadedDL`` for ``path`` with a placeholder handle value."""
    placeholder_handle = 0xDEAD  # arbitrary sentinel; never dereferenced by these tests
    return LoadedDL(path, False, placeholder_handle, found_via)


def test_find_dll_on_env_path_ignores_current_directory(tmp_path, monkeypatch):
    """A "." PATH entry must not make the scan pick up a DLL from the cwd."""
    mod = _load_windows_module()
    dll_name = "fakecuda.dll"

    cwd_dir = tmp_path / "cwd"
    path_dir = tmp_path / "path_dir"
    for directory in (cwd_dir, path_dir):
        directory.mkdir()

    # Same file name in both places; only the PATH copy may be returned.
    (cwd_dir / dll_name).write_bytes(b"cwd-copy")
    expected = path_dir / dll_name
    expected.write_bytes(b"path-copy")

    monkeypatch.chdir(cwd_dir)
    # The real directory is quoted to exercise quote stripping as well.
    path_entries = (".", f'"{path_dir}"')
    monkeypatch.setenv("PATH", os.pathsep.join(path_entries))

    found = mod._find_dll_on_env_path(dll_name)
    assert found == str(expected)


def test_env_path_fallback_uses_load_with_abs_path(tmp_path, monkeypatch, mocker):
    """The PATH fallback must hand the located file to ``load_with_abs_path``."""
    mod = _load_windows_module()
    desc = LIB_DESCRIPTORS["nvrtc"]
    dll_name = desc.windows_dlls[-1]

    # A fake DLL in the only PATH directory; the real loader is mocked out.
    bin_dir = tmp_path / "bin"
    bin_dir.mkdir()
    fake_dll = bin_dir / dll_name
    fake_dll.write_bytes(b"fake-dll")
    monkeypatch.setenv("PATH", str(bin_dir))

    expected = _make_loaded_dl(str(fake_dll))
    abs_path_loader = mocker.patch.object(mod, "load_with_abs_path", return_value=expected)

    outcome = mod._try_load_with_env_path_fallback(desc, dll_name)

    assert outcome is expected
    abs_path_loader.assert_called_once_with(desc, str(fake_dll), "system-search")


def test_load_with_system_search_prefers_process_dll_search_over_env_path(mocker):
    """A process-search hit on the first name must skip the PATH fallback."""
    mod = _load_windows_module()
    desc = LIB_DESCRIPTORS["nvrtc"]
    expected = _make_loaded_dl(r"C:\CUDA\bin\nvrtc64_130_0.dll")

    # Patch the fallback first; the two patches are independent of each other.
    env_path_mock = mocker.patch.object(mod, "_try_load_with_env_path_fallback")
    process_search_mock = mocker.patch.object(
        mod,
        "_try_load_with_process_dll_search",
        return_value=expected,
    )

    outcome = mod.load_with_system_search(desc)

    assert outcome is expected
    # Names are searched in reverse order, so the last tabulated name is tried first.
    newest_dll_name = desc.windows_dlls[-1]
    process_search_mock.assert_called_once_with(desc, newest_dll_name)
    env_path_mock.assert_not_called()


def test_load_with_system_search_skips_env_path_fallback_for_driver_libs(mocker):
    """For the "cuda" descriptor, a failed process search must not fall back to PATH."""
    mod = _load_windows_module()
    desc = LIB_DESCRIPTORS["cuda"]

    env_path_mock = mocker.patch.object(mod, "_try_load_with_env_path_fallback")
    process_search_mock = mocker.patch.object(
        mod,
        "_try_load_with_process_dll_search",
        return_value=None,
    )

    outcome = mod.load_with_system_search(desc)

    assert outcome is None
    # Every tabulated DLL name is attempted once via the process search.
    expected_attempts = len(desc.windows_dlls)
    assert process_search_mock.call_count == expected_attempts
    env_path_mock.assert_not_called()
92 changes: 91 additions & 1 deletion cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@

import os
import platform
import subprocess

import pytest
from child_load_nvidia_dynamic_lib_helper import (
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD,
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE,
build_child_process_failed_for_libname_message,
run_load_nvidia_dynamic_lib_in_subprocess,
)
Expand All @@ -14,7 +17,11 @@
from cuda.pathfinder import DynamicLibNotAvailableError, DynamicLibUnknownError, load_nvidia_dynamic_lib
from cuda.pathfinder._dynamic_libs import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib_module
from cuda.pathfinder._dynamic_libs import supported_nvidia_libs
from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload
from cuda.pathfinder._dynamic_libs.subprocess_protocol import (
STATUS_NOT_FOUND,
build_dynamic_lib_subprocess_command,
parse_dynamic_lib_subprocess_payload,
)
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell

STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works")
Expand Down Expand Up @@ -134,3 +141,86 @@ def raise_child_process_failed():
assert abs_path is not None
info_summary_append(f"abs_path={quote_for_shell(abs_path)}")
assert os.path.isfile(abs_path) # double-check the abs_path


def test_load_nvrtc_without_cuda_home_or_cuda_path(info_summary_append):
    """Regression test for issue #1781: nvrtc must load without CUDA_HOME/CUDA_PATH.

    On Windows, Python 3.8+ calls SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS)
    at startup, which excludes PATH from the LoadLibraryExW search order. The fix keeps
    the native process DLL search first, then explicitly scans PATH for CTK DLLs and
    loads any match by absolute path using LOAD_LIBRARY_SEARCH_DEFAULT_DIRS together with
    LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR.

    nvrtc is loaded in two fresh subprocesses: first with the inherited environment,
    then with CUDA_HOME and CUDA_PATH removed. When the first load resolves nvrtc to a
    directory that is listed on PATH, the second load must succeed as well; a failure
    in that situation means the system search is broken.
    """
    libname = "nvrtc"
    timeout = 120 if IS_WINDOWS else 30

    # Phase 1: load nvrtc with normal environment.
    normal_result = run_load_nvidia_dynamic_lib_in_subprocess(libname, timeout=timeout)
    if normal_result.returncode != 0:
        raise RuntimeError(build_child_process_failed_for_libname_message(libname, normal_result))
    assert not normal_result.stderr
    normal_payload = parse_dynamic_lib_subprocess_payload(
        normal_result.stdout,
        libname=libname,
        error_label="Load subprocess child process (normal env)",
    )
    if normal_payload.status == STATUS_NOT_FOUND:
        # Nothing to compare against when nvrtc is absent altogether.
        info_summary_append("nvrtc not found (normal env)")
        pytest.skip("nvrtc not available in this environment")
    normal_abs_path = normal_payload.abs_path
    assert normal_abs_path is not None
    assert os.path.isfile(normal_abs_path)
    info_summary_append(f"nvrtc (normal env): abs_path={quote_for_shell(normal_abs_path)}")

    # Phase 2: load nvrtc without CUDA_HOME/CUDA_PATH.
    stripped_env = {
        key: value for key, value in os.environ.items() if key not in ("CUDA_HOME", "CUDA_PATH")
    }
    child_command = build_dynamic_lib_subprocess_command(LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODE, libname)
    stripped_result = subprocess.run(  # noqa: S603
        child_command,
        capture_output=True,
        text=True,
        timeout=timeout,
        check=False,
        env=stripped_env,
        cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD,
    )
    if stripped_result.returncode != 0:
        raise RuntimeError(build_child_process_failed_for_libname_message(libname, stripped_result))
    assert not stripped_result.stderr
    stripped_payload = parse_dynamic_lib_subprocess_payload(
        stripped_result.stdout,
        libname=libname,
        error_label="Load subprocess child process (no CUDA_HOME/CUDA_PATH)",
    )

    # Phase 3: evaluate.
    if stripped_payload.status != STATUS_NOT_FOUND:
        stripped_abs_path = stripped_payload.abs_path
        assert stripped_abs_path is not None
        assert os.path.isfile(stripped_abs_path)
        info_summary_append(f"nvrtc (no CUDA_HOME/CUDA_PATH): abs_path={quote_for_shell(stripped_abs_path)}")
        return

    # nvrtc was found normally but not without CUDA_HOME/CUDA_PATH.
    # If the DLL's directory is on PATH, the system search should have found it.
    normal_dir = os.path.dirname(normal_abs_path)
    dll_dir = os.path.normcase(os.path.normpath(normal_dir))
    path_dirs = {
        os.path.normcase(os.path.normpath(entry))
        for entry in os.environ.get("PATH", "").split(os.pathsep)
        if entry
    }
    if dll_dir in path_dirs:
        pytest.fail(
            f"nvrtc was found at {normal_abs_path!r} (directory is on PATH) "
            f"but could not be loaded without CUDA_HOME/CUDA_PATH. "
            f"System search should find DLLs in PATH directories."
        )
    info_summary_append(
        f"nvrtc (no CUDA_HOME/CUDA_PATH): not found "
        f"(normal-env directory not on PATH: {quote_for_shell(os.path.dirname(normal_abs_path))})"
    )