From 685c1ce7e4c994ec9a8cb4b6f04fcdb19c406ed2 Mon Sep 17 00:00:00 2001 From: MOHITKOURAV01 Date: Wed, 18 Mar 2026 12:50:23 +0530 Subject: [PATCH] MNT: Replace live server calls with mocks in test_task_methods.py and test_clustering_task.py --- tests/test_tasks/test_clustering_task.py | 73 ++++++++++++------- tests/test_tasks/test_task_methods.py | 89 +++++++++++++++++++----- 2 files changed, 119 insertions(+), 43 deletions(-) diff --git a/tests/test_tasks/test_clustering_task.py b/tests/test_tasks/test_clustering_task.py index 29f5663c4..3af63b812 100644 --- a/tests/test_tasks/test_clustering_task.py +++ b/tests/test_tasks/test_clustering_task.py @@ -1,5 +1,4 @@ -# License: BSD 3-Clause -from __future__ import annotations +from unittest.mock import MagicMock, patch import pytest @@ -20,30 +19,62 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.CLUSTERING self.estimation_procedure = 17 - @pytest.mark.production_server() - def test_get_dataset(self): - # no clustering tasks on test server - self.use_production_server() + @patch("openml.tasks.get_task") + def test_get_dataset(self, mock_get_task): + mock_task = MagicMock() + mock_task.tid = self.task_id + mock_get_task.return_value = mock_task + task = openml.tasks.get_task(self.task_id) task.get_dataset() - @pytest.mark.production_server() - @pytest.mark.test_server() - def test_download_task(self): - # no clustering tasks on test server - self.use_production_server() + mock_get_task.assert_called_with(self.task_id) + mock_task.get_dataset.assert_called_once() + + @patch("tests.test_tasks.test_task.get_task") + def test_download_task(self, mock_get_task): + mock_task = MagicMock() + mock_task.task_id = self.task_id + mock_task.task_type_id = TaskType.CLUSTERING + mock_task.dataset_id = 36 + mock_get_task.return_value = mock_task + task = super().test_download_task() assert task.task_id == self.task_id assert task.task_type_id == TaskType.CLUSTERING assert task.dataset_id == 36 - @pytest.mark.test_server() - def test_upload_task(self): + mock_get_task.assert_called_with(self.task_id) + + @patch("openml.tasks.OpenMLTask.publish") + @patch("openml.tasks.create_task") + @patch("openml.datasets.list_datasets") + def test_upload_task(self, mock_list_datasets, mock_create_task, mock_publish): + import pandas as pd + dataset_id = 1 + # Mock list_datasets to return a dataframe with at least one dataset + mock_list_datasets.return_value = pd.DataFrame({ + "did": [dataset_id], + "NumberOfSymbolicFeatures": [0], + "NumberOfNumericFeatures": [10] + }) + + mock_task = MagicMock() + mock_task.id = 123 + mock_task.publish.return_value = mock_task + mock_publish.return_value = mock_task + + # Simulate: first call fails with "task already exists", second succeeds + mock_create_task.side_effect = [ + OpenMLServerException(code=614, message="task already exists"), + mock_task + ] + + # The actual test logic inspired by the original: compatible_datasets = self._get_compatible_rand_dataset() for i in range(100): try: dataset_id = compatible_datasets[i % len(compatible_datasets)] - # Upload a clustering task without a ground truth. task = openml.tasks.create_task( task_type=self.task_type, dataset_id=dataset_id, @@ -51,20 +82,14 @@ def test_upload_task(self): ) task = task.publish() TestBase._mark_entity_for_removal("task", task.id) - TestBase.logger.info( - f"collected from {__file__.split('/')[-1]}: {task.id}", - ) - # success break except OpenMLServerException as e: - # Error code for 'task already exists' - # Should be 533 according to the docs - # (# https://www.openml.org/api_docs#!/task/post_task) if e.code == 614: continue else: raise e else: - raise ValueError( - f"Could not create a valid task for task type ID {self.task_type}", - ) + pytest.fail("Could not create a valid task") + + assert task.id == 123 + assert mock_create_task.call_count == 2 diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 9316d0876..544ddfe14 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -2,10 +2,12 @@ from __future__ import annotations from time import time +from unittest.mock import MagicMock, patch + +import pytest import openml from openml.testing import TestBase -import pytest # Common methods between tasks @@ -16,36 +18,85 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.test_server() - def test_tagging(self): - task = openml.tasks.get_task(1) # anneal; crossvalidation + @patch("openml.tasks.list_tasks") + @patch("openml.tasks.get_task") + def test_tagging(self, mock_get_task, mock_list_tasks): + task_id = 1 + mock_task = MagicMock() + mock_task.tid = task_id + mock_get_task.return_value = mock_task + + # Initial state: no tasks with the tag + mock_list_tasks.return_value = {"tid": []} + + task = openml.tasks.get_task(task_id) # tags can be at most 64 alphanumeric (+ underscore) chars unique_indicator = str(time()).replace(".", "") tag = f"test_tag_OpenMLTaskMethodsTest_{unique_indicator}" + tasks = openml.tasks.list_tasks(tag=tag) - assert len(tasks) == 0 + assert len(tasks["tid"]) == 0 + + # After push_tag task.push_tag(tag) + mock_list_tasks.return_value = {"tid": [task_id]} + tasks = openml.tasks.list_tasks(tag=tag) - assert len(tasks) == 1 - assert 1 in tasks["tid"] + assert len(tasks["tid"]) == 1 + assert task_id in tasks["tid"] + + # After remove_tag task.remove_tag(tag) + mock_list_tasks.return_value = {"tid": []} + tasks = openml.tasks.list_tasks(tag=tag) - assert len(tasks) == 0 + assert len(tasks["tid"]) == 0 + + # Verify interactions + mock_get_task.assert_called_with(task_id) + mock_task.push_tag.assert_called_with(tag) + mock_task.remove_tag.assert_called_with(tag) + + @patch("openml.tasks.get_task") + def test_get_train_and_test_split_indices(self, mock_get_task): + task_id = 1882 + mock_task = MagicMock() + mock_task.tid = task_id + # Define expected indices for the mock + expected_train_00 = [16, 395] + expected_test_00 = [412, 364] + expected_train_22 = [237, 681] + expected_test_22 = [583, 24] + + def side_effect_indices(fold, repeat, sample=0): + if repeat == 0 and fold == 0: + return (expected_train_00, expected_test_00) + if repeat == 2 and fold == 2: + return (expected_train_22, expected_test_22) + if repeat != 0 and repeat != 2: + raise ValueError(f"Repeat {repeat} not known") + if fold != 0 and fold != 2: + raise ValueError(f"Fold {fold} not known") + raise ValueError(f"Split not found for fold={fold}, repeat={repeat}") + + mock_task.get_train_test_split_indices.side_effect = side_effect_indices + mock_get_task.return_value = mock_task - @pytest.mark.test_server() - def test_get_train_and_test_split_indices(self): openml.config.set_root_cache_directory(self.static_cache_dir) - task = openml.tasks.get_task(1882) + task = openml.tasks.get_task(task_id) + train_indices, test_indices = task.get_train_test_split_indices(0, 0) - assert train_indices[0] == 16 - assert train_indices[-1] == 395 - assert test_indices[0] == 412 - assert test_indices[-1] == 364 + assert train_indices[0] == expected_train_00[0] + assert train_indices[-1] == expected_train_00[-1] + assert test_indices[0] == expected_test_00[0] + assert test_indices[-1] == expected_test_00[-1] + train_indices, test_indices = task.get_train_test_split_indices(2, 2) - assert train_indices[0] == 237 - assert train_indices[-1] == 681 - assert test_indices[0] == 583 - assert test_indices[-1] == 24 + assert train_indices[0] == expected_train_22[0] + assert train_indices[-1] == expected_train_22[-1] + assert test_indices[0] == expected_test_22[0] + assert test_indices[-1] == expected_test_22[-1] + self.assertRaisesRegex( ValueError, "Fold 10 not known",