# Source code for sktime_mcp.tools.evaluate

"""
evaluate tool for sktime MCP.

Executes cross-validation on an estimator.
"""

import logging
from typing import Any

from sktime.forecasting.model_evaluation import evaluate

try:
    from sktime.split import ExpandingWindowSplitter
except ImportError:  # pragma: no cover - sktime < 0.29
    from sktime.forecasting.model_selection import ExpandingWindowSplitter

from sktime_mcp.runtime.executor import get_executor

logger = logging.getLogger(__name__)


def evaluate_estimator_tool(
    estimator_handle: str,
    dataset: str,
    cv_folds: int = 3,
) -> dict[str, Any]:
    """
    Run an expanding-window backtest of a stored estimator on a demo dataset.

    Args:
        estimator_handle: Handle from instantiate_estimator
        dataset: Name of demo dataset
        cv_folds: Requested number of backtest folds; clamped to the range
            ``1 .. max(1, len(y) - 1)`` before the splitter is built

    Returns:
        On success, a dictionary with ``success=True``, ``results`` (one
        record per fold), ``cv_folds_run`` (number of records produced) and
        ``cv_folds_requested`` (the caller's value as an int). If the handle
        is unknown or evaluation raises, ``success=False`` plus an ``error``
        message. If the dataset loader reports failure, its result dictionary
        is returned unchanged.
    """
    runtime = get_executor()

    # Resolve the handle first so an unknown handle is reported before any
    # dataset loading takes place.
    try:
        forecaster = runtime._handle_manager.get_instance(estimator_handle)
    except KeyError:
        return {"success": False, "error": f"Handle not found: {estimator_handle}"}

    loaded = runtime.load_dataset(dataset)
    if not loaded["success"]:
        return loaded

    series = loaded["data"]
    exog = loaded.get("exog")

    try:
        n_obs = len(series)
        requested = int(cv_folds)

        # Never ask for more folds than there are one-step-ahead targets,
        # and never for fewer than one.
        n_windows = min(requested, max(1, n_obs - 1))
        if n_windows < 1:
            n_windows = 1

        # With step_length=1 and fh=[1], starting from a training window of
        # n_obs - n_windows points gives n_windows splits, the last of which
        # trains on everything except the final observation.
        first_train_size = n_obs - n_windows
        if first_train_size < 1:
            first_train_size = 1

        splitter = ExpandingWindowSplitter(
            initial_window=first_train_size,
            step_length=1,
            fh=[1],
        )
        results = evaluate(forecaster=forecaster, y=series, X=exog, cv=splitter)

        # Fitted estimator objects are not useful in the tool output, so the
        # column holding them is removed when present.
        if "estimator" in results.columns:
            results = results.drop(columns=["estimator"])

        fold_records = results.to_dict(orient="records")
        response = {
            "success": True,
            "results": fold_records,
            "cv_folds_run": len(fold_records),
            "cv_folds_requested": requested,
        }
        return response
    except Exception as exc:
        logger.exception("Error during evaluate")
        return {"success": False, "error": str(exc)}