# Source code for mmgp.backends.scikit_learn

# -*- coding: utf-8 -*-
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
#
#
from mmgp.regressor import RegressorBase

import numpy as np
from typing import Self

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel

class Regressor(RegressorBase):
    """Gaussian-process regressor backed by scikit-learn.

    One independent ``GaussianProcessRegressor`` is trained per output
    column of the training targets.
    """

    def __init__(self, options: dict):
        """Store the backend options.

        Args:
            options (dict): Options of the scikit-learn backend. ``fit``
                reads the keys "kernel", "kernel_options", "optim",
                "num_restarts" and "anisotropic", plus the optional key
                "seed". If the optional key "show_warnings" is present and
                equal to False, scikit-learn ``ConvergenceWarning`` messages
                are silenced.
        """
        super(Regressor, self).__init__()
        self.algo = "scikit-learn"
        self.options = options

        if 'show_warnings' in self.options and self.options['show_warnings'] == False:
            import warnings
            from sklearn.exceptions import ConvergenceWarning

            # This registers a filter in the interpreter-wide warnings
            # registry, so it affects every regressor, not only this one.
            warnings.filterwarnings("ignore", category=ConvergenceWarning)

    def fit(self, X: np.ndarray, y: np.ndarray) -> Self:
        """Train one Gaussian process per output column.

        Args:
            X (np.ndarray): Training inputs, a 2D array whose second
                dimension is the input dimension.
            y (np.ndarray): Training targets, a 2D array whose second
                dimension is the output dimension.

        Returns:
            Self: The fitted regressor, to allow call chaining.

        Raises:
            AssertionError: If ``options["kernel"]`` is not a supported
                kernel name.
        """
        self.input_dim = X.shape[1]
        self.output_dim = y.shape[1]

        available_kernel_classes = {"Matern": Matern}
        assert self.options["kernel"] in available_kernel_classes.keys(), "scikit-learn kernel "+self.options["kernel"]+" not available"
        kernel_class = available_kernel_classes[self.options["kernel"]]

        if self.options["anisotropic"]:
            # One length scale per input dimension, with a learnable
            # amplitude in front of the base kernel.
            kernel = ConstantKernel() * kernel_class(
                length_scale=np.ones(self.input_dim),
                length_scale_bounds=(1e-8, 1e8),
                **self.options["kernel_options"],
            ) + WhiteKernel(noise_level_bounds=(1e-8, 1e8))
        else:
            # Single shared length scale; no amplitude factor in this branch.
            kernel = kernel_class(
                length_scale_bounds=(1e-8, 1e8),
                **self.options["kernel_options"],
            ) + WhiteKernel(noise_level_bounds=(1e-8, 1e8))

        # A missing "seed" leaves the optimizer restarts unseeded.
        random_state = self.options.get("seed")

        self.kmodel = [
            GaussianProcessRegressor(
                kernel=kernel,
                optimizer=self.options["optim"],
                n_restarts_optimizer=self.options["num_restarts"],
                random_state=random_state,
            )
            for _ in range(self.output_dim)
        ]
        for i, model in enumerate(self.kmodel):
            model.fit(X, y[:, i].reshape(-1, 1))

        return self

    def predict(
        self, X: np.ndarray, return_var: bool
    ) -> np.ndarray | tuple[np.ndarray, np.ndarray]:
        """Predict the outputs for a single input point.

        Only the prediction for the first row of ``X`` is returned.

        Args:
            X (np.ndarray): Input point, array of size 1 x self.input_dim.
            return_var (bool): True if the prediction variance is also
                computed.

        Returns:
            np.ndarray | tuple[np.ndarray, np.ndarray]: The predicted mean,
            an array of size 1 x self.output_dim. When ``return_var`` is
            true, a ``(mean, var)`` tuple of two such arrays instead.
        """
        pred = [model.predict(X, return_std=return_var) for model in self.kmodel]
        mean = np.zeros((1, self.output_dim))

        if return_var:
            var = np.zeros((1, self.output_dim))
            for i, (y_mean, y_std) in enumerate(pred):
                # ravel() makes this independent of whether the backend
                # returns a 1D or a single-column 2D array.
                mean[0, i] = np.ravel(y_mean)[0]
                # The models return a standard deviation; square it.
                var[0, i] = np.ravel(y_std)[0] ** 2
            return (mean, var)

        for i, y_mean in enumerate(pred):
            mean[0, i] = np.ravel(y_mean)[0]
        return mean

    def predict_Monte_Carlo_draw(self, X: np.ndarray) -> np.ndarray:
        """Draw one random sample scaled by the predictive standard deviation.

        Each output component is drawn independently of the others. The draw
        uses NumPy's global random state, so the "seed" option does not
        control it.

        Args:
            X (np.ndarray): Input point, array of size 1 x self.input_dim.

        Returns:
            np.ndarray: Array of size 1 x self.output_dim holding standard
            normal draws multiplied by the predictive standard deviations.
        """
        # NOTE(review): the predictive mean is computed but not added to the
        # returned draw, so the result is a zero-mean perturbation rather
        # than a full posterior sample. Confirm whether callers add the mean.
        _, var = self.predict(X, return_var=True)
        draws = np.random.normal(loc=0, scale=1, size=self.output_dim)
        # Element-wise scaling; equivalent to multiplying by diag(sqrt(var)).
        return np.sqrt(var) * draws.reshape(1, -1)