opsml.model.interfaces.sklearn

View Source

  1from pathlib import Path
  2from typing import Any, Dict, Optional, Union
  3
  4import joblib
  5import pandas as pd
  6from numpy.typing import NDArray
  7from pydantic import model_validator
  8
  9from opsml.helpers.utils import get_class_name
 10from opsml.model.interfaces.base import (
 11    ModelInterface,
 12    get_model_args,
 13    get_processor_name,
 14)
 15from opsml.types import CommonKwargs, Suffix, TrainedModelType
 16
 17try:
 18    from sklearn.base import BaseEstimator
 19
 20    class SklearnModel(ModelInterface):
 21        """Model interface for Sklearn models.
 22
 23        Args:
 24            model:
 25                Sklearn model
 26            preprocessor:
 27                Optional preprocessor
 28            sample_data:
 29                Sample data to be used for type inference.
 30                For sklearn models this should be a pandas DataFrame or numpy array.
 31                This should match exactly what the model expects as input.
 32            task_type:
 33                Task type for model. Defaults to undefined.
 34            model_type:
 35                Optional model type. This is inferred automatically.
 36            preprocessor_name:
 37                Optional preprocessor name. This is inferred automatically if a
 38                preprocessor is provided.
 39
 40        Returns:
 41        SklearnModel
 42        """
 43
 44        model: Optional[BaseEstimator] = None
 45        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
 46        preprocessor: Optional[Any] = None
 47        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 48
 49        @property
 50        def model_class(self) -> str:
 51            return TrainedModelType.SKLEARN_ESTIMATOR.value
 52
 53        @model_validator(mode="before")
 54        @classmethod
 55        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 56            model = model_args.get("model")
 57
 58            if model_args.get("modelcard_uid", False):
 59                return model_args
 60
 61            model, module, bases = get_model_args(model)
 62
 63            if "sklearn" in module:
 64                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
 65
 66            else:
 67                for base in bases:
 68                    if "sklearn" in base:
 69                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
 70
 71            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
 72            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
 73            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
 74            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
 75                model_args.get(CommonKwargs.PREPROCESSOR.value),
 76            )
 77
 78            return model_args
 79
 80        def save_preprocessor(self, path: Path) -> None:
 81            """Saves preprocessor to path if present. Base implementation use Joblib
 82
 83            Args:
 84                path:
 85                    Pathlib object
 86            """
 87            assert self.preprocessor is not None, "No preprocessor detected in interface"
 88            joblib.dump(self.preprocessor, path)
 89
 90        def load_preprocessor(self, path: Path) -> None:
 91            """Load preprocessor from pathlib object
 92
 93            Args:
 94                path:
 95                    Pathlib object
 96            """
 97            self.preprocessor = joblib.load(path)
 98
 99        @property
100        def preprocessor_suffix(self) -> str:
101            """Returns suffix for storage"""
102            return Suffix.JOBLIB.value
103
104        @staticmethod
105        def name() -> str:
106            return SklearnModel.__name__
107
108except ModuleNotFoundError:
109    from opsml.model.interfaces.backups import SklearnModelNoModule as SklearnModel

class SklearnModel(opsml.model.interfaces.base.ModelInterface): View Source

 21    class SklearnModel(ModelInterface):
 22        """Model interface for Sklearn models.
 23
 24        Args:
 25            model:
 26                Sklearn model
 27            preprocessor:
 28                Optional preprocessor
 29            sample_data:
 30                Sample data to be used for type inference.
 31                For sklearn models this should be a pandas DataFrame or numpy array.
 32                This should match exactly what the model expects as input.
 33            task_type:
 34                Task type for model. Defaults to undefined.
 35            model_type:
 36                Optional model type. This is inferred automatically.
 37            preprocessor_name:
 38                Optional preprocessor name. This is inferred automatically if a
 39                preprocessor is provided.
 40
 41        Returns:
 42        SklearnModel
 43        """
 44
 45        model: Optional[BaseEstimator] = None
 46        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
 47        preprocessor: Optional[Any] = None
 48        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 49
 50        @property
 51        def model_class(self) -> str:
 52            return TrainedModelType.SKLEARN_ESTIMATOR.value
 53
 54        @model_validator(mode="before")
 55        @classmethod
 56        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 57            model = model_args.get("model")
 58
 59            if model_args.get("modelcard_uid", False):
 60                return model_args
 61
 62            model, module, bases = get_model_args(model)
 63
 64            if "sklearn" in module:
 65                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
 66
 67            else:
 68                for base in bases:
 69                    if "sklearn" in base:
 70                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
 71
 72            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
 73            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
 74            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
 75            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
 76                model_args.get(CommonKwargs.PREPROCESSOR.value),
 77            )
 78
 79            return model_args
 80
 81        def save_preprocessor(self, path: Path) -> None:
 82            """Saves preprocessor to path if present. Base implementation use Joblib
 83
 84            Args:
 85                path:
 86                    Pathlib object
 87            """
 88            assert self.preprocessor is not None, "No preprocessor detected in interface"
 89            joblib.dump(self.preprocessor, path)
 90
 91        def load_preprocessor(self, path: Path) -> None:
 92            """Load preprocessor from pathlib object
 93
 94            Args:
 95                path:
 96                    Pathlib object
 97            """
 98            self.preprocessor = joblib.load(path)
 99
100        @property
101        def preprocessor_suffix(self) -> str:
102            """Returns suffix for storage"""
103            return Suffix.JOBLIB.value
104
105        @staticmethod
106        def name() -> str:
107            return SklearnModel.__name__

Model interface for Sklearn models.

Arguments:

model: Sklearn model
preprocessor: Optional preprocessor
sample_data: Sample data to be used for type inference. For sklearn models this should be a pandas DataFrame or numpy array. This should match exactly what the model expects as input.
task_type: Task type for model. Defaults to undefined.
model_type: Optional model type. This is inferred automatically.
preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.

Returns: SklearnModel

model: Optional[sklearn.base.BaseEstimator]

sample_data: Union[pandas.core.frame.DataFrame, numpy.ndarray[Any, numpy.dtype[Any]], NoneType]

preprocessor: Optional[Any]

preprocessor_name: str

model_class: str View Source

50        @property
51        def model_class(self) -> str:
52            return TrainedModelType.SKLEARN_ESTIMATOR.value

@model_validator(mode='before')

@classmethod

def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]: View Source

54        @model_validator(mode="before")
55        @classmethod
56        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
57            model = model_args.get("model")
58
59            if model_args.get("modelcard_uid", False):
60                return model_args
61
62            model, module, bases = get_model_args(model)
63
64            if "sklearn" in module:
65                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
66
67            else:
68                for base in bases:
69                    if "sklearn" in base:
70                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
71
72            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
73            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
74            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
75            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
76                model_args.get(CommonKwargs.PREPROCESSOR.value),
77            )
78
79            return model_args

def save_preprocessor(self, path: pathlib.Path) -> None: View Source

81        def save_preprocessor(self, path: Path) -> None:
82            """Saves preprocessor to path if present. Base implementation use Joblib
83
84            Args:
85                path:
86                    Pathlib object
87            """
88            assert self.preprocessor is not None, "No preprocessor detected in interface"
89            joblib.dump(self.preprocessor, path)

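Saves the preprocessor to the given path. The base implementation uses joblib. A preprocessor must be set on the interface; otherwise the call fails with the assertion message "No preprocessor detected in interface".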

Arguments:

path: pathlib.Path object giving the location the preprocessor is written to

def load_preprocessor(self, path: pathlib.Path) -> None: View Source

91        def load_preprocessor(self, path: Path) -> None:
92            """Load preprocessor from pathlib object
93
94            Args:
95                path:
96                    Pathlib object
97            """
98            self.preprocessor = joblib.load(path)

Loads the preprocessor from the given pathlib.Path using joblib and stores it on the interface as `preprocessor`.

Arguments:

path: pathlib.Path object giving the location the preprocessor is read from

preprocessor_suffix: str View Source

100        @property
101        def preprocessor_suffix(self) -> str:
102            """Returns suffix for storage"""
103            return Suffix.JOBLIB.value

Returns suffix for storage

@staticmethod

def name() -> str: View Source

105        @staticmethod
106        def name() -> str:
107            return SklearnModel.__name__

model_config = {'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}

model_fields = {'model': FieldInfo(annotation=Union[BaseEstimator, NoneType], required=False), 'sample_data': FieldInfo(annotation=Union[DataFrame, ndarray[Any, dtype[Any]], NoneType], required=False), 'onnx_model': FieldInfo(annotation=Union[OnnxModel, NoneType], required=False), 'task_type': FieldInfo(annotation=str, required=False, default='undefined'), 'model_type': FieldInfo(annotation=str, required=False, default='undefined'), 'data_type': FieldInfo(annotation=str, required=False, default='undefined'), 'modelcard_uid': FieldInfo(annotation=str, required=False, default=''), 'preprocessor': FieldInfo(annotation=Union[Any, NoneType], required=False), 'preprocessor_name': FieldInfo(annotation=str, required=False, default='undefined')}

model_computed_fields = {}

Inherited Members

pydantic.main.BaseModel: BaseModel; model_extra; model_fields_set; model_construct; model_copy; model_dump; model_dump_json; model_json_schema; model_parametrized_name; model_post_init; model_rebuild; model_validate; model_validate_json; model_validate_strings; dict; json; parse_obj; parse_raw; parse_file; from_orm; construct; copy; schema; schema_json; validate; update_forward_refs
opsml.model.interfaces.base.ModelInterface: onnx_model; task_type; model_type; data_type; modelcard_uid; check_modelcard_uid; save_model; load_model; save_onnx; convert_to_onnx; load_onnx_model; save_sample_data; load_sample_data; get_sample_prediction; model_suffix; data_suffix