opsml.model.interfaces.sklearn
from pathlib import Path
from typing import Any, Dict, Optional, Union

import joblib
import pandas as pd
from numpy.typing import NDArray
from pydantic import model_validator

from opsml.helpers.utils import get_class_name
from opsml.model.interfaces.base import (
    ModelInterface,
    get_model_args,
    get_processor_name,
)
from opsml.types import CommonKwargs, Suffix, TrainedModelType

try:
    from sklearn.base import BaseEstimator

    class SklearnModel(ModelInterface):
        """Model interface for Sklearn models.

        Args:
            model:
                Sklearn model
            preprocessor:
                Optional preprocessor
            sample_data:
                Sample data to be used for type inference.
                For sklearn models this should be a pandas DataFrame or numpy array.
                This should match exactly what the model expects as input.
            task_type:
                Task type for model. Defaults to undefined.
            model_type:
                Optional model type. This is inferred automatically.
            preprocessor_name:
                Optional preprocessor name. This is inferred automatically if a
                preprocessor is provided.

        Returns:
            SklearnModel
        """

        # Trained estimator; optional so the interface can be built without one.
        model: Optional[BaseEstimator] = None
        # Example input; replaced in `check_model` by the output of `_get_sample_data`.
        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
        # Optional preprocessing object persisted alongside the model via joblib.
        preprocessor: Optional[Any] = None
        # Overwritten in `check_model` with the result of `get_processor_name`.
        preprocessor_name: str = CommonKwargs.UNDEFINED.value

        @property
        def model_class(self) -> str:
            """Returns the sklearn estimator value of `TrainedModelType`"""
            return TrainedModelType.SKLEARN_ESTIMATOR.value

        @model_validator(mode="before")
        @classmethod
        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
            """Fills in inferred arguments before field validation runs.

            Args:
                model_args:
                    Raw keyword arguments passed to the interface

            Returns:
                The same dictionary, updated in place. It is returned untouched
                when a `modelcard_uid` is supplied.
            """
            if model_args.get("modelcard_uid", False):
                return model_args

            # NOTE(review): `module` and `bases` are presumably the model's module path
            # and base-class names - confirm against `get_model_args`.
            model, module, bases = get_model_args(model_args.get("model"))

            if "sklearn" in module:
                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
            elif any("sklearn" in base for base in bases):
                # Not defined in sklearn itself, but inherits from an sklearn base.
                model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
                model_args.get(CommonKwargs.PREPROCESSOR.value),
            )

            return model_args

        def save_preprocessor(self, path: Path) -> None:
            """Saves the preprocessor to path using joblib.

            Args:
                path:
                    Pathlib object

            Raises:
                AssertionError:
                    If no preprocessor is set on the interface
            """
            if self.preprocessor is None:
                # Raised explicitly instead of via `assert` so the check survives `python -O`.
                raise AssertionError("No preprocessor detected in interface")
            joblib.dump(self.preprocessor, path)

        def load_preprocessor(self, path: Path) -> None:
            """Loads the preprocessor from path using joblib.

            Args:
                path:
                    Pathlib object
            """
            # NOTE: joblib files are pickle-based; only load artifacts from trusted sources.
            self.preprocessor = joblib.load(path)

        @property
        def preprocessor_suffix(self) -> str:
            """Returns suffix for storage"""
            return Suffix.JOBLIB.value

        @staticmethod
        def name() -> str:
            """Returns the name of this interface class"""
            return SklearnModel.__name__

except ModuleNotFoundError:
    # sklearn is not installed: fall back to the backup class exported under the same name.
    from opsml.model.interfaces.backups import SklearnModelNoModule as SklearnModel
class SklearnModel(ModelInterface):
    """Model interface for Sklearn models.

    Args:
        model:
            Sklearn model
        preprocessor:
            Optional preprocessor
        sample_data:
            Sample data to be used for type inference.
            For sklearn models this should be a pandas DataFrame or numpy array.
            This should match exactly what the model expects as input.
        task_type:
            Task type for model. Defaults to undefined.
        model_type:
            Optional model type. This is inferred automatically.
        preprocessor_name:
            Optional preprocessor name. This is inferred automatically if a
            preprocessor is provided.

    Returns:
        SklearnModel
    """

    # Trained estimator; optional so the interface can be built without one.
    model: Optional[BaseEstimator] = None
    # Example input; replaced in `check_model` by the output of `_get_sample_data`.
    sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
    # Optional preprocessing object persisted alongside the model via joblib.
    preprocessor: Optional[Any] = None
    # Overwritten in `check_model` with the result of `get_processor_name`.
    preprocessor_name: str = CommonKwargs.UNDEFINED.value

    @property
    def model_class(self) -> str:
        """Returns the sklearn estimator value of `TrainedModelType`"""
        return TrainedModelType.SKLEARN_ESTIMATOR.value

    @model_validator(mode="before")
    @classmethod
    def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
        """Fills in inferred arguments before field validation runs.

        Args:
            model_args:
                Raw keyword arguments passed to the interface

        Returns:
            The same dictionary, updated in place. It is returned untouched
            when a `modelcard_uid` is supplied.
        """
        if model_args.get("modelcard_uid", False):
            return model_args

        # NOTE(review): `module` and `bases` are presumably the model's module path
        # and base-class names - confirm against `get_model_args`.
        model, module, bases = get_model_args(model_args.get("model"))

        if "sklearn" in module:
            model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
        elif any("sklearn" in base for base in bases):
            # Not defined in sklearn itself, but inherits from an sklearn base.
            model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

        sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
        model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
        model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
        model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
            model_args.get(CommonKwargs.PREPROCESSOR.value),
        )

        return model_args

    def save_preprocessor(self, path: Path) -> None:
        """Saves the preprocessor to path using joblib.

        Args:
            path:
                Pathlib object

        Raises:
            AssertionError:
                If no preprocessor is set on the interface
        """
        if self.preprocessor is None:
            # Raised explicitly instead of via `assert` so the check survives `python -O`.
            raise AssertionError("No preprocessor detected in interface")
        joblib.dump(self.preprocessor, path)

    def load_preprocessor(self, path: Path) -> None:
        """Loads the preprocessor from path using joblib.

        Args:
            path:
                Pathlib object
        """
        # NOTE: joblib files are pickle-based; only load artifacts from trusted sources.
        self.preprocessor = joblib.load(path)

    @property
    def preprocessor_suffix(self) -> str:
        """Returns suffix for storage"""
        return Suffix.JOBLIB.value

    @staticmethod
    def name() -> str:
        """Returns the name of this interface class"""
        return SklearnModel.__name__
Model interface for Sklearn models.
Arguments:
- model: Sklearn model
- preprocessor: Optional preprocessor
- sample_data: Sample data to be used for type inference. For sklearn models this should be a pandas DataFrame or numpy array. This should match exactly what the model expects as input.
- task_type: Task type for model. Defaults to undefined.
- model_type: Optional model type. This is inferred automatically.
- preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.
Returns: SklearnModel
@model_validator(mode='before')
@classmethod
def
check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
@model_validator(mode="before")
@classmethod
def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
    """Derive model type, sample data, data type and preprocessor name.

    Runs as a "before" validator on the raw constructor arguments.

    Args:
        model_args:
            Raw keyword arguments passed to the interface

    Returns:
        The same dictionary, updated in place. When a `modelcard_uid` is
        present the arguments are handed back without any changes.
    """
    # Guard clause: nothing is inferred when a modelcard uid is supplied.
    if model_args.get("modelcard_uid", False):
        return model_args

    # NOTE(review): the 2nd/3rd items are presumably the module path and the
    # base-class names of the model - confirm against `get_model_args`.
    estimator, module_name, base_names = get_model_args(model_args.get("model"))

    if "sklearn" in module_name:
        model_args[CommonKwargs.MODEL_TYPE.value] = estimator.__class__.__name__
    elif any("sklearn" in base_name for base_name in base_names):
        # Defined outside sklearn but inheriting from one of its classes.
        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

    sample_key = CommonKwargs.SAMPLE_DATA.value
    sample = cls._get_sample_data(sample_data=model_args[sample_key])
    model_args[sample_key] = sample
    model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample)

    preprocessor = model_args.get(CommonKwargs.PREPROCESSOR.value)
    model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(preprocessor)

    return model_args
def
save_preprocessor(self, path: pathlib.Path) -> None:
def save_preprocessor(self, path: Path) -> None:
    """Saves the preprocessor to path using joblib.

    Args:
        path:
            Pathlib object

    Raises:
        AssertionError:
            If no preprocessor is set on the interface
    """
    if self.preprocessor is None:
        # Raised explicitly instead of via `assert` so the check survives `python -O`;
        # otherwise a pickled `None` would be written silently.
        raise AssertionError("No preprocessor detected in interface")
    joblib.dump(self.preprocessor, path)
Saves the preprocessor to the given path. Raises an assertion error if no preprocessor is set. The base implementation uses joblib.
Arguments:
- path: Pathlib object
def
load_preprocessor(self, path: pathlib.Path) -> None:
def load_preprocessor(self, path: Path) -> None:
    """Restore the preprocessor from a joblib file and store it on the interface.

    Args:
        path:
            Pathlib object pointing at the saved preprocessor
    """
    # NOTE: joblib files are pickle-based; only load artifacts from trusted sources.
    restored = joblib.load(path)
    self.preprocessor = restored
Loads the preprocessor from the given path using joblib and stores it on the interface.
Arguments:
- path: Pathlib object
preprocessor_suffix: str
@property
def preprocessor_suffix(self) -> str:
    """Storage suffix for the saved preprocessor (the joblib suffix)"""
    joblib_suffix = Suffix.JOBLIB.value
    return joblib_suffix
Returns suffix for storage
model_config =
{'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}
model_fields =
{'model': FieldInfo(annotation=Union[BaseEstimator, NoneType], required=False), 'sample_data': FieldInfo(annotation=Union[DataFrame, ndarray[Any, dtype[Any]], NoneType], required=False), 'onnx_model': FieldInfo(annotation=Union[OnnxModel, NoneType], required=False), 'task_type': FieldInfo(annotation=str, required=False, default='undefined'), 'model_type': FieldInfo(annotation=str, required=False, default='undefined'), 'data_type': FieldInfo(annotation=str, required=False, default='undefined'), 'modelcard_uid': FieldInfo(annotation=str, required=False, default=''), 'preprocessor': FieldInfo(annotation=Union[Any, NoneType], required=False), 'preprocessor_name': FieldInfo(annotation=str, required=False, default='undefined')}
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs