opsml.model.interfaces.catboost_
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, cast

import joblib
import numpy as np
from numpy.typing import NDArray
from pydantic import model_validator

from opsml.helpers.logging import ArtifactLogger
from opsml.helpers.utils import get_class_name
from opsml.model.interfaces.base import (
    ModelInterface,
    SamplePrediction,
    get_model_args,
    get_processor_name,
)
from opsml.types import (
    CommonKwargs,
    ModelReturn,
    OnnxModel,
    SaveName,
    Suffix,
    TrainedModelType,
)

logger = ArtifactLogger.get_logger()

# The real interface is only defined when catboost can be imported; otherwise
# a backup class is imported under the same name.
try:
    from catboost import CatBoost

    class CatBoostModel(ModelInterface):
        """Model interface for CatBoost models.

        Args:
            model:
                CatBoost model (Classifier, Regressor, Ranker)
            preprocessor:
                Optional preprocessor
            sample_data:
                Sample data to be used for type inference and sample prediction.
                For catboost models this should be a numpy array (either 1d or 2d) or list of feature values.
                This should match exactly what the model expects as input.
            task_type:
                Task type for model. Defaults to undefined.
            model_type:
                Optional model type. This is inferred automatically.
            preprocessor_name:
                Optional preprocessor name. This is inferred automatically if a
                preprocessor is provided.

        Returns:
            CatBoostModel
        """

        model: Optional[CatBoost] = None
        sample_data: Optional[Union[List[Any], NDArray[Any]]] = None
        preprocessor: Optional[Any] = None
        preprocessor_name: str = CommonKwargs.UNDEFINED.value

        @classmethod
        def _get_sample_data(
            cls,
            sample_data: Union[List[Any], NDArray[Any]],
        ) -> Union[List[Any], NDArray[Any]]:
            """Check sample data and return what is used for type inference
            and sample prediction.

            Args:
                sample_data:
                    List or numpy array of feature values.

            Returns:
                Lists are returned unchanged. A 1d array is reshaped to a single
                row; any other array is sliced down to its first row.

            Raises:
                ValueError: If sample data is neither a list nor a numpy array.
            """

            if isinstance(sample_data, list):
                return sample_data

            if isinstance(sample_data, np.ndarray):
                if len(sample_data.shape) == 1:
                    return sample_data.reshape(1, -1)
                # slice (rather than index) so the leading dimension is kept
                return sample_data[0:1]

            raise ValueError("Sample data should be a list or numpy array")

        def get_sample_prediction(self) -> SamplePrediction:
            """Predict on the stored sample data.

            Returns:
                SamplePrediction holding the result of `get_class_name` for the
                prediction together with the prediction itself.

            Raises:
                AssertionError: If the model or the sample data is missing.
            """
            assert self.model is not None, "Model is not defined"
            assert self.sample_data is not None, "Sample data must be provided"

            prediction = self.model.predict(self.sample_data)

            prediction_type = get_class_name(prediction)

            return SamplePrediction(prediction_type, prediction)

        @property
        def model_class(self) -> str:
            """Returns the trained model type value for CatBoost"""
            return TrainedModelType.CATBOOST.value

        @model_validator(mode="before")
        @classmethod
        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
            """Fill in model type, sample data, data type and preprocessor name
            before field validation runs.

            Args:
                model_args:
                    Raw arguments passed to the interface.

            Returns:
                The same dictionary, updated in place. It is returned untouched
                when a truthy `modelcard_uid` is present.
            """
            model = model_args.get("model")

            if model_args.get("modelcard_uid", False):
                return model_args

            model, module, bases = get_model_args(model)

            if "catboost" in module:
                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__

            else:
                for base in bases:
                    if "catboost" in base:
                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

            # the sample data key is indexed directly, so it must be present here
            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
                model_args.get(CommonKwargs.PREPROCESSOR.value),
            )

            return model_args

        def save_model(self, path: Path) -> None:
            """Saves model to path using the CatBoost model's own `save_model`.

            Args:
                path:
                    Pathlib object

            Raises:
                AssertionError: If no model is attached to the interface.
            """
            assert self.model is not None, "No model detected in interface"
            self.model.save_model(path.as_posix())

        def load_model(self, path: Path, **kwargs: Any) -> None:
            """Load model from pathlib object

            Args:
                path:
                    Pathlib object
                kwargs:
                    Additional kwargs (not used by this implementation)
            """
            import catboost

            # Look up the class named by model_type in the catboost package;
            # fall back to the generic CatBoost class when there is no such name.
            model = getattr(catboost, self.model_type, CatBoost)()
            self.model = model.load_model(path.as_posix())

        def _convert_to_onnx_inplace(self) -> None:
            """Convert to onnx model using temp dir"""
            with tempfile.TemporaryDirectory() as tmpdir:
                lpath = Path(tmpdir) / SaveName.ONNX_MODEL.value
                onnx_path = lpath.with_suffix(Suffix.ONNX.value)
                self.convert_to_onnx(**{"path": onnx_path})

        def convert_to_onnx(self, **kwargs: Path) -> None:
            """Converts model to onnx format

            Nothing happens when an onnx model is already attached. Without a
            `path` the conversion is redone through a temporary directory.

            Args:
                kwargs:
                    Optional `path` where the onnx file is written.

            Raises:
                AssertionError: If no model is attached to the interface.
            """
            # Check for an existing conversion first so nothing is logged or
            # imported when there is no work to do.
            if self.onnx_model is not None:
                return None

            path: Optional[Path] = kwargs.get("path")
            if path is None:
                # re-enters this method with a temp path; logging happens there
                return self._convert_to_onnx_inplace()

            import onnx
            import onnxruntime as rt

            assert self.model is not None, "No model detected in interface"

            logger.info("Converting CatBoost model to onnx format")

            self.model.save_model(
                path.as_posix(),
                format="onnx",
                export_parameters={"onnx_domain": "ai.catboost"},
            )
            self.onnx_model = OnnxModel(
                onnx_version=onnx.__version__,
                sess=rt.InferenceSession(path.as_posix()),
            )
            return None

        def save_onnx(self, path: Path) -> ModelReturn:
            """Saves the onnx model

            Args:
                path:
                    Path to save

            Returns:
                ModelReturn
            """
            import onnxruntime as rt

            from opsml.model.onnx import _get_onnx_metadata

            if self.onnx_model is None:
                self.convert_to_onnx(**{"path": path})

            else:
                self.onnx_model.sess_to_path(path)

            assert self.onnx_model is not None, "No onnx model detected in interface"

            # no need to save onnx to bytes since its done during onnx conversion
            return _get_onnx_metadata(self, cast(rt.InferenceSession, self.onnx_model.sess))

        def save_preprocessor(self, path: Path) -> None:
            """Saves preprocessor to path with Joblib.

            Args:
                path:
                    Pathlib object

            Raises:
                AssertionError: If no preprocessor is attached to the interface.
            """
            assert self.preprocessor is not None, "No preprocessor detected in interface"
            joblib.dump(self.preprocessor, path)

        def load_preprocessor(self, path: Path) -> None:
            """Load preprocessor from pathlib object

            Args:
                path:
                    Pathlib object
            """
            self.preprocessor = joblib.load(path)

        @property
        def preprocessor_suffix(self) -> str:
            """Returns suffix for storage"""
            return Suffix.JOBLIB.value

        @property
        def model_suffix(self) -> str:
            """Returns suffix for storage"""
            return Suffix.CATBOOST.value

        @staticmethod
        def name() -> str:
            """Returns the name of this interface class"""
            return CatBoostModel.__name__

except ModuleNotFoundError:
    from opsml.model.interfaces.backups import CatBoostModelNoModule as CatBoostModel
logger =
<builtins.Logger object>
class CatBoostModel(ModelInterface):
    """Model interface for CatBoost models.

    Args:
        model:
            CatBoost model (Classifier, Regressor, Ranker)
        preprocessor:
            Optional preprocessor
        sample_data:
            Sample data to be used for type inference and sample prediction.
            For catboost models this should be a numpy array (either 1d or 2d) or list of feature values.
            This should match exactly what the model expects as input.
        task_type:
            Task type for model. Defaults to undefined.
        model_type:
            Optional model type. This is inferred automatically.
        preprocessor_name:
            Optional preprocessor name. This is inferred automatically if a
            preprocessor is provided.

    Returns:
        CatBoostModel
    """

    model: Optional[CatBoost] = None
    sample_data: Optional[Union[List[Any], NDArray[Any]]] = None
    preprocessor: Optional[Any] = None
    preprocessor_name: str = CommonKwargs.UNDEFINED.value

    @classmethod
    def _get_sample_data(
        cls,
        sample_data: Union[List[Any], NDArray[Any]],
    ) -> Union[List[Any], NDArray[Any]]:
        """Check sample data and return what is used for type inference
        and sample prediction.

        Args:
            sample_data:
                List or numpy array of feature values.

        Returns:
            Lists are returned unchanged. A 1d array is reshaped to a single
            row; any other array is sliced down to its first row.

        Raises:
            ValueError: If sample data is neither a list nor a numpy array.
        """

        if isinstance(sample_data, list):
            return sample_data

        if isinstance(sample_data, np.ndarray):
            if len(sample_data.shape) == 1:
                return sample_data.reshape(1, -1)
            # slice (rather than index) so the leading dimension is kept
            return sample_data[0:1]

        raise ValueError("Sample data should be a list or numpy array")

    def get_sample_prediction(self) -> SamplePrediction:
        """Predict on the stored sample data.

        Returns:
            SamplePrediction holding the result of `get_class_name` for the
            prediction together with the prediction itself.

        Raises:
            AssertionError: If the model or the sample data is missing.
        """
        assert self.model is not None, "Model is not defined"
        assert self.sample_data is not None, "Sample data must be provided"

        prediction = self.model.predict(self.sample_data)

        prediction_type = get_class_name(prediction)

        return SamplePrediction(prediction_type, prediction)

    @property
    def model_class(self) -> str:
        """Returns the trained model type value for CatBoost"""
        return TrainedModelType.CATBOOST.value

    @model_validator(mode="before")
    @classmethod
    def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
        """Fill in model type, sample data, data type and preprocessor name
        before field validation runs.

        Args:
            model_args:
                Raw arguments passed to the interface.

        Returns:
            The same dictionary, updated in place. It is returned untouched
            when a truthy `modelcard_uid` is present.
        """
        model = model_args.get("model")

        if model_args.get("modelcard_uid", False):
            return model_args

        model, module, bases = get_model_args(model)

        if "catboost" in module:
            model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__

        else:
            for base in bases:
                if "catboost" in base:
                    model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

        # the sample data key is indexed directly, so it must be present here
        sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
        model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
        model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
        model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
            model_args.get(CommonKwargs.PREPROCESSOR.value),
        )

        return model_args

    def save_model(self, path: Path) -> None:
        """Saves model to path using the CatBoost model's own `save_model`.

        Args:
            path:
                Pathlib object

        Raises:
            AssertionError: If no model is attached to the interface.
        """
        assert self.model is not None, "No model detected in interface"
        self.model.save_model(path.as_posix())

    def load_model(self, path: Path, **kwargs: Any) -> None:
        """Load model from pathlib object

        Args:
            path:
                Pathlib object
            kwargs:
                Additional kwargs (not used by this implementation)
        """
        import catboost

        # Look up the class named by model_type in the catboost package;
        # fall back to the generic CatBoost class when there is no such name.
        model = getattr(catboost, self.model_type, CatBoost)()
        self.model = model.load_model(path.as_posix())

    def _convert_to_onnx_inplace(self) -> None:
        """Convert to onnx model using temp dir"""
        with tempfile.TemporaryDirectory() as tmpdir:
            lpath = Path(tmpdir) / SaveName.ONNX_MODEL.value
            onnx_path = lpath.with_suffix(Suffix.ONNX.value)
            self.convert_to_onnx(**{"path": onnx_path})

    def convert_to_onnx(self, **kwargs: Path) -> None:
        """Converts model to onnx format

        Nothing happens when an onnx model is already attached. Without a
        `path` the conversion is redone through a temporary directory.

        Args:
            kwargs:
                Optional `path` where the onnx file is written.

        Raises:
            AssertionError: If no model is attached to the interface.
        """
        # Check for an existing conversion first so nothing is logged or
        # imported when there is no work to do.
        if self.onnx_model is not None:
            return None

        path: Optional[Path] = kwargs.get("path")
        if path is None:
            # re-enters this method with a temp path; logging happens there
            return self._convert_to_onnx_inplace()

        import onnx
        import onnxruntime as rt

        assert self.model is not None, "No model detected in interface"

        logger.info("Converting CatBoost model to onnx format")

        self.model.save_model(
            path.as_posix(),
            format="onnx",
            export_parameters={"onnx_domain": "ai.catboost"},
        )
        self.onnx_model = OnnxModel(
            onnx_version=onnx.__version__,
            sess=rt.InferenceSession(path.as_posix()),
        )
        return None

    def save_onnx(self, path: Path) -> ModelReturn:
        """Saves the onnx model

        Args:
            path:
                Path to save

        Returns:
            ModelReturn
        """
        import onnxruntime as rt

        from opsml.model.onnx import _get_onnx_metadata

        if self.onnx_model is None:
            self.convert_to_onnx(**{"path": path})

        else:
            self.onnx_model.sess_to_path(path)

        assert self.onnx_model is not None, "No onnx model detected in interface"

        # no need to save onnx to bytes since its done during onnx conversion
        return _get_onnx_metadata(self, cast(rt.InferenceSession, self.onnx_model.sess))

    def save_preprocessor(self, path: Path) -> None:
        """Saves preprocessor to path with Joblib.

        Args:
            path:
                Pathlib object

        Raises:
            AssertionError: If no preprocessor is attached to the interface.
        """
        assert self.preprocessor is not None, "No preprocessor detected in interface"
        joblib.dump(self.preprocessor, path)

    def load_preprocessor(self, path: Path) -> None:
        """Load preprocessor from pathlib object

        Args:
            path:
                Pathlib object
        """
        self.preprocessor = joblib.load(path)

    @property
    def preprocessor_suffix(self) -> str:
        """Returns suffix for storage"""
        return Suffix.JOBLIB.value

    @property
    def model_suffix(self) -> str:
        """Returns suffix for storage"""
        return Suffix.CATBOOST.value

    @staticmethod
    def name() -> str:
        """Returns the name of this interface class"""
        return CatBoostModel.__name__
Model interface for CatBoost models.
Arguments:
- model: CatBoost model (Classifier, Regressor, Ranker)
- preprocessor: Optional preprocessor
- sample_data: Sample data to be used for type inference and sample prediction. For catboost models this should be a numpy array (either 1d or 2d) or list of feature values. This should match exactly what the model expects as input.
- task_type: Task type for model. Defaults to undefined.
- model_type: Optional model type. This is inferred automatically.
- preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.
Returns:
CatBoostModel
def get_sample_prediction(self) -> SamplePrediction:
    """Predict on the stored sample data and wrap the result.

    Returns:
        SamplePrediction built from the result of `get_class_name` for the
        prediction and the prediction itself.

    Raises:
        AssertionError: If the model or the sample data is missing.
    """
    assert self.model is not None, "Model is not defined"
    assert self.sample_data is not None, "Sample data must be provided"

    output = self.model.predict(self.sample_data)
    return SamplePrediction(get_class_name(output), output)
@model_validator(mode='before')
@classmethod
def
check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
@model_validator(mode="before")
@classmethod
def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
    """Populate inferred arguments before field validation runs.

    Sets the model type, the normalized sample data, the data type and
    the preprocessor name on `model_args`.

    Args:
        model_args:
            Raw arguments passed to the interface.

    Returns:
        The same dictionary, updated in place. It is returned untouched
        when a truthy `modelcard_uid` is present.
    """
    raw_model = model_args.get("model")

    if model_args.get("modelcard_uid", False):
        return model_args

    model, module, bases = get_model_args(raw_model)

    if "catboost" in module:
        model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
    else:
        # every matching base writes the same value, so the last write wins
        for base_name in bases:
            if "catboost" in base_name:
                model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"

    # the sample data key is indexed directly, so it must be present here
    record = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
    model_args[CommonKwargs.SAMPLE_DATA.value] = record
    model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(record)

    preprocessor = model_args.get(CommonKwargs.PREPROCESSOR.value)
    model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(preprocessor)

    return model_args
def
save_model(self, path: pathlib.Path) -> None:
def save_model(self, path: Path) -> None:
    """Write the model to `path` with the CatBoost model's own `save_model`.

    Args:
        path:
            Pathlib object

    Raises:
        AssertionError: If no model is attached to the interface.
    """
    assert self.model is not None, "No model detected in interface"

    destination = path.as_posix()
    self.model.save_model(destination)
Saves model to path using the CatBoost model's own save_model method.
Arguments:
- path: Pathlib object
def
load_model(self, path: pathlib.Path, **kwargs: Any) -> None:
def load_model(self, path: Path, **kwargs: Any) -> None:
    """Load a saved model from `path` and attach it to the interface.

    Args:
        path:
            Pathlib object
        kwargs:
            Additional kwargs (not used by this implementation)
    """
    import catboost

    # Look up the class named by model_type in the catboost package;
    # fall back to the generic CatBoost class when there is no such name.
    model_cls = getattr(catboost, self.model_type, CatBoost)
    estimator = model_cls()
    self.model = estimator.load_model(path.as_posix())
Load model from pathlib object
Arguments:
- path: Pathlib object
- kwargs: Additional kwargs
def
convert_to_onnx(self, **kwargs: pathlib.Path) -> None:
def convert_to_onnx(self, **kwargs: Path) -> None:
    """Converts model to onnx format

    Nothing happens when an onnx model is already attached. Without a
    `path` the conversion is redone through a temporary directory.

    Args:
        kwargs:
            Optional `path` where the onnx file is written.

    Raises:
        AssertionError: If no model is attached to the interface.
    """
    # Check for an existing conversion first so nothing is logged or
    # imported when there is no work to do.
    if self.onnx_model is not None:
        return None

    path: Optional[Path] = kwargs.get("path")
    if path is None:
        # re-enters this method with a temp path; logging happens there
        return self._convert_to_onnx_inplace()

    import onnx
    import onnxruntime as rt

    assert self.model is not None, "No model detected in interface"

    logger.info("Converting CatBoost model to onnx format")

    self.model.save_model(
        path.as_posix(),
        format="onnx",
        export_parameters={"onnx_domain": "ai.catboost"},
    )
    self.onnx_model = OnnxModel(
        onnx_version=onnx.__version__,
        sess=rt.InferenceSession(path.as_posix()),
    )
    return None
Converts model to onnx format
def
save_onnx(self, path: pathlib.Path) -> opsml.types.model.ModelReturn:
def save_onnx(self, path: Path) -> ModelReturn:
    """Save the onnx model to `path` and return its metadata.

    Converts the model first when no onnx model is attached; otherwise
    writes the existing session to `path`.

    Args:
        path:
            Path to save

    Returns:
        ModelReturn

    Raises:
        AssertionError: If no onnx model is attached after the save step.
    """
    import onnxruntime as rt

    from opsml.model.onnx import _get_onnx_metadata

    if self.onnx_model is not None:
        self.onnx_model.sess_to_path(path)
    else:
        self.convert_to_onnx(path=path)

    assert self.onnx_model is not None, "No onnx model detected in interface"

    # no need to save onnx to bytes since its done during onnx conversion
    session = cast(rt.InferenceSession, self.onnx_model.sess)
    return _get_onnx_metadata(self, session)
Saves the onnx model
Arguments:
- path: Path to save
Returns:
ModelReturn
def
save_preprocessor(self, path: pathlib.Path) -> None:
def save_preprocessor(self, path: Path) -> None:
    """Dump the attached preprocessor to `path` with Joblib.

    Args:
        path:
            Pathlib object

    Raises:
        AssertionError: If no preprocessor is attached to the interface.
    """
    assert self.preprocessor is not None, "No preprocessor detected in interface"

    processor = self.preprocessor
    joblib.dump(processor, path)
Saves preprocessor to path if present. Base implementation uses Joblib.
Arguments:
- path: Pathlib object
def
load_preprocessor(self, path: pathlib.Path) -> None:
def load_preprocessor(self, path: Path) -> None:
    """Read a preprocessor from `path` with Joblib and attach it.

    Args:
        path:
            Pathlib object
    """
    loaded = joblib.load(path)
    self.preprocessor = loaded
Load preprocessor from pathlib object
Arguments:
- path: Pathlib object
preprocessor_suffix: str
@property
def preprocessor_suffix(self) -> str:
    """File suffix used when storing the preprocessor (the Joblib suffix)."""
    suffix: str = Suffix.JOBLIB.value
    return suffix
Returns suffix for storage
model_suffix: str
@property
def model_suffix(self) -> str:
    """File suffix used when storing the model (the CatBoost suffix)."""
    suffix: str = Suffix.CATBOOST.value
    return suffix
Returns suffix for storage
model_config =
{'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}
model_fields =
{'model': FieldInfo(annotation=Union[CatBoost, NoneType], required=False), 'sample_data': FieldInfo(annotation=Union[List[Any], ndarray[Any, dtype[Any]], NoneType], required=False), 'onnx_model': FieldInfo(annotation=Union[OnnxModel, NoneType], required=False), 'task_type': FieldInfo(annotation=str, required=False, default='undefined'), 'model_type': FieldInfo(annotation=str, required=False, default='undefined'), 'data_type': FieldInfo(annotation=str, required=False, default='undefined'), 'modelcard_uid': FieldInfo(annotation=str, required=False, default=''), 'preprocessor': FieldInfo(annotation=Union[Any, NoneType], required=False), 'preprocessor_name': FieldInfo(annotation=str, required=False, default='undefined')}
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs