opsml.model.interfaces.catboost_

  1import tempfile
  2from pathlib import Path
  3from typing import Any, Dict, List, Optional, Union, cast
  4
  5import joblib
  6import numpy as np
  7from numpy.typing import NDArray
  8from pydantic import model_validator
  9
 10from opsml.helpers.logging import ArtifactLogger
 11from opsml.helpers.utils import get_class_name
 12from opsml.model.interfaces.base import (
 13    ModelInterface,
 14    SamplePrediction,
 15    get_model_args,
 16    get_processor_name,
 17)
 18from opsml.types import (
 19    CommonKwargs,
 20    ModelReturn,
 21    OnnxModel,
 22    SaveName,
 23    Suffix,
 24    TrainedModelType,
 25)
 26
 27logger = ArtifactLogger.get_logger()
 28
 29try:
 30    from catboost import CatBoost
 31
 32    class CatBoostModel(ModelInterface):
 33        """Model interface for CatBoost models.
 34
 35        Args:
 36            model:
 37                CatBoost model (Classifier, Regressor, Ranker)
 38            preprocessor:
 39                Optional preprocessor
 40            sample_data:
 41                Sample data to be used for type inference and sample prediction.
 42                For catboost models this should be a numpy array (either 1d or 2d) or list of feature values.
 43                This should match exactly what the model expects as input.
 44            task_type:
 45                Task type for model. Defaults to undefined.
 46            model_type:
 47                Optional model type. This is inferred automatically.
 48            preprocessor_name:
 49                Optional preprocessor name. This is inferred automatically if a
 50                preprocessor is provided.
 51
 52        Returns:
 53            CatBoostModel
 54        """
 55
 56        model: Optional[CatBoost] = None
 57        sample_data: Optional[Union[List[Any], NDArray[Any]]] = None
 58        preprocessor: Optional[Any] = None
 59        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 60
 61        @classmethod
 62        def _get_sample_data(cls, sample_data: NDArray[Any]) -> Union[List[Any], NDArray[Any]]:
 63            """Check sample data and returns one record to be used
 64            during type inference and sample prediction.
 65
 66            Returns:
 67                Sample data with only one record
 68            """
 69
 70            if isinstance(sample_data, list):
 71                return sample_data
 72
 73            if isinstance(sample_data, np.ndarray):
 74                if len(sample_data.shape) == 1:
 75                    return sample_data.reshape(1, -1)
 76                return sample_data[0:1]
 77
 78            raise ValueError("Sample data should be a list or numpy array")
 79
 80        def get_sample_prediction(self) -> SamplePrediction:
 81            assert self.model is not None, "Model is not defined"
 82            assert self.sample_data is not None, "Sample data must be provided"
 83
 84            prediction = self.model.predict(self.sample_data)
 85
 86            prediction_type = get_class_name(prediction)
 87
 88            return SamplePrediction(prediction_type, prediction)
 89
 90        @property
 91        def model_class(self) -> str:
 92            return TrainedModelType.CATBOOST.value
 93
 94        @model_validator(mode="before")
 95        @classmethod
 96        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 97            model = model_args.get("model")
 98
 99            if model_args.get("modelcard_uid", False):
100                return model_args
101
102            model, module, bases = get_model_args(model)
103
104            if "catboost" in module:
105                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
106
107            else:
108                for base in bases:
109                    if "catboost" in base:
110                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
111
112            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
113            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
114            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
115            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
116                model_args.get(CommonKwargs.PREPROCESSOR.value),
117            )
118
119            return model_args
120
121        def save_model(self, path: Path) -> None:
122            """Saves model to path. Base implementation use Joblib
123
124            Args:
125                path:
126                    Pathlib object
127            """
128            assert self.model is not None, "No model detected in interface"
129            self.model.save_model(path.as_posix())
130
131        def load_model(self, path: Path, **kwargs: Any) -> None:
132            """Load model from pathlib object
133
134            Args:
135                path:
136                    Pathlib object
137                kwargs:
138                    Additional kwargs
139            """
140            import catboost
141
142            model = getattr(catboost, self.model_type, CatBoost)()
143            self.model = model.load_model(path.as_posix())
144
145        def _convert_to_onnx_inplace(self) -> None:
146            """Convert to onnx model using temp dir"""
147            with tempfile.TemporaryDirectory() as tmpdir:
148                lpath = Path(tmpdir) / SaveName.ONNX_MODEL.value
149                onnx_path = lpath.with_suffix(Suffix.ONNX.value)
150                self.convert_to_onnx(**{"path": onnx_path})
151
152        def convert_to_onnx(self, **kwargs: Path) -> None:
153            """Converts model to onnx format"""
154
155            logger.info("Converting CatBoost model to onnx format")
156
157            import onnx
158            import onnxruntime as rt
159
160            if self.onnx_model is not None:
161                return None
162
163            path: Optional[Path] = kwargs.get("path")
164            if path is None:
165                return self._convert_to_onnx_inplace()
166
167            assert self.model is not None, "No model detected in interface"
168            self.model.save_model(
169                path.as_posix(),
170                format="onnx",
171                export_parameters={"onnx_domain": "ai.catboost"},
172            )
173            self.onnx_model = OnnxModel(
174                onnx_version=onnx.__version__,
175                sess=rt.InferenceSession(path.as_posix()),
176            )
177            return None
178
179        def save_onnx(self, path: Path) -> ModelReturn:
180            import onnxruntime as rt
181
182            from opsml.model.onnx import _get_onnx_metadata
183
184            if self.onnx_model is None:
185                self.convert_to_onnx(**{"path": path})
186
187            else:
188                self.onnx_model.sess_to_path(path)
189
190            assert self.onnx_model is not None, "No onnx model detected in interface"
191
192            # no need to save onnx to bytes since its done during onnx conversion
193            return _get_onnx_metadata(self, cast(rt.InferenceSession, self.onnx_model.sess))
194
195        def save_preprocessor(self, path: Path) -> None:
196            """Saves preprocessor to path if present. Base implementation use Joblib
197
198            Args:
199                path:
200                    Pathlib object
201            """
202            assert self.preprocessor is not None, "No preprocessor detected in interface"
203            joblib.dump(self.preprocessor, path)
204
205        def load_preprocessor(self, path: Path) -> None:
206            """Load preprocessor from pathlib object
207
208            Args:
209                path:
210                    Pathlib object
211            """
212            self.preprocessor = joblib.load(path)
213
214        @property
215        def preprocessor_suffix(self) -> str:
216            """Returns suffix for storage"""
217            return Suffix.JOBLIB.value
218
219        @property
220        def model_suffix(self) -> str:
221            """Returns suffix for storage"""
222            return Suffix.CATBOOST.value
223
224        @staticmethod
225        def name() -> str:
226            return CatBoostModel.__name__
227
228except ModuleNotFoundError:
229    from opsml.model.interfaces.backups import CatBoostModelNoModule as CatBoostModel
logger = <builtins.Logger object>
class CatBoostModel(opsml.model.interfaces.base.ModelInterface):
 33    class CatBoostModel(ModelInterface):
 34        """Model interface for CatBoost models.
 35
 36        Args:
 37            model:
 38                CatBoost model (Classifier, Regressor, Ranker)
 39            preprocessor:
 40                Optional preprocessor
 41            sample_data:
 42                Sample data to be used for type inference and sample prediction.
 43                For catboost models this should be a numpy array (either 1d or 2d) or list of feature values.
 44                This should match exactly what the model expects as input.
 45            task_type:
 46                Task type for model. Defaults to undefined.
 47            model_type:
 48                Optional model type. This is inferred automatically.
 49            preprocessor_name:
 50                Optional preprocessor name. This is inferred automatically if a
 51                preprocessor is provided.
 52
 53        Returns:
 54            CatBoostModel
 55        """
 56
 57        model: Optional[CatBoost] = None
 58        sample_data: Optional[Union[List[Any], NDArray[Any]]] = None
 59        preprocessor: Optional[Any] = None
 60        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 61
 62        @classmethod
 63        def _get_sample_data(cls, sample_data: NDArray[Any]) -> Union[List[Any], NDArray[Any]]:
 64            """Check sample data and returns one record to be used
 65            during type inference and sample prediction.
 66
 67            Returns:
 68                Sample data with only one record
 69            """
 70
 71            if isinstance(sample_data, list):
 72                return sample_data
 73
 74            if isinstance(sample_data, np.ndarray):
 75                if len(sample_data.shape) == 1:
 76                    return sample_data.reshape(1, -1)
 77                return sample_data[0:1]
 78
 79            raise ValueError("Sample data should be a list or numpy array")
 80
 81        def get_sample_prediction(self) -> SamplePrediction:
 82            assert self.model is not None, "Model is not defined"
 83            assert self.sample_data is not None, "Sample data must be provided"
 84
 85            prediction = self.model.predict(self.sample_data)
 86
 87            prediction_type = get_class_name(prediction)
 88
 89            return SamplePrediction(prediction_type, prediction)
 90
 91        @property
 92        def model_class(self) -> str:
 93            return TrainedModelType.CATBOOST.value
 94
 95        @model_validator(mode="before")
 96        @classmethod
 97        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 98            model = model_args.get("model")
 99
100            if model_args.get("modelcard_uid", False):
101                return model_args
102
103            model, module, bases = get_model_args(model)
104
105            if "catboost" in module:
106                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
107
108            else:
109                for base in bases:
110                    if "catboost" in base:
111                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
112
113            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
114            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
115            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
116            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
117                model_args.get(CommonKwargs.PREPROCESSOR.value),
118            )
119
120            return model_args
121
122        def save_model(self, path: Path) -> None:
123            """Saves model to path. Base implementation use Joblib
124
125            Args:
126                path:
127                    Pathlib object
128            """
129            assert self.model is not None, "No model detected in interface"
130            self.model.save_model(path.as_posix())
131
132        def load_model(self, path: Path, **kwargs: Any) -> None:
133            """Load model from pathlib object
134
135            Args:
136                path:
137                    Pathlib object
138                kwargs:
139                    Additional kwargs
140            """
141            import catboost
142
143            model = getattr(catboost, self.model_type, CatBoost)()
144            self.model = model.load_model(path.as_posix())
145
146        def _convert_to_onnx_inplace(self) -> None:
147            """Convert to onnx model using temp dir"""
148            with tempfile.TemporaryDirectory() as tmpdir:
149                lpath = Path(tmpdir) / SaveName.ONNX_MODEL.value
150                onnx_path = lpath.with_suffix(Suffix.ONNX.value)
151                self.convert_to_onnx(**{"path": onnx_path})
152
153        def convert_to_onnx(self, **kwargs: Path) -> None:
154            """Converts model to onnx format"""
155
156            logger.info("Converting CatBoost model to onnx format")
157
158            import onnx
159            import onnxruntime as rt
160
161            if self.onnx_model is not None:
162                return None
163
164            path: Optional[Path] = kwargs.get("path")
165            if path is None:
166                return self._convert_to_onnx_inplace()
167
168            assert self.model is not None, "No model detected in interface"
169            self.model.save_model(
170                path.as_posix(),
171                format="onnx",
172                export_parameters={"onnx_domain": "ai.catboost"},
173            )
174            self.onnx_model = OnnxModel(
175                onnx_version=onnx.__version__,
176                sess=rt.InferenceSession(path.as_posix()),
177            )
178            return None
179
180        def save_onnx(self, path: Path) -> ModelReturn:
181            import onnxruntime as rt
182
183            from opsml.model.onnx import _get_onnx_metadata
184
185            if self.onnx_model is None:
186                self.convert_to_onnx(**{"path": path})
187
188            else:
189                self.onnx_model.sess_to_path(path)
190
191            assert self.onnx_model is not None, "No onnx model detected in interface"
192
193            # no need to save onnx to bytes since its done during onnx conversion
194            return _get_onnx_metadata(self, cast(rt.InferenceSession, self.onnx_model.sess))
195
196        def save_preprocessor(self, path: Path) -> None:
197            """Saves preprocessor to path if present. Base implementation use Joblib
198
199            Args:
200                path:
201                    Pathlib object
202            """
203            assert self.preprocessor is not None, "No preprocessor detected in interface"
204            joblib.dump(self.preprocessor, path)
205
206        def load_preprocessor(self, path: Path) -> None:
207            """Load preprocessor from pathlib object
208
209            Args:
210                path:
211                    Pathlib object
212            """
213            self.preprocessor = joblib.load(path)
214
215        @property
216        def preprocessor_suffix(self) -> str:
217            """Returns suffix for storage"""
218            return Suffix.JOBLIB.value
219
220        @property
221        def model_suffix(self) -> str:
222            """Returns suffix for storage"""
223            return Suffix.CATBOOST.value
224
225        @staticmethod
226        def name() -> str:
227            return CatBoostModel.__name__

Model interface for CatBoost models.

Arguments:
  • model: CatBoost model (Classifier, Regressor, Ranker)
  • preprocessor: Optional preprocessor
  • sample_data: Sample data to be used for type inference and sample prediction. For catboost models this should be a numpy array (either 1d or 2d) or list of feature values. This should match exactly what the model expects as input.
  • task_type: Task type for model. Defaults to undefined.
  • model_type: Optional model type. This is inferred automatically.
  • preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.
Returns:

CatBoostModel

model: Optional[catboost.core.CatBoost]
sample_data: Union[List[Any], numpy.ndarray[Any, numpy.dtype[Any]], NoneType]
preprocessor: Optional[Any]
preprocessor_name: str
def get_sample_prediction(self) -> opsml.model.interfaces.base.SamplePrediction:
81        def get_sample_prediction(self) -> SamplePrediction:
82            assert self.model is not None, "Model is not defined"
83            assert self.sample_data is not None, "Sample data must be provided"
84
85            prediction = self.model.predict(self.sample_data)
86
87            prediction_type = get_class_name(prediction)
88
89            return SamplePrediction(prediction_type, prediction)
model_class: str
91        @property
92        def model_class(self) -> str:
93            return TrainedModelType.CATBOOST.value
@model_validator(mode='before')
@classmethod
def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 95        @model_validator(mode="before")
 96        @classmethod
 97        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 98            model = model_args.get("model")
 99
100            if model_args.get("modelcard_uid", False):
101                return model_args
102
103            model, module, bases = get_model_args(model)
104
105            if "catboost" in module:
106                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
107
108            else:
109                for base in bases:
110                    if "catboost" in base:
111                        model_args[CommonKwargs.MODEL_TYPE.value] = "subclass"
112
113            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
114            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
115            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
116            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
117                model_args.get(CommonKwargs.PREPROCESSOR.value),
118            )
119
120            return model_args
def save_model(self, path: pathlib.Path) -> None:
122        def save_model(self, path: Path) -> None:
123            """Saves model to path. Base implementation use Joblib
124
125            Args:
126                path:
127                    Pathlib object
128            """
129            assert self.model is not None, "No model detected in interface"
130            self.model.save_model(path.as_posix())

Saves model to path using CatBoost's own `save_model` method.

Arguments:
  • path: Pathlib object
def load_model(self, path: pathlib.Path, **kwargs: Any) -> None:
132        def load_model(self, path: Path, **kwargs: Any) -> None:
133            """Load model from pathlib object
134
135            Args:
136                path:
137                    Pathlib object
138                kwargs:
139                    Additional kwargs
140            """
141            import catboost
142
143            model = getattr(catboost, self.model_type, CatBoost)()
144            self.model = model.load_model(path.as_posix())

Load model from pathlib object

Arguments:
  • path: Pathlib object
  • kwargs: Additional kwargs
def convert_to_onnx(self, **kwargs: pathlib.Path) -> None:
153        def convert_to_onnx(self, **kwargs: Path) -> None:
154            """Converts model to onnx format"""
155
156            logger.info("Converting CatBoost model to onnx format")
157
158            import onnx
159            import onnxruntime as rt
160
161            if self.onnx_model is not None:
162                return None
163
164            path: Optional[Path] = kwargs.get("path")
165            if path is None:
166                return self._convert_to_onnx_inplace()
167
168            assert self.model is not None, "No model detected in interface"
169            self.model.save_model(
170                path.as_posix(),
171                format="onnx",
172                export_parameters={"onnx_domain": "ai.catboost"},
173            )
174            self.onnx_model = OnnxModel(
175                onnx_version=onnx.__version__,
176                sess=rt.InferenceSession(path.as_posix()),
177            )
178            return None

Converts model to onnx format

def save_onnx(self, path: pathlib.Path) -> opsml.types.model.ModelReturn:
180        def save_onnx(self, path: Path) -> ModelReturn:
181            import onnxruntime as rt
182
183            from opsml.model.onnx import _get_onnx_metadata
184
185            if self.onnx_model is None:
186                self.convert_to_onnx(**{"path": path})
187
188            else:
189                self.onnx_model.sess_to_path(path)
190
191            assert self.onnx_model is not None, "No onnx model detected in interface"
192
193            # no need to save onnx to bytes since its done during onnx conversion
194            return _get_onnx_metadata(self, cast(rt.InferenceSession, self.onnx_model.sess))

Saves the onnx model

Arguments:
  • path: Path to save
Returns:

ModelReturn

def save_preprocessor(self, path: pathlib.Path) -> None:
196        def save_preprocessor(self, path: Path) -> None:
197            """Saves preprocessor to path if present. Base implementation use Joblib
198
199            Args:
200                path:
201                    Pathlib object
202            """
203            assert self.preprocessor is not None, "No preprocessor detected in interface"
204            joblib.dump(self.preprocessor, path)

Saves preprocessor to path if present. Base implementation uses Joblib.

Arguments:
  • path: Pathlib object
def load_preprocessor(self, path: pathlib.Path) -> None:
206        def load_preprocessor(self, path: Path) -> None:
207            """Load preprocessor from pathlib object
208
209            Args:
210                path:
211                    Pathlib object
212            """
213            self.preprocessor = joblib.load(path)

Load preprocessor from pathlib object

Arguments:
  • path: Pathlib object
preprocessor_suffix: str
215        @property
216        def preprocessor_suffix(self) -> str:
217            """Returns suffix for storage"""
218            return Suffix.JOBLIB.value

Returns suffix for storage

model_suffix: str
220        @property
221        def model_suffix(self) -> str:
222            """Returns suffix for storage"""
223            return Suffix.CATBOOST.value

Returns suffix for storage

@staticmethod
def name() -> str:
225        @staticmethod
226        def name() -> str:
227            return CatBoostModel.__name__
model_config = {'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}
model_fields = {'model': FieldInfo(annotation=Union[CatBoost, NoneType], required=False), 'sample_data': FieldInfo(annotation=Union[List[Any], ndarray[Any, dtype[Any]], NoneType], required=False), 'onnx_model': FieldInfo(annotation=Union[OnnxModel, NoneType], required=False), 'task_type': FieldInfo(annotation=str, required=False, default='undefined'), 'model_type': FieldInfo(annotation=str, required=False, default='undefined'), 'data_type': FieldInfo(annotation=str, required=False, default='undefined'), 'modelcard_uid': FieldInfo(annotation=str, required=False, default=''), 'preprocessor': FieldInfo(annotation=Union[Any, NoneType], required=False), 'preprocessor_name': FieldInfo(annotation=str, required=False, default='undefined')}
model_computed_fields = {}
Inherited Members
pydantic.main.BaseModel
BaseModel
model_extra
model_fields_set
model_construct
model_copy
model_dump
model_dump_json
model_json_schema
model_parametrized_name
model_post_init
model_rebuild
model_validate
model_validate_json
model_validate_strings
dict
json
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
opsml.model.interfaces.base.ModelInterface
onnx_model
task_type
model_type
data_type
modelcard_uid
check_modelcard_uid
load_onnx_model
save_sample_data
load_sample_data
data_suffix