opsml.model.interfaces.lgbm

  1from pathlib import Path
  2from typing import Any, Dict, Optional, Union
  3
  4import joblib
  5import pandas as pd
  6from numpy.typing import NDArray
  7from pydantic import model_validator
  8
  9from opsml.helpers.utils import get_class_name
 10from opsml.model.interfaces.base import (
 11    ModelInterface,
 12    get_model_args,
 13    get_processor_name,
 14)
 15from opsml.types import CommonKwargs, TrainedModelType
 16from opsml.types.extra import Suffix
 17
 18try:
 19    import lightgbm as lgb
 20    from lightgbm import Booster, LGBMModel
 21
 22    class LightGBMModel(ModelInterface):
 23        """Model interface for LightGBM Booster model class. If using the sklearn API, use SklearnModel instead.
 24
 25        Args:
 26            model:
 27                LightGBM booster model
 28            preprocessor:
 29                Optional preprocessor
 30            sample_data:
 31                Sample data to be used for type inference.
 32                For lightgbm models this should be a pandas DataFrame or numpy array.
 33                This should match exactly what the model expects as input.
 34            task_type:
 35                Task type for model. Defaults to undefined.
 36            model_type:
 37                Optional model type. This is inferred automatically.
 38            preprocessor_name:
 39                Optional preprocessor. This is inferred automatically if a
 40                preprocessor is provided.
 41            onnx_args:
 42                Optional arguments for ONNX conversion. See `TorchOnnxArgs` for supported arguments.
 43
 44        Returns:
 45            LightGBMModel
 46        """
 47
 48        model: Optional[Union[Booster, LGBMModel]] = None
 49        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
 50        preprocessor: Optional[Any] = None
 51        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 52
 53        @property
 54        def model_class(self) -> str:
 55            if "Booster" in self.model_type:
 56                return TrainedModelType.LGBM_BOOSTER.value
 57            return TrainedModelType.SKLEARN_ESTIMATOR.value
 58
 59        @model_validator(mode="before")
 60        @classmethod
 61        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 62            model = model_args.get("model")
 63
 64            if model_args.get("modelcard_uid", False):
 65                return model_args
 66
 67            model, module, _ = get_model_args(model)
 68
 69            if "lightgbm" in module or isinstance(model, LGBMModel):
 70                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
 71
 72            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
 73            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
 74            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
 75            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
 76                model_args.get(CommonKwargs.PREPROCESSOR.value),
 77            )
 78
 79            return model_args
 80
 81        def save_model(self, path: Path) -> None:
 82            """Saves lgb model according to model format. Booster models are saved to text.
 83            Sklearn models are saved via joblib.
 84
 85            Args:
 86                path:
 87                    base path to save model to
 88            """
 89            assert self.model is not None, "No model found"
 90            if isinstance(self.model, Booster):
 91                self.model.save_model(filename=path)
 92
 93            else:
 94                super().save_model(path)
 95
 96        def load_model(self, path: Path, **kwargs: Any) -> None:
 97            """Loads lightgbm booster or sklearn model
 98
 99
100            Args:
101                path:
102                    base path to load from
103                **kwargs:
104                    Additional keyword arguments
105            """
106
107            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
108                self.model = lgb.Booster(model_file=path)
109            else:
110                super().load_model(path)
111
112        def save_preprocessor(self, path: Path) -> None:
113            """Saves preprocessor to path if present. Base implementation use Joblib
114
115            Args:
116                path:
117                    Pathlib object
118            """
119            assert self.preprocessor is not None, "No preprocessor detected in interface"
120            joblib.dump(self.preprocessor, path)
121
122        def load_preprocessor(self, path: Path) -> None:
123            """Load preprocessor from pathlib object
124
125            Args:
126                path:
127                    Pathlib object
128            """
129            self.preprocessor = joblib.load(path)
130
131        @property
132        def model_suffix(self) -> str:
133            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
134                return Suffix.TEXT.value
135
136            return super().model_suffix
137
138        @property
139        def preprocessor_suffix(self) -> str:
140            """Returns suffix for storage"""
141            return Suffix.JOBLIB.value
142
143        @staticmethod
144        def name() -> str:
145            return LightGBMModel.__name__
146
147except ModuleNotFoundError:
148    from opsml.model.interfaces.backups import LightGBMModelNoModule as LightGBMModel
class LightGBMModel(opsml.model.interfaces.base.ModelInterface):
 23    class LightGBMModel(ModelInterface):
 24        """Model interface for LightGBM Booster model class. If using the sklearn API, use SklearnModel instead.
 25
 26        Args:
 27            model:
 28                LightGBM booster model
 29            preprocessor:
 30                Optional preprocessor
 31            sample_data:
 32                Sample data to be used for type inference.
 33                For lightgbm models this should be a pandas DataFrame or numpy array.
 34                This should match exactly what the model expects as input.
 35            task_type:
 36                Task type for model. Defaults to undefined.
 37            model_type:
 38                Optional model type. This is inferred automatically.
 39            preprocessor_name:
 40                Optional preprocessor. This is inferred automatically if a
 41                preprocessor is provided.
 42            onnx_args:
 43                Optional arguments for ONNX conversion. See `TorchOnnxArgs` for supported arguments.
 44
 45        Returns:
 46            LightGBMModel
 47        """
 48
 49        model: Optional[Union[Booster, LGBMModel]] = None
 50        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
 51        preprocessor: Optional[Any] = None
 52        preprocessor_name: str = CommonKwargs.UNDEFINED.value
 53
 54        @property
 55        def model_class(self) -> str:
 56            if "Booster" in self.model_type:
 57                return TrainedModelType.LGBM_BOOSTER.value
 58            return TrainedModelType.SKLEARN_ESTIMATOR.value
 59
 60        @model_validator(mode="before")
 61        @classmethod
 62        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
 63            model = model_args.get("model")
 64
 65            if model_args.get("modelcard_uid", False):
 66                return model_args
 67
 68            model, module, _ = get_model_args(model)
 69
 70            if "lightgbm" in module or isinstance(model, LGBMModel):
 71                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
 72
 73            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
 74            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
 75            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
 76            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
 77                model_args.get(CommonKwargs.PREPROCESSOR.value),
 78            )
 79
 80            return model_args
 81
 82        def save_model(self, path: Path) -> None:
 83            """Saves lgb model according to model format. Booster models are saved to text.
 84            Sklearn models are saved via joblib.
 85
 86            Args:
 87                path:
 88                    base path to save model to
 89            """
 90            assert self.model is not None, "No model found"
 91            if isinstance(self.model, Booster):
 92                self.model.save_model(filename=path)
 93
 94            else:
 95                super().save_model(path)
 96
 97        def load_model(self, path: Path, **kwargs: Any) -> None:
 98            """Loads lightgbm booster or sklearn model
 99
100
101            Args:
102                path:
103                    base path to load from
104                **kwargs:
105                    Additional keyword arguments
106            """
107
108            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
109                self.model = lgb.Booster(model_file=path)
110            else:
111                super().load_model(path)
112
113        def save_preprocessor(self, path: Path) -> None:
114            """Saves preprocessor to path if present. Base implementation use Joblib
115
116            Args:
117                path:
118                    Pathlib object
119            """
120            assert self.preprocessor is not None, "No preprocessor detected in interface"
121            joblib.dump(self.preprocessor, path)
122
123        def load_preprocessor(self, path: Path) -> None:
124            """Load preprocessor from pathlib object
125
126            Args:
127                path:
128                    Pathlib object
129            """
130            self.preprocessor = joblib.load(path)
131
132        @property
133        def model_suffix(self) -> str:
134            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
135                return Suffix.TEXT.value
136
137            return super().model_suffix
138
139        @property
140        def preprocessor_suffix(self) -> str:
141            """Returns suffix for storage"""
142            return Suffix.JOBLIB.value
143
144        @staticmethod
145        def name() -> str:
146            return LightGBMModel.__name__

Model interface for LightGBM Booster model class. If using the sklearn API, use SklearnModel instead.

Arguments:
  • model: LightGBM booster model
  • preprocessor: Optional preprocessor
  • sample_data: Sample data to be used for type inference. For lightgbm models this should be a pandas DataFrame or numpy array. This should match exactly what the model expects as input.
  • task_type: Task type for model. Defaults to undefined.
  • model_type: Optional model type. This is inferred automatically.
  • preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.
  • onnx_args: Optional arguments for ONNX conversion. See TorchOnnxArgs for supported arguments.
Returns:

LightGBMModel

model: Union[lightgbm.basic.Booster, lightgbm.sklearn.LGBMModel, NoneType]
sample_data: Union[pandas.core.frame.DataFrame, numpy.ndarray[Any, numpy.dtype[Any]], NoneType]
preprocessor: Optional[Any]
preprocessor_name: str
model_class: str
54        @property
55        def model_class(self) -> str:
56            if "Booster" in self.model_type:
57                return TrainedModelType.LGBM_BOOSTER.value
58            return TrainedModelType.SKLEARN_ESTIMATOR.value
@model_validator(mode='before')
@classmethod
def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
60        @model_validator(mode="before")
61        @classmethod
62        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
63            model = model_args.get("model")
64
65            if model_args.get("modelcard_uid", False):
66                return model_args
67
68            model, module, _ = get_model_args(model)
69
70            if "lightgbm" in module or isinstance(model, LGBMModel):
71                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__
72
73            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
74            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
75            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
76            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
77                model_args.get(CommonKwargs.PREPROCESSOR.value),
78            )
79
80            return model_args
def save_model(self, path: pathlib.Path) -> None:
82        def save_model(self, path: Path) -> None:
83            """Saves lgb model according to model format. Booster models are saved to text.
84            Sklearn models are saved via joblib.
85
86            Args:
87                path:
88                    base path to save model to
89            """
90            assert self.model is not None, "No model found"
91            if isinstance(self.model, Booster):
92                self.model.save_model(filename=path)
93
94            else:
95                super().save_model(path)

Saves lgb model according to model format. Booster models are saved to text. Sklearn models are saved via joblib.

Arguments:
  • path: base path to save model to
def load_model(self, path: pathlib.Path, **kwargs: Any) -> None:
 97        def load_model(self, path: Path, **kwargs: Any) -> None:
 98            """Loads lightgbm booster or sklearn model
 99
100
101            Args:
102                path:
103                    base path to load from
104                **kwargs:
105                    Additional keyword arguments
106            """
107
108            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
109                self.model = lgb.Booster(model_file=path)
110            else:
111                super().load_model(path)

Loads lightgbm booster or sklearn model

Arguments:
  • path: base path to load from
  • **kwargs: Additional keyword arguments
def save_preprocessor(self, path: pathlib.Path) -> None:
113        def save_preprocessor(self, path: Path) -> None:
114            """Saves preprocessor to path if present. Base implementation use Joblib
115
116            Args:
117                path:
118                    Pathlib object
119            """
120            assert self.preprocessor is not None, "No preprocessor detected in interface"
121            joblib.dump(self.preprocessor, path)

Saves preprocessor to path if present. Base implementation uses Joblib

Arguments:
  • path: Pathlib object
def load_preprocessor(self, path: pathlib.Path) -> None:
123        def load_preprocessor(self, path: Path) -> None:
124            """Load preprocessor from pathlib object
125
126            Args:
127                path:
128                    Pathlib object
129            """
130            self.preprocessor = joblib.load(path)

Load preprocessor from pathlib object

Arguments:
  • path: Pathlib object
model_suffix: str
132        @property
133        def model_suffix(self) -> str:
134            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
135                return Suffix.TEXT.value
136
137            return super().model_suffix

Returns suffix for storage

preprocessor_suffix: str
139        @property
140        def preprocessor_suffix(self) -> str:
141            """Returns suffix for storage"""
142            return Suffix.JOBLIB.value

Returns suffix for storage

@staticmethod
def name() -> str:
144        @staticmethod
145        def name() -> str:
146            return LightGBMModel.__name__
model_config = {'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}
model_fields = {'model': FieldInfo(annotation=Union[Booster, LGBMModel, NoneType], required=False), 'sample_data': FieldInfo(annotation=Union[DataFrame, ndarray[Any, dtype[Any]], NoneType], required=False), 'onnx_model': FieldInfo(annotation=Union[OnnxModel, NoneType], required=False), 'task_type': FieldInfo(annotation=str, required=False, default='undefined'), 'model_type': FieldInfo(annotation=str, required=False, default='undefined'), 'data_type': FieldInfo(annotation=str, required=False, default='undefined'), 'modelcard_uid': FieldInfo(annotation=str, required=False, default=''), 'preprocessor': FieldInfo(annotation=Union[Any, NoneType], required=False), 'preprocessor_name': FieldInfo(annotation=str, required=False, default='undefined')}
model_computed_fields = {}
Inherited Members
pydantic.main.BaseModel
BaseModel
model_extra
model_fields_set
model_construct
model_copy
model_dump
model_dump_json
model_json_schema
model_parametrized_name
model_post_init
model_rebuild
model_validate
model_validate_json
model_validate_strings
dict
json
parse_obj
parse_raw
parse_file
from_orm
construct
copy
schema
schema_json
validate
update_forward_refs
opsml.model.interfaces.base.ModelInterface
onnx_model
task_type
model_type
data_type
modelcard_uid
check_modelcard_uid
save_onnx
convert_to_onnx
load_onnx_model
save_sample_data
load_sample_data
get_sample_prediction
data_suffix