opsml.model.interfaces.lgbm
Source code:

from pathlib import Path
from typing import Any, Dict, Optional, Union

import joblib
import pandas as pd
from numpy.typing import NDArray
from pydantic import model_validator

from opsml.helpers.utils import get_class_name
from opsml.model.interfaces.base import (
    ModelInterface,
    get_model_args,
    get_processor_name,
)
from opsml.types import CommonKwargs, TrainedModelType
from opsml.types.extra import Suffix

try:
    import lightgbm as lgb
    from lightgbm import Booster, LGBMModel

    class LightGBMModel(ModelInterface):
        """Model interface for LightGBM Booster model class. If using the sklearn API, use SklearnModel instead.

        Args:
            model:
                LightGBM booster model
            preprocessor:
                Optional preprocessor
            sample_data:
                Sample data to be used for type inference.
                For lightgbm models this should be a pandas DataFrame or numpy array.
                This should match exactly what the model expects as input.
            task_type:
                Task type for model. Defaults to undefined.
            model_type:
                Optional model type. This is inferred automatically.
            preprocessor_name:
                Optional preprocessor. This is inferred automatically if a
                preprocessor is provided.
            onnx_args:
                Optional arguments for ONNX conversion. See `TorchOnnxArgs` for supported arguments.

        Returns:
            LightGBMModel
        """

        model: Optional[Union[Booster, LGBMModel]] = None
        sample_data: Optional[Union[pd.DataFrame, NDArray[Any]]] = None
        preprocessor: Optional[Any] = None
        preprocessor_name: str = CommonKwargs.UNDEFINED.value

        @property
        def model_class(self) -> str:
            if "Booster" in self.model_type:
                return TrainedModelType.LGBM_BOOSTER.value
            return TrainedModelType.SKLEARN_ESTIMATOR.value

        @model_validator(mode="before")
        @classmethod
        def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
            model = model_args.get("model")

            if model_args.get("modelcard_uid", False):
                return model_args

            model, module, _ = get_model_args(model)

            if "lightgbm" in module or isinstance(model, LGBMModel):
                model_args[CommonKwargs.MODEL_TYPE.value] = model.__class__.__name__

            sample_data = cls._get_sample_data(sample_data=model_args[CommonKwargs.SAMPLE_DATA.value])
            model_args[CommonKwargs.SAMPLE_DATA.value] = sample_data
            model_args[CommonKwargs.DATA_TYPE.value] = get_class_name(sample_data)
            model_args[CommonKwargs.PREPROCESSOR_NAME.value] = get_processor_name(
                model_args.get(CommonKwargs.PREPROCESSOR.value),
            )

            return model_args

        def save_model(self, path: Path) -> None:
            """Saves lgb model according to model format. Booster models are saved to text.
            Sklearn models are saved via joblib.

            Args:
                path:
                    base path to save model to
            """
            assert self.model is not None, "No model found"
            if isinstance(self.model, Booster):
                self.model.save_model(filename=path)

            else:
                super().save_model(path)

        def load_model(self, path: Path, **kwargs: Any) -> None:
            """Loads lightgbm booster or sklearn model

            Args:
                path:
                    base path to load from
                **kwargs:
                    Additional keyword arguments
            """

            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
                self.model = lgb.Booster(model_file=path)
            else:
                super().load_model(path)

        def save_preprocessor(self, path: Path) -> None:
            """Saves preprocessor to path if present. Base implementation use Joblib

            Args:
                path:
                    Pathlib object
            """
            assert self.preprocessor is not None, "No preprocessor detected in interface"
            joblib.dump(self.preprocessor, path)

        def load_preprocessor(self, path: Path) -> None:
            """Load preprocessor from pathlib object

            Args:
                path:
                    Pathlib object
            """
            self.preprocessor = joblib.load(path)

        @property
        def model_suffix(self) -> str:
            if self.model_type == TrainedModelType.LGBM_BOOSTER.value:
                return Suffix.TEXT.value

            return super().model_suffix

        @property
        def preprocessor_suffix(self) -> str:
            """Returns suffix for storage"""
            return Suffix.JOBLIB.value

        @staticmethod
        def name() -> str:
            return LightGBMModel.__name__

except ModuleNotFoundError:
    from opsml.model.interfaces.backups import LightGBMModelNoModule as LightGBMModel
class LightGBMModel(ModelInterface):
Model interface for LightGBM Booster model class. If using the sklearn API, use SklearnModel instead.
Arguments:
- model: LightGBM booster model
- preprocessor: Optional preprocessor
- sample_data: Sample data to be used for type inference. For lightgbm models this should be a pandas DataFrame or numpy array. This should match exactly what the model expects as input.
- task_type: Task type for model. Defaults to undefined.
- model_type: Optional model type. This is inferred automatically.
- preprocessor_name: Optional preprocessor name. This is inferred automatically if a preprocessor is provided.
- onnx_args: Optional arguments for ONNX conversion. See TorchOnnxArgs for supported arguments.
Returns:
LightGBMModel
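For illustration, a minimal construction sketch. It assumes lightgbm and pandas are installed; the feature names, training data, and booster parameters below are placeholders and not part of opsml.

    import lightgbm as lgb
    import pandas as pd
    from opsml.model.interfaces.lgbm import LightGBMModel

    # Placeholder training data purely for illustration.
    X_train = pd.DataFrame({"feat_1": [1.0, 2.0, 3.0, 4.0], "feat_2": [0.1, 0.2, 0.3, 0.4]})
    y_train = [0, 1, 0, 1]

    # Train a native Booster. Estimators from the sklearn API (e.g. LGBMClassifier)
    # should use SklearnModel instead, as noted above.
    booster = lgb.train({"objective": "binary", "verbose": -1}, lgb.Dataset(X_train, label=y_train))

    # model_type, data_type, and preprocessor_name are inferred by the check_model
    # validator; only model and sample_data need to be supplied here.
    interface = LightGBMModel(model=booster, sample_data=X_train[:1])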
@model_validator(mode='before')
@classmethod
def check_model(cls, model_args: Dict[str, Any]) -> Dict[str, Any]:
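check_model has no docstring in the source; as its body in the module source above shows, it is a pre-validator that fills in the inferred fields. A small sketch, continuing the construction example above (the printed values are indicative, not asserted):

    # Fields populated by check_model when `interface` was constructed above.
    print(interface.model_type)         # class name of the model, e.g. "Booster"
    print(interface.data_type)          # inferred from sample_data via get_class_name
    print(interface.preprocessor_name)  # typically "undefined" when no preprocessor is passed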
def save_model(self, path: pathlib.Path) -> None:
Saves the LightGBM model according to its format. Booster models are saved as text; sklearn-API models are saved via joblib.
Arguments:
- path: base path to save model to
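Continuing the sketch above, a direct save of the Booster-backed interface. The artifacts directory and file name are illustrative; in typical opsml workflows these methods are invoked by the card tooling rather than called by hand.

    from pathlib import Path

    # Illustrative location; model_suffix supplies the storage extension
    # (Booster models are written in LightGBM's text format).
    model_path = Path("artifacts") / f"lgbm_model{interface.model_suffix}"
    model_path.parent.mkdir(parents=True, exist_ok=True)
    interface.save_model(model_path)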
def load_model(self, path: pathlib.Path, **kwargs: Any) -> None:
Loads a LightGBM Booster or sklearn-API model.
Arguments:
- path: base path to load from
- **kwargs: Additional keyword arguments
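The matching load, reusing the path from the save sketch. For Booster-typed interfaces the LightGBM text file is read back with lgb.Booster(model_file=...); otherwise the base joblib loader is used.

    # Restore the model into the same interface (a sketch; normally this happens
    # when a registered model is loaded). Since model_type was inferred from the
    # Booster at construction, the Booster branch should be taken here.
    interface.load_model(model_path)
    predictions = interface.model.predict(X_train)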
def save_preprocessor(self, path: pathlib.Path) -> None:
Saves the preprocessor to path if present. The base implementation uses joblib (see the combined sketch after load_preprocessor below).
Arguments:
- path: Pathlib object
def load_preprocessor(self, path: pathlib.Path) -> None:
Loads the preprocessor from a pathlib object.
Arguments:
- path: Pathlib object
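A combined sketch for save_preprocessor and load_preprocessor, assuming a scikit-learn StandardScaler as the preprocessor; any joblib-serializable object should behave the same way, since both methods go through joblib.

    from pathlib import Path
    from sklearn.preprocessing import StandardScaler

    # Attach an illustrative preprocessor and persist it with joblib.
    interface.preprocessor = StandardScaler().fit(X_train)
    preprocessor_path = Path("artifacts") / f"preprocessor{interface.preprocessor_suffix}"
    interface.save_preprocessor(preprocessor_path)

    # Later (e.g. in another process) the same path restores it.
    interface.load_preprocessor(preprocessor_path)
    X_scaled = interface.preprocessor.transform(X_train)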
model_suffix: str
Returns suffix for storage
preprocessor_suffix: str
Returns suffix for storage
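The two suffix properties above are what the path sketches earlier rely on. The exact strings come from opsml's Suffix enum, so they are printed here rather than asserted.

    # Booster-typed interfaces get the text-format suffix, everything else falls
    # back to the base ModelInterface suffix; preprocessors always use joblib.
    print(interface.model_suffix)
    print(interface.preprocessor_suffix)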
model_config =
{'protected_namespaces': ('protect_',), 'arbitrary_types_allowed': True, 'validate_assignment': False, 'validate_default': True, 'extra': 'allow'}
model_fields =
- model: Optional[Union[Booster, LGBMModel]], not required
- sample_data: Optional[Union[DataFrame, ndarray[Any, dtype[Any]]]], not required
- onnx_model: Optional[OnnxModel], not required
- task_type: str, default 'undefined'
- model_type: str, default 'undefined'
- data_type: str, default 'undefined'
- modelcard_uid: str, default ''
- preprocessor: Optional[Any], not required
- preprocessor_name: str, default 'undefined'
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs