opsml.cards.audit
# mypy: disable-error-code="call-arg"
# Copyright (c) Shipt, Inc.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Any, Dict, List, Optional, cast

import yaml
from pydantic import BaseModel, ConfigDict, SerializeAsAny, model_validator
from rich.console import Console
from rich.table import Table

from opsml.cards.base import ArtifactCard
from opsml.helpers.logging import ArtifactLogger
from opsml.types import (
    AuditCardMetadata,
    AuditSectionType,
    CardType,
    CardVersion,
    Comment,
)

logger = ArtifactLogger.get_logger()
DIR_PATH = os.path.dirname(__file__)
AUDIT_TEMPLATE_PATH = os.path.join(DIR_PATH, "templates/audit_card.yaml")


class Question(BaseModel):
    """A single audit question, the reason it is asked, and its optional response."""

    question: str
    purpose: str
    response: Optional[str] = None

    # Not frozen: `AuditCard.answer_question` assigns `response` after creation.
    model_config = ConfigDict(frozen=False)


class AuditSections(BaseModel):
    """The audit sections, each mapping a question number to its `Question`."""

    business_understanding: Dict[int, SerializeAsAny[Question]]
    data_understanding: Dict[int, SerializeAsAny[Question]]
    data_preparation: Dict[int, SerializeAsAny[Question]]
    modeling: Dict[int, SerializeAsAny[Question]]
    evaluation: Dict[int, SerializeAsAny[Question]]
    deployment_ops: Dict[int, SerializeAsAny[Question]]
    misc: Dict[int, SerializeAsAny[Question]]

    @model_validator(mode="before")
    @classmethod
    def load_sections(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Loads audit sections from template if no values are provided"""

        if any(values):
            return values
        return cls.load_yaml_template()

    @staticmethod
    def load_yaml_template() -> AuditSectionType:
        """Reads the audit template at `AUDIT_TEMPLATE_PATH` and returns its parsed content.

        Raises:
            yaml.YAMLError: If the template is not valid YAML
        """
        # A YAMLError simply propagates; catching it only to re-raise added nothing.
        with open(AUDIT_TEMPLATE_PATH, "r", encoding="utf-8") as stream:
            return cast(AuditSectionType, yaml.safe_load(stream))


class AuditQuestionTable:
    """Helper class for creating a rich table to be used with an AuditCard"""

    def __init__(self) -> None:
        self.table = self.create_table()

    def create_table(self) -> Table:
        """Create Rich table of Audit"""
        table = Table(title="Audit Questions")
        table.add_column("Section", no_wrap=True)
        table.add_column("Number")
        table.add_column("Question")
        table.add_column("Answered", justify="right")
        return table

    def add_row(self, section_name: str, nbr: int, question: Question) -> None:
        """Add row to table

        Args:
            section_name:
                Name shown in the "Section" column
            nbr:
                Question number
            question:
                Question to display; "Answered" is "Yes" when its response is truthy
        """
        self.table.add_row(
            section_name,
            str(nbr),
            question.question,
            "Yes" if question.response else "No",
        )

    def add_section(self) -> None:
        """Add section"""
        self.table.add_section()

    def print_table(self) -> None:
        """Print table"""
        console = Console()
        console.print(self.table)


class AuditCard(ArtifactCard):
    """
    Creates an AuditCard for storing audit-related information about a
    machine learning project.

    Args:
        name:
            What to name the AuditCard
        repository:
            Repository that this card is associated with
        contact:
            Contact to associate with the AuditCard
        info:
            `CardInfo` object containing additional metadata. If provided, it will override any
            values provided for `name`, `repository`, `contact`, and `version`.

            Name, repository, and contact are required arguments for all cards. They can be provided
            directly or through a `CardInfo` object.

        audit:
            AuditSections object containing the audit questions and responses
        approved:
            Whether the audit has been approved
        comments:
            Comments attached to the audit, newest first
        metadata:
            AuditCardMetadata holding the lists of cards tracked by this audit
    """

    # NOTE: `AuditSections()` is evaluated when the class is defined, so the audit
    # template is read from disk at import time.
    audit: AuditSections = AuditSections()
    approved: bool = False
    comments: List[SerializeAsAny[Comment]] = []
    metadata: AuditCardMetadata = AuditCardMetadata()

    def add_comment(self, name: str, comment: str) -> None:
        """Adds comment to AuditCard

        Args:
            name:
                Name of person making comment
            comment:
                Comment to add

        """
        comment_model = Comment(name=name, comment=comment)

        if any(comment_model == _comment for _comment in self.comments):
            return  # Exit early if comment already exists

        self.comments.insert(0, comment_model)

    def create_registry_record(self) -> Dict[str, Any]:
        """Creates a registry record for an audit"""

        return self.model_dump()

    def add_card(self, card: ArtifactCard) -> None:
        """
        Records a card (name, version and card type) in the matching card list
        of the AuditCard metadata for tracking

        Args:
            card:
                Card to add to AuditCard. Must be a registered data, model or run card.

        Raises:
            ValueError: If the card has no uid or is not a data, model or run card
        """
        if card.uid is None:
            raise ValueError(
                f"""Card uid must be provided for {card.card_type}.
                Uid must be registered prior to adding to AuditCard."""
            )

        if card.card_type.lower() not in [
            CardType.DATACARD.value,
            CardType.MODELCARD.value,
            CardType.RUNCARD.value,
        ]:
            raise ValueError(f"Invalid card type {card.card_type}. Valid card types are: data, model or run")

        # metadata exposes one list per card type, named "<card_type>cards"
        card_list = getattr(self.metadata, f"{card.card_type.lower()}cards")
        card_list.append(CardVersion(name=card.name, version=card.version, card_type=card.card_type))

    @property
    def business(self) -> Dict[int, Question]:
        return self.audit.business_understanding

    @property
    def data_understanding(self) -> Dict[int, Question]:
        return self.audit.data_understanding

    @property
    def data_preparation(self) -> Dict[int, Question]:
        return self.audit.data_preparation

    @property
    def modeling(self) -> Dict[int, Question]:
        return self.audit.modeling

    @property
    def evaluation(self) -> Dict[int, Question]:
        return self.audit.evaluation

    @property
    def deployment(self) -> Dict[int, Question]:
        return self.audit.deployment_ops

    @property
    def misc(self) -> Dict[int, Question]:
        return self.audit.misc

    def list_questions(self, section: Optional[str] = None) -> None:
        """Lists all Audit Card questions in a rich table

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling,
                evaluation, deployment or misc. When omitted, every section is listed.
        """

        table = AuditQuestionTable()

        if section is not None:
            questions = self._get_section(section)
            for nbr, question in questions.items():
                table.add_row(section_name=section, nbr=nbr, question=question)

        else:
            # Iterating the AuditSections model yields (field name, questions) pairs
            for section_name, questions in self.audit:
                for nbr, question in questions.items():
                    table.add_row(section_name=section_name, nbr=nbr, question=question)

                table.add_section()

        table.print_table()

    def _get_section(self, section: str) -> Dict[int, Question]:
        """Gets a section from the audit card

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling,
                evaluation, deployment or misc. The section names printed by `list_questions`
                (business_understanding, deployment_ops) are accepted as well.
        Returns:
            Dict[int, Question]: A dictionary of questions
        Raises:
            ValueError: If `section` is not a known audit section
        """

        # Resolve through an explicit table instead of `hasattr(self, section)`: that check
        # also matched unrelated card attributes (e.g. "approved", "comments", "model_dump")
        # and returned objects that are not question dictionaries.
        section_fields = {
            "business": "business_understanding",
            "business_understanding": "business_understanding",
            "data_understanding": "data_understanding",
            "data_preparation": "data_preparation",
            "modeling": "modeling",
            "evaluation": "evaluation",
            "deployment": "deployment_ops",
            "deployment_ops": "deployment_ops",
            "misc": "misc",
        }

        field_name = section_fields.get(section)
        if field_name is None:
            raise ValueError(
                f"""Section {section} not found. Accepted values are: business, data_understanding,
                data_preparation, modeling, evaluation, deployment or misc"""
            )
        _section: Dict[int, Question] = getattr(self.audit, field_name)
        return _section

    def answer_question(self, section: str, question_nbr: int, response: str) -> None:
        """Answers a question in a section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling, evaluation,
                deployment or misc
            question_nbr:
                Question number
            response:
                Response to the question

        Raises:
            ValueError: If the section is unknown
            KeyError: If the section has no question with this number
        """

        _section: Dict[int, Question] = self._get_section(section)

        try:
            _section[question_nbr].response = response
        except KeyError:
            logger.error("Question {} not found in section {}", question_nbr, section)
            raise

    @property
    def card_type(self) -> str:
        return CardType.AUDITCARD.value
class Question(BaseModel):
    """A single audit question, the reason it is asked, and its optional response."""

    # Explicitly not frozen, so fields can be assigned after creation.
    model_config = ConfigDict(frozen=False)

    question: str
    purpose: str
    response: Optional[str] = None
Usage docs: https://docs.pydantic.dev/2.6/concepts/models/
A base class for creating Pydantic models.
Attributes:
- __class_vars__: The names of classvars defined on the model.
- __private_attributes__: Metadata about the private attributes of the model.
- __signature__: The signature for instantiating the model.
- __pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
- __pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
- __pydantic_custom_init__: Whether the model has a custom `__init__` function.
- __pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
- __pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to __args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
- __pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
- __pydantic_post_init__: The name of the post-init method for the model, if defined.
- __pydantic_root_model__: Whether the model is a `RootModel`.
- __pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
- __pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
- __pydantic_extra__: An instance attribute with the values of extra fields from validation when `model_config['extra'] == 'allow'`.
- __pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
- __pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class AuditSections(BaseModel):
    """The audit sections, each mapping a question number to its `Question`."""

    business_understanding: Dict[int, SerializeAsAny[Question]]
    data_understanding: Dict[int, SerializeAsAny[Question]]
    data_preparation: Dict[int, SerializeAsAny[Question]]
    modeling: Dict[int, SerializeAsAny[Question]]
    evaluation: Dict[int, SerializeAsAny[Question]]
    deployment_ops: Dict[int, SerializeAsAny[Question]]
    misc: Dict[int, SerializeAsAny[Question]]

    @model_validator(mode="before")
    @classmethod
    def load_sections(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Fall back to the bundled template when no section values are supplied."""

        if not any(values):
            return cls.load_yaml_template()
        return values

    @staticmethod
    def load_yaml_template() -> AuditSectionType:
        """Parse the YAML file at `AUDIT_TEMPLATE_PATH` and return its content.

        A `yaml.YAMLError` raised while parsing propagates to the caller.
        """
        with open(AUDIT_TEMPLATE_PATH, "r", encoding="utf-8") as template_file:
            parsed_template = yaml.safe_load(template_file)
        return cast(AuditSectionType, parsed_template)
Usage docs: https://docs.pydantic.dev/2.6/concepts/models/
A base class for creating Pydantic models.
Attributes:
- __class_vars__: The names of classvars defined on the model.
- __private_attributes__: Metadata about the private attributes of the model.
- __signature__: The signature for instantiating the model.
- __pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
- __pydantic_core_schema__: The pydantic-core schema used to build the SchemaValidator and SchemaSerializer.
- __pydantic_custom_init__: Whether the model has a custom `__init__` function.
- __pydantic_decorators__: Metadata containing the decorators defined on the model. This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
- __pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to __args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
- __pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
- __pydantic_post_init__: The name of the post-init method for the model, if defined.
- __pydantic_root_model__: Whether the model is a `RootModel`.
- __pydantic_serializer__: The pydantic-core SchemaSerializer used to dump instances of the model.
- __pydantic_validator__: The pydantic-core SchemaValidator used to validate instances of the model.
- __pydantic_extra__: An instance attribute with the values of extra fields from validation when `model_config['extra'] == 'allow'`.
- __pydantic_fields_set__: An instance attribute with the names of fields explicitly set.
- __pydantic_private__: Instance attribute with the values of private attributes set on the model instance.
@model_validator(mode="before")
@classmethod
def load_sections(cls, values: Dict[str, Any]) -> Dict[str, Any]:
    """Fall back to the bundled template when no section values are supplied."""

    if not any(values):
        return cls.load_yaml_template()
    return values
Loads audit sections from template if no values are provided
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs
class AuditQuestionTable:
    """Builds and prints the rich table that lists AuditCard questions."""

    def __init__(self) -> None:
        self.table = self.create_table()

    def create_table(self) -> Table:
        """Return an empty "Audit Questions" table with its four columns defined."""
        question_table = Table(title="Audit Questions")
        # (header, extra column options), in display order
        column_specs = (
            ("Section", {"no_wrap": True}),
            ("Number", {}),
            ("Question", {}),
            ("Answered", {"justify": "right"}),
        )
        for header, options in column_specs:
            question_table.add_column(header, **options)
        return question_table

    def add_row(self, section_name: str, nbr: int, question: Question) -> None:
        """Append one question to the table; "Answered" is "Yes" for a truthy response."""
        answered = "Yes" if question.response else "No"
        cells = (section_name, str(nbr), question.question, answered)
        self.table.add_row(*cells)

    def add_section(self) -> None:
        """Close the current group of rows by calling the table's `add_section`."""
        self.table.add_section()

    def print_table(self) -> None:
        """Render the table through a fresh rich `Console`."""
        Console().print(self.table)
Helper class for creating a rich table to be used with an AuditCard
def create_table(self) -> Table:
    """Return an empty "Audit Questions" table with its four columns defined."""
    question_table = Table(title="Audit Questions")
    # (header, extra column options), in display order
    column_specs = (
        ("Section", {"no_wrap": True}),
        ("Number", {}),
        ("Question", {}),
        ("Answered", {"justify": "right"}),
    )
    for header, options in column_specs:
        question_table.add_column(header, **options)
    return question_table
Create Rich table of Audit
def add_row(self, section_name: str, nbr: int, question: Question) -> None:
    """Append one question to the table.

    Args:
        section_name:
            Name shown in the "Section" column
        nbr:
            Question number, rendered as text
        question:
            Question to display; "Answered" is "Yes" when its response is truthy
    """
    answered = "Yes" if question.response else "No"
    cells = (section_name, str(nbr), question.question, answered)
    self.table.add_row(*cells)
Add row to table
class AuditCard(ArtifactCard):
    """
    Creates an AuditCard for storing audit-related information about a
    machine learning project.

    Args:
        name:
            What to name the AuditCard
        repository:
            Repository that this card is associated with
        contact:
            Contact to associate with the AuditCard
        info:
            `CardInfo` object containing additional metadata. If provided, it will override any
            values provided for `name`, `repository`, `contact`, and `version`.

            Name, repository, and contact are required arguments for all cards. They can be provided
            directly or through a `CardInfo` object.

        audit:
            AuditSections object containing the audit questions and responses
        approved:
            Whether the audit has been approved
        comments:
            Comments attached to the audit, newest first
        metadata:
            AuditCardMetadata holding the lists of cards tracked by this audit
    """

    # NOTE: `AuditSections()` is evaluated when the class is defined, so the audit
    # template is read from disk at import time.
    audit: AuditSections = AuditSections()
    approved: bool = False
    comments: List[SerializeAsAny[Comment]] = []
    metadata: AuditCardMetadata = AuditCardMetadata()

    def add_comment(self, name: str, comment: str) -> None:
        """Adds comment to AuditCard

        Args:
            name:
                Name of person making comment
            comment:
                Comment to add

        """
        comment_model = Comment(name=name, comment=comment)

        if any(comment_model == _comment for _comment in self.comments):
            return  # Exit early if comment already exists

        self.comments.insert(0, comment_model)

    def create_registry_record(self) -> Dict[str, Any]:
        """Creates a registry record for an audit"""

        return self.model_dump()

    def add_card(self, card: ArtifactCard) -> None:
        """
        Records a card (name, version and card type) in the matching card list
        of the AuditCard metadata for tracking

        Args:
            card:
                Card to add to AuditCard. Must be a registered data, model or run card.

        Raises:
            ValueError: If the card has no uid or is not a data, model or run card
        """
        if card.uid is None:
            raise ValueError(
                f"""Card uid must be provided for {card.card_type}.
                Uid must be registered prior to adding to AuditCard."""
            )

        if card.card_type.lower() not in [
            CardType.DATACARD.value,
            CardType.MODELCARD.value,
            CardType.RUNCARD.value,
        ]:
            raise ValueError(f"Invalid card type {card.card_type}. Valid card types are: data, model or run")

        # metadata exposes one list per card type, named "<card_type>cards"
        card_list = getattr(self.metadata, f"{card.card_type.lower()}cards")
        card_list.append(CardVersion(name=card.name, version=card.version, card_type=card.card_type))

    @property
    def business(self) -> Dict[int, Question]:
        return self.audit.business_understanding

    @property
    def data_understanding(self) -> Dict[int, Question]:
        return self.audit.data_understanding

    @property
    def data_preparation(self) -> Dict[int, Question]:
        return self.audit.data_preparation

    @property
    def modeling(self) -> Dict[int, Question]:
        return self.audit.modeling

    @property
    def evaluation(self) -> Dict[int, Question]:
        return self.audit.evaluation

    @property
    def deployment(self) -> Dict[int, Question]:
        return self.audit.deployment_ops

    @property
    def misc(self) -> Dict[int, Question]:
        return self.audit.misc

    def list_questions(self, section: Optional[str] = None) -> None:
        """Lists all Audit Card questions in a rich table

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling,
                evaluation, deployment or misc. When omitted, every section is listed.
        """

        table = AuditQuestionTable()

        if section is not None:
            questions = self._get_section(section)
            for nbr, question in questions.items():
                table.add_row(section_name=section, nbr=nbr, question=question)

        else:
            # Iterating the AuditSections model yields (field name, questions) pairs
            for section_name, questions in self.audit:
                for nbr, question in questions.items():
                    table.add_row(section_name=section_name, nbr=nbr, question=question)

                table.add_section()

        table.print_table()

    def _get_section(self, section: str) -> Dict[int, Question]:
        """Gets a section from the audit card

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling,
                evaluation, deployment or misc. The section names printed by `list_questions`
                (business_understanding, deployment_ops) are accepted as well.
        Returns:
            Dict[int, Question]: A dictionary of questions
        Raises:
            ValueError: If `section` is not a known audit section
        """

        # Resolve through an explicit table instead of `hasattr(self, section)`: that check
        # also matched unrelated card attributes (e.g. "approved", "comments", "model_dump")
        # and returned objects that are not question dictionaries.
        section_fields = {
            "business": "business_understanding",
            "business_understanding": "business_understanding",
            "data_understanding": "data_understanding",
            "data_preparation": "data_preparation",
            "modeling": "modeling",
            "evaluation": "evaluation",
            "deployment": "deployment_ops",
            "deployment_ops": "deployment_ops",
            "misc": "misc",
        }

        field_name = section_fields.get(section)
        if field_name is None:
            raise ValueError(
                f"""Section {section} not found. Accepted values are: business, data_understanding,
                data_preparation, modeling, evaluation, deployment or misc"""
            )
        _section: Dict[int, Question] = getattr(self.audit, field_name)
        return _section

    def answer_question(self, section: str, question_nbr: int, response: str) -> None:
        """Answers a question in a section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation, modeling, evaluation,
                deployment or misc
            question_nbr:
                Question number
            response:
                Response to the question

        Raises:
            ValueError: If the section is unknown
            KeyError: If the section has no question with this number
        """

        _section: Dict[int, Question] = self._get_section(section)

        try:
            _section[question_nbr].response = response
        except KeyError:
            logger.error("Question {} not found in section {}", question_nbr, section)
            raise

    @property
    def card_type(self) -> str:
        return CardType.AUDITCARD.value
Creates an AuditCard for storing audit-related information about a machine learning project.
Arguments:
- name: What to name the AuditCard
- repository: Repository that this card is associated with
- contact: Contact to associate with the AuditCard
- info: `CardInfo` object containing additional metadata. If provided, it will override any values provided for `name`, `repository`, `contact`, and `version`.

  Name, repository, and contact are required arguments for all cards. They can be provided directly or through a `CardInfo` object.
- audit: AuditSections object containing the audit questions and responses
- approved: Whether the audit has been approved
def add_comment(self, name: str, comment: str) -> None:
    """Put a new comment at the front of the card's comment list.

    Nothing is added when an equal comment is already present.

    Args:
        name:
            Name of person making comment
        comment:
            Comment to add
    """
    new_comment = Comment(name=name, comment=comment)

    for existing in self.comments:
        if new_comment == existing:
            # Duplicate: leave the list untouched
            return

    self.comments.insert(0, new_comment)
Adds comment to AuditCard
Arguments:
- name: Name of person making comment
- comment: Comment to add
def create_registry_record(self) -> Dict[str, Any]:
    """Creates a registry record for an audit by dumping the card with `model_dump`"""

    record: Dict[str, Any] = self.model_dump()
    return record
Creates a registry record for an audit
def add_card(self, card: ArtifactCard) -> None:
    """
    Records a card (name, version and card type) in the matching card list
    of the AuditCard metadata for tracking

    Args:
        card:
            Card to add to AuditCard. Must be a registered data, model or run card.

    Raises:
        ValueError: If the card has no uid or is not a data, model or run card
    """
    if card.uid is None:
        # Message text is kept verbatim, including its embedded line break.
        raise ValueError(
            f"""Card uid must be provided for {card.card_type}.
                Uid must be registered prior to adding to AuditCard."""
        )

    card_kind = card.card_type.lower()
    trackable_kinds = (
        CardType.DATACARD.value,
        CardType.MODELCARD.value,
        CardType.RUNCARD.value,
    )
    if card_kind not in trackable_kinds:
        raise ValueError(f"Invalid card type {card.card_type}. Valid card types are: data, model or run")

    # metadata exposes one list per card type, named "<card_type>cards"
    tracked_cards = getattr(self.metadata, f"{card_kind}cards")
    tracked_cards.append(CardVersion(name=card.name, version=card.version, card_type=card.card_type))
Adds a card's name, version and card type to the matching card list in the AuditCard metadata for tracking
Arguments:
- card: Card to add to AuditCard
def list_questions(self, section: Optional[str] = None) -> None:
    """Print the Audit Card questions as a rich table.

    Args:
        section:
            Section name. Can be one of: business, data_understanding, data_preparation, modeling,
            evaluation, deployment or misc. When omitted, every section is listed.
    """

    table = AuditQuestionTable()

    if section is None:
        # Iterating the AuditSections model yields (field name, questions) pairs
        for section_name, questions in self.audit:
            for nbr, question in questions.items():
                table.add_row(section_name=section_name, nbr=nbr, question=question)

            table.add_section()
    else:
        for nbr, question in self._get_section(section).items():
            table.add_row(section_name=section, nbr=nbr, question=question)

    table.print_table()
Lists all Audit Card questions in a rich table
Arguments:
- section: Section name. Can be one of: business, data_understanding, data_preparation, modeling, evaluation, deployment or misc
def answer_question(self, section: str, question_nbr: int, response: str) -> None:
    """Store a response on one question of a section.

    Args:
        section:
            Section name. Can be one of: business, data_understanding, data_preparation, modeling, evaluation,
            deployment or misc
        question_nbr:
            Question number
        response:
            Response to the question

    Raises:
        KeyError: If the section has no question with this number
    """

    questions: Dict[int, Question] = self._get_section(section)

    try:
        questions[question_nbr].response = response
    except KeyError:
        # Log the failed lookup, then let the original KeyError propagate
        logger.error("Question {} not found in section {}", question_nbr, section)
        raise
Answers a question in a section
Arguments:
- section: Section name. Can be one of: business, data_understanding, data_preparation, modeling, evaluation, deployment or misc
- question_nbr: Question number
- response: Response to the question
Inherited Members
- pydantic.main.BaseModel
- BaseModel
- model_extra
- model_fields_set
- model_construct
- model_copy
- model_dump
- model_dump_json
- model_json_schema
- model_parametrized_name
- model_post_init
- model_rebuild
- model_validate
- model_validate_json
- model_validate_strings
- dict
- json
- parse_obj
- parse_raw
- parse_file
- from_orm
- construct
- copy
- schema
- schema_json
- validate
- update_forward_refs