opsml.cards.audit

# mypy: disable-error-code="call-arg"
# Copyright (c) Shipt, Inc.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Any, Dict, List, Optional, cast

import yaml
from pydantic import BaseModel, ConfigDict, SerializeAsAny, model_validator
from rich.console import Console
from rich.table import Table

from opsml.cards.base import ArtifactCard
from opsml.helpers.logging import ArtifactLogger
from opsml.types import (
    AuditCardMetadata,
    AuditSectionType,
    CardType,
    CardVersion,
    Comment,
)

logger = ArtifactLogger.get_logger()
DIR_PATH = os.path.dirname(__file__)
AUDIT_TEMPLATE_PATH = os.path.join(DIR_PATH, "templates/audit_card.yaml")


class Question(BaseModel):
    """A single audit question, its purpose, and an optional response."""

    question: str
    purpose: str
    response: Optional[str] = None

    model_config = ConfigDict(frozen=False)


class AuditSections(BaseModel):
    business_understanding: Dict[int, SerializeAsAny[Question]]
    data_understanding: Dict[int, SerializeAsAny[Question]]
    data_preparation: Dict[int, SerializeAsAny[Question]]
    modeling: Dict[int, SerializeAsAny[Question]]
    evaluation: Dict[int, SerializeAsAny[Question]]
    deployment_ops: Dict[int, SerializeAsAny[Question]]
    misc: Dict[int, SerializeAsAny[Question]]

    @model_validator(mode="before")
    @classmethod
    def load_sections(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Loads audit sections from template if no values are provided"""

        if any(values):
            return values
        return cls.load_yaml_template()

    @staticmethod
    def load_yaml_template() -> AuditSectionType:
        with open(AUDIT_TEMPLATE_PATH, "r", encoding="utf-8") as stream:
            try:
                audit_sections = cast(AuditSectionType, yaml.safe_load(stream))
            except yaml.YAMLError as exc:
                raise exc
        return audit_sections


class AuditQuestionTable:
    """Helper class for creating a rich table to be used with an AuditCard"""

    def __init__(self) -> None:
        self.table = self.create_table()

    def create_table(self) -> Table:
        """Creates a rich table for audit questions"""
        table = Table(title="Audit Questions")
        table.add_column("Section", no_wrap=True)
        table.add_column("Number")
        table.add_column("Question")
        table.add_column("Answered", justify="right")
        return table

    def add_row(self, section_name: str, nbr: int, question: Question) -> None:
        """Adds a row for a single question"""
        self.table.add_row(
            section_name,
            str(nbr),
            question.question,
            "Yes" if question.response else "No",
        )

    def add_section(self) -> None:
        """Adds a section divider to the table"""
        self.table.add_section()

    def print_table(self) -> None:
        """Prints the table to the console"""
        console = Console()
        console.print(self.table)


class AuditCard(ArtifactCard):
    """
    Creates an AuditCard for storing audit-related information about a
    machine learning project.

    Args:
        name:
            What to name the AuditCard
        repository:
            Repository that this card is associated with
        contact:
            Contact to associate with the AuditCard
        info:
            `CardInfo` object containing additional metadata. If provided, it will override any
            values provided for `name`, `repository`, `contact`, and `version`.

            Name, repository, and contact are required arguments for all cards. They can be provided
            directly or through a `CardInfo` object.

        audit:
            AuditSections object containing the audit questions and responses
        approved:
            Whether the audit has been approved
    """

    audit: AuditSections = AuditSections()
    approved: bool = False
    comments: List[SerializeAsAny[Comment]] = []
    metadata: AuditCardMetadata = AuditCardMetadata()

    def add_comment(self, name: str, comment: str) -> None:
        """Adds a comment to the AuditCard

        Args:
            name:
                Name of the person making the comment
            comment:
                Comment to add

        """
        comment_model = Comment(name=name, comment=comment)

        if any(comment_model == _comment for _comment in self.comments):
            return  # Exit early if comment already exists

        self.comments.insert(0, comment_model)

    def create_registry_record(self) -> Dict[str, Any]:
        """Creates a registry record for an audit"""

        return self.model_dump()

    def add_card(self, card: ArtifactCard) -> None:
        """
        Adds a card's name and version to the appropriate card list in the
        AuditCard metadata for tracking

        Args:
            card:
                Card to add to AuditCard
        """
        if card.uid is None:
            raise ValueError(
                f"""Card uid must be provided for {card.card_type}.
                Uid must be registered prior to adding to AuditCard."""
            )

        if card.card_type.lower() not in [
            CardType.DATACARD.value,
            CardType.MODELCARD.value,
            CardType.RUNCARD.value,
        ]:
            raise ValueError(f"Invalid card type {card.card_type}. Valid card types are: data, model or run")

        card_list = getattr(self.metadata, f"{card.card_type.lower()}cards")
        card_list.append(CardVersion(name=card.name, version=card.version, card_type=card.card_type))

    @property
    def business(self) -> Dict[int, Question]:
        return self.audit.business_understanding

    @property
    def data_understanding(self) -> Dict[int, Question]:
        return self.audit.data_understanding

    @property
    def data_preparation(self) -> Dict[int, Question]:
        return self.audit.data_preparation

    @property
    def modeling(self) -> Dict[int, Question]:
        return self.audit.modeling

    @property
    def evaluation(self) -> Dict[int, Question]:
        return self.audit.evaluation

    @property
    def deployment(self) -> Dict[int, Question]:
        return self.audit.deployment_ops

    @property
    def misc(self) -> Dict[int, Question]:
        return self.audit.misc

    def list_questions(self, section: Optional[str] = None) -> None:
        """Lists audit questions in a rich table, optionally filtered by section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
        """

        table = AuditQuestionTable()

        if section is not None:
            questions = self._get_section(section)
            for nbr, question in questions.items():
                table.add_row(section_name=section, nbr=nbr, question=question)

        else:
            for section_name, questions in self.audit:
                for nbr, question in questions.items():
                    table.add_row(section_name=section_name, nbr=nbr, question=question)

                table.add_section()

        table.print_table()

    def _get_section(self, section: str) -> Dict[int, Question]:
        """Gets a section from the audit card

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
        Returns:
            Dict[int, Question]: A dictionary of questions
        """

        if not hasattr(self, section):
            raise ValueError(
                f"""Section {section} not found. Accepted values are: business, data_understanding,
                data_preparation, modeling, evaluation, deployment or misc"""
            )
        _section: Dict[int, Question] = getattr(self, section)
        return _section

    def answer_question(self, section: str, question_nbr: int, response: str) -> None:
        """Answers a question in a section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
            question_nbr:
                Question number
            response:
                Response to the question

        """

        _section: Dict[int, Question] = self._get_section(section)

        try:
            _section[question_nbr].response = response
        except KeyError as exc:
            logger.error("Question {} not found in section {}", question_nbr, section)
            raise exc

    @property
    def card_type(self) -> str:
        return CardType.AUDITCARD.value
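
Question is deliberately mutable (frozen=False) so that responses can be filled
in after a card is created; answer_question above relies on this. A minimal
sketch, using a question taken from the bundled template and a hypothetical
answer:

q = Question(
    question="What business objectives does the product owner pursue?",
    purpose="Identify the purposes for which the product owner intends to use the AI application.",
)
assert q.response is None  # unanswered until a response is set
q.response = "Reduce fraud losses while keeping approval rates flat."  # hypothetical answer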
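
Because load_sections runs as a before-mode validator, AuditSections() with no
arguments hydrates every section from templates/audit_card.yaml, while any
non-empty input bypasses the template and must then supply all seven sections.
A short sketch of both paths:

# Default path: the validator falls through to load_yaml_template().
sections = AuditSections()
print(len(sections.business_understanding))  # number of template questions

# Explicit path: non-empty values skip the template entirely.
raw = AuditSections.load_yaml_template()
sections_from_raw = AuditSections(**raw)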
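
AuditQuestionTable is a thin wrapper around rich.table.Table. It is normally
driven by AuditCard.list_questions, but it can also be used on its own; a
minimal sketch with an inline Question (text taken from the template):

table = AuditQuestionTable()
table.add_row(
    section_name="business",
    nbr=3,
    question=Question(
        question="What KPIs does the product owner intend to enhance by means of the application?",
        purpose="Identify measurable targets.",
    ),
)
table.add_section()  # draws a divider under the section
table.print_table()  # renders the "Audit Questions" table to the console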

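Putting the pieces together, a usage sketch for AuditCard. The name, repository,
contact, and response values are hypothetical, and `datacard` is assumed to be a
DataCard that has already been registered (so it carries a uid); everything else
uses only the API defined in this module.

# With no audit argument, the AuditSections default loads the question template.
audit_card = AuditCard(
    name="fraud-model-audit",      # hypothetical name
    repository="opsml",            # hypothetical repository
    contact="mlops@example.com",   # hypothetical contact
)

audit_card.list_questions(section="business")  # render a single section
audit_card.answer_question(
    section="business",
    question_nbr=1,
    response="Reduce chargeback fraud while keeping approval rates flat.",
)
audit_card.add_comment(name="reviewer", comment="Business section looks complete.")

# add_card raises if the card has no uid or is not a data, model, or run card.
audit_card.add_card(card=datacard)  # assumes `datacard` is already registered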