opsml.cards.audit

# mypy: disable-error-code="call-arg"
# Copyright (c) Shipt, Inc.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from typing import Any, Dict, List, Optional, cast

import yaml
from pydantic import BaseModel, ConfigDict, SerializeAsAny, model_validator
from rich.console import Console
from rich.table import Table

from opsml.cards.base import ArtifactCard
from opsml.helpers.logging import ArtifactLogger
from opsml.types import (
    AuditCardMetadata,
    AuditSectionType,
    CardType,
    CardVersion,
    Comment,
)

logger = ArtifactLogger.get_logger()
DIR_PATH = os.path.dirname(__file__)
AUDIT_TEMPLATE_PATH = os.path.join(DIR_PATH, "templates/audit_card.yaml")


class Question(BaseModel):
    """A single audit question, its purpose, and an optional response."""

    question: str
    purpose: str
    response: Optional[str] = None

    model_config = ConfigDict(frozen=False)


class AuditSections(BaseModel):
    business_understanding: Dict[int, SerializeAsAny[Question]]
    data_understanding: Dict[int, SerializeAsAny[Question]]
    data_preparation: Dict[int, SerializeAsAny[Question]]
    modeling: Dict[int, SerializeAsAny[Question]]
    evaluation: Dict[int, SerializeAsAny[Question]]
    deployment_ops: Dict[int, SerializeAsAny[Question]]
    misc: Dict[int, SerializeAsAny[Question]]

    @model_validator(mode="before")
    @classmethod
    def load_sections(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Loads audit sections from template if no values are provided"""

        if any(values):
            return values
        return cls.load_yaml_template()

    @staticmethod
    def load_yaml_template() -> AuditSectionType:
        with open(AUDIT_TEMPLATE_PATH, "r", encoding="utf-8") as stream:
            try:
                audit_sections = cast(AuditSectionType, yaml.safe_load(stream))
            except yaml.YAMLError as exc:
                raise exc
        return audit_sections


class AuditQuestionTable:
    """Helper class for creating a rich table to be used with an AuditCard"""

    def __init__(self) -> None:
        self.table = self.create_table()

    def create_table(self) -> Table:
        """Creates a rich table for audit questions"""
        table = Table(title="Audit Questions")
        table.add_column("Section", no_wrap=True)
        table.add_column("Number")
        table.add_column("Question")
        table.add_column("Answered", justify="right")
        return table

    def add_row(self, section_name: str, nbr: int, question: Question) -> None:
        """Adds a row for a single question"""
        self.table.add_row(
            section_name,
            str(nbr),
            question.question,
            "Yes" if question.response else "No",
        )

    def add_section(self) -> None:
        """Adds a section divider to the table"""
        self.table.add_section()

    def print_table(self) -> None:
        """Prints the table to the console"""
        console = Console()
        console.print(self.table)


class AuditCard(ArtifactCard):
    """
    Creates an AuditCard for storing audit-related information about a
    machine learning project.

    Args:
        name:
            What to name the AuditCard
        repository:
            Repository that this card is associated with
        contact:
            Contact to associate with the AuditCard
        info:
            `CardInfo` object containing additional metadata. If provided, it will override any
            values provided for `name`, `repository`, `contact`, and `version`.

            Name, repository, and contact are required arguments for all cards. They can be provided
            directly or through a `CardInfo` object.

        audit:
            AuditSections object containing the audit questions and responses
        approved:
            Whether the audit has been approved
    """

    audit: AuditSections = AuditSections()
    approved: bool = False
    comments: List[SerializeAsAny[Comment]] = []
    metadata: AuditCardMetadata = AuditCardMetadata()

    def add_comment(self, name: str, comment: str) -> None:
        """Adds a comment to the AuditCard

        Args:
            name:
                Name of the person making the comment
            comment:
                Comment to add

        """
        comment_model = Comment(name=name, comment=comment)

        if any(comment_model == _comment for _comment in self.comments):
            return  # Exit early if comment already exists

        self.comments.insert(0, comment_model)

    def create_registry_record(self) -> Dict[str, Any]:
        """Creates a registry record for an audit"""

        return self.model_dump()

    def add_card(self, card: ArtifactCard) -> None:
        """
        Adds a card's name and version to the appropriate card list in the
        AuditCard metadata for tracking

        Args:
            card:
                Card to add to AuditCard
        """
        if card.uid is None:
            raise ValueError(
                f"""Card uid must be provided for {card.card_type}.
                Uid must be registered prior to adding to AuditCard."""
            )

        if card.card_type.lower() not in [
            CardType.DATACARD.value,
            CardType.MODELCARD.value,
            CardType.RUNCARD.value,
        ]:
            raise ValueError(f"Invalid card type {card.card_type}. Valid card types are: data, model or run")

        card_list = getattr(self.metadata, f"{card.card_type.lower()}cards")
        card_list.append(CardVersion(name=card.name, version=card.version, card_type=card.card_type))

    @property
    def business(self) -> Dict[int, Question]:
        return self.audit.business_understanding

    @property
    def data_understanding(self) -> Dict[int, Question]:
        return self.audit.data_understanding

    @property
    def data_preparation(self) -> Dict[int, Question]:
        return self.audit.data_preparation

    @property
    def modeling(self) -> Dict[int, Question]:
        return self.audit.modeling

    @property
    def evaluation(self) -> Dict[int, Question]:
        return self.audit.evaluation

    @property
    def deployment(self) -> Dict[int, Question]:
        return self.audit.deployment_ops

    @property
    def misc(self) -> Dict[int, Question]:
        return self.audit.misc

    def list_questions(self, section: Optional[str] = None) -> None:
        """Lists audit questions in a rich table, optionally filtered by section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
        """

        table = AuditQuestionTable()

        if section is not None:
            questions = self._get_section(section)
            for nbr, question in questions.items():
                table.add_row(section_name=section, nbr=nbr, question=question)

        else:
            for section_name, questions in self.audit:
                for nbr, question in questions.items():
                    table.add_row(section_name=section_name, nbr=nbr, question=question)

                table.add_section()

        table.print_table()

    def _get_section(self, section: str) -> Dict[int, Question]:
        """Gets a section from the audit card

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
        Returns:
            Dict[int, Question]: A dictionary of questions
        """

        if not hasattr(self, section):
            raise ValueError(
                f"""Section {section} not found. Accepted values are: business, data_understanding,
                data_preparation, modeling, evaluation, deployment or misc"""
            )
        _section: Dict[int, Question] = getattr(self, section)
        return _section

    def answer_question(self, section: str, question_nbr: int, response: str) -> None:
        """Answers a question in a section

        Args:
            section:
                Section name. Can be one of: business, data_understanding, data_preparation,
                modeling, evaluation, deployment or misc
            question_nbr:
                Question number
            response:
                Response to the question

        """

        _section: Dict[int, Question] = self._get_section(section)

        try:
            _section[question_nbr].response = response
        except KeyError as exc:
            logger.error("Question {} not found in section {}", question_nbr, section)
            raise exc

    @property
    def card_type(self) -> str:
        return CardType.AUDITCARD.value
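
Question is deliberately mutable (frozen=False) so that responses can be filled
in after a card is created; answer_question above relies on this. A minimal
sketch, using a question taken from the bundled template and a hypothetical
answer:

q = Question(
    question="What business objectives does the product owner pursue?",
    purpose="Identify the purposes for which the product owner intends to use the AI application.",
)
assert q.response is None  # unanswered until a response is set
q.response = "Reduce fraud losses while keeping approval rates flat."  # hypothetical answer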
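
Because load_sections runs as a before-mode validator, AuditSections() with no
arguments hydrates every section from templates/audit_card.yaml, while any
non-empty input bypasses the template and must then supply all seven sections.
A short sketch of both paths:

# Default path: the validator falls through to load_yaml_template().
sections = AuditSections()
print(len(sections.business_understanding))  # number of template questions

# Explicit path: non-empty values skip the template entirely.
raw = AuditSections.load_yaml_template()
sections_from_raw = AuditSections(**raw)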
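
AuditQuestionTable is a thin wrapper around rich.table.Table. It is normally
driven by AuditCard.list_questions, but it can also be used on its own; a
minimal sketch with an inline Question (text taken from the template):

table = AuditQuestionTable()
table.add_row(
    section_name="business",
    nbr=3,
    question=Question(
        question="What KPIs does the product owner intend to enhance by means of the application?",
        purpose="Identify measurable targets.",
    ),
)
table.add_section()  # draws a divider under the section
table.print_table()  # renders the "Audit Questions" table to the console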

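Putting the pieces together, a usage sketch for AuditCard. The name, repository,
contact, and response values are hypothetical, and `datacard` is assumed to be a
DataCard that has already been registered (so it carries a uid); everything else
uses only the API defined in this module.

# With no audit argument, the AuditSections default loads the question template.
audit_card = AuditCard(
    name="fraud-model-audit",      # hypothetical name
    repository="opsml",            # hypothetical repository
    contact="mlops@example.com",   # hypothetical contact
)

audit_card.list_questions(section="business")  # render a single section
audit_card.answer_question(
    section="business",
    question_nbr=1,
    response="Reduce chargeback fraud while keeping approval rates flat.",
)
audit_card.add_comment(name="reviewer", comment="Business section looks complete.")

# add_card raises if the card has no uid or is not a data, model, or run card.
audit_card.add_card(card=datacard)  # assumes `datacard` is already registered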