opsml.profile.profile_data

  1# pylint: disable=redefined-outer-name,import-outside-toplevel
  2# Copyright (c) Shipt, Inc.
  3# This source code is licensed under the MIT license found in the
  4# LICENSE file in the root directory of this source tree.
  5
  6import os
  7from typing import Any, List, Union
  8
  9import pandas as pd
 10import polars as pl
 11
 # Directory that contains this module; `profile_config.yml` is resolved relative to it.
 12DIR_PATH = os.path.dirname(__file__)
 # Annotation-only alias: the real `ProfileReport` class is imported from
 # `ydata_profiling` inside each method, so at module level the name is just `Any`.
 13ProfileReport = Any
 14
 15
 16class DataProfiler:
 17    @staticmethod
 18    def create_profile_report(
 19        data: Union[pd.DataFrame, pl.DataFrame],
 20        name: str,
 21        sample_perc: float = 1,
 22    ) -> ProfileReport:
 23        """
 24        Creates a `ydata-profiling` report
 25
 26        Args:
 27            data:
 28                Pandas dataframe
 29            sample_perc:
 30                Percentage to use for sampling
 31            name:
 32                Name of the report
 33
 34        Returns:
 35            `ProfileReport`
 36        """
 37        from ydata_profiling import ProfileReport
 38
 39        kwargs = {"title": f"Profile report for {name}"}
 40
 41        if isinstance(data, pl.DataFrame):
 42            if sample_perc < 1:
 43                return ProfileReport(
 44                    df=data.sample(fraction=sample_perc, with_replacement=False, shuffle=True).to_pandas(),
 45                    config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 46                    lazy=False,
 47                    **kwargs,
 48                )
 49
 50            return ProfileReport(
 51                df=data.to_pandas(),
 52                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 53                lazy=False,
 54                **kwargs,
 55            )
 56
 57        if sample_perc < 1:
 58            return ProfileReport(
 59                df=data.sample(frac=sample_perc, replace=False),
 60                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 61                lazy=False,
 62                **kwargs,
 63            )
 64
 65        return ProfileReport(
 66            df=data,
 67            config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 68            lazy=False,
 69            **kwargs,
 70        )
 71
 72    @staticmethod
 73    def load_profile(data: bytes) -> ProfileReport:
 74        """Loads a `ProfileReport` from data bytes
 75
 76        Args:
 77            data:
 78                `ProfileReport` in bytes
 79
 80        Returns:
 81            `ProfileReport`
 82        """
 83        from ydata_profiling import ProfileReport
 84
 85        profile = ProfileReport()
 86        profile.loads(data)
 87        return profile
 88
 89    @staticmethod
 90    def compare_reports(reports: List[ProfileReport]) -> ProfileReport:
 91        """Compares ProfileReports
 92
 93        Args:
 94            reports:
 95                List of `ProfileReport`
 96
 97        Returns:
 98            `ProfileReport`
 99        """
100        from ydata_profiling import compare
101
102        return compare(reports=reports)
DIR_PATH = '/home/steven_forrester/github/opsml/opsml/profile'
ProfileReport = typing.Any
class DataProfiler:
 17class DataProfiler:
 18    @staticmethod
 19    def create_profile_report(
 20        data: Union[pd.DataFrame, pl.DataFrame],
 21        name: str,
 22        sample_perc: float = 1,
 23    ) -> ProfileReport:
 24        """
 25        Creates a `ydata-profiling` report
 26
 27        Args:
 28            data:
 29                Pandas dataframe
 30            sample_perc:
 31                Percentage to use for sampling
 32            name:
 33                Name of the report
 34
 35        Returns:
 36            `ProfileReport`
 37        """
 38        from ydata_profiling import ProfileReport
 39
 40        kwargs = {"title": f"Profile report for {name}"}
 41
 42        if isinstance(data, pl.DataFrame):
 43            if sample_perc < 1:
 44                return ProfileReport(
 45                    df=data.sample(fraction=sample_perc, with_replacement=False, shuffle=True).to_pandas(),
 46                    config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 47                    lazy=False,
 48                    **kwargs,
 49                )
 50
 51            return ProfileReport(
 52                df=data.to_pandas(),
 53                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 54                lazy=False,
 55                **kwargs,
 56            )
 57
 58        if sample_perc < 1:
 59            return ProfileReport(
 60                df=data.sample(frac=sample_perc, replace=False),
 61                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 62                lazy=False,
 63                **kwargs,
 64            )
 65
 66        return ProfileReport(
 67            df=data,
 68            config_file=os.path.join(DIR_PATH, "profile_config.yml"),
 69            lazy=False,
 70            **kwargs,
 71        )
 72
 73    @staticmethod
 74    def load_profile(data: bytes) -> ProfileReport:
 75        """Loads a `ProfileReport` from data bytes
 76
 77        Args:
 78            data:
 79                `ProfileReport` in bytes
 80
 81        Returns:
 82            `ProfileReport`
 83        """
 84        from ydata_profiling import ProfileReport
 85
 86        profile = ProfileReport()
 87        profile.loads(data)
 88        return profile
 89
 90    @staticmethod
 91    def compare_reports(reports: List[ProfileReport]) -> ProfileReport:
 92        """Compares ProfileReports
 93
 94        Args:
 95            reports:
 96                List of `ProfileReport`
 97
 98        Returns:
 99            `ProfileReport`
100        """
101        from ydata_profiling import compare
102
103        return compare(reports=reports)
@staticmethod
def create_profile_report( data: Union[pandas.core.frame.DataFrame, polars.dataframe.frame.DataFrame], name: str, sample_perc: float = 1) -> Any:
18    @staticmethod
19    def create_profile_report(
20        data: Union[pd.DataFrame, pl.DataFrame],
21        name: str,
22        sample_perc: float = 1,
23    ) -> ProfileReport:
24        """
25        Creates a `ydata-profiling` report
26
27        Args:
28            data:
29                Pandas dataframe
30            sample_perc:
31                Percentage to use for sampling
32            name:
33                Name of the report
34
35        Returns:
36            `ProfileReport`
37        """
38        from ydata_profiling import ProfileReport
39
40        kwargs = {"title": f"Profile report for {name}"}
41
42        if isinstance(data, pl.DataFrame):
43            if sample_perc < 1:
44                return ProfileReport(
45                    df=data.sample(fraction=sample_perc, with_replacement=False, shuffle=True).to_pandas(),
46                    config_file=os.path.join(DIR_PATH, "profile_config.yml"),
47                    lazy=False,
48                    **kwargs,
49                )
50
51            return ProfileReport(
52                df=data.to_pandas(),
53                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
54                lazy=False,
55                **kwargs,
56            )
57
58        if sample_perc < 1:
59            return ProfileReport(
60                df=data.sample(frac=sample_perc, replace=False),
61                config_file=os.path.join(DIR_PATH, "profile_config.yml"),
62                lazy=False,
63                **kwargs,
64            )
65
66        return ProfileReport(
67            df=data,
68            config_file=os.path.join(DIR_PATH, "profile_config.yml"),
69            lazy=False,
70            **kwargs,
71        )

Creates a ydata-profiling report

Arguments:
  • data: Pandas or Polars dataframe (Polars input is converted to pandas)
  • sample_perc: Fraction of rows to sample; sampling is applied only when the value is below 1
  • name: Name of the report
Returns:

ProfileReport

@staticmethod
def load_profile(data: bytes) -> Any:
73    @staticmethod
74    def load_profile(data: bytes) -> ProfileReport:
75        """Loads a `ProfileReport` from data bytes
76
77        Args:
78            data:
79                `ProfileReport` in bytes
80
81        Returns:
82            `ProfileReport`
83        """
84        from ydata_profiling import ProfileReport
85
86        profile = ProfileReport()
87        profile.loads(data)
88        return profile

Loads a ProfileReport from data bytes

Arguments:
  • data: ProfileReport in bytes
Returns:

ProfileReport

@staticmethod
def compare_reports(reports: List[Any]) -> Any:
 90    @staticmethod
 91    def compare_reports(reports: List[ProfileReport]) -> ProfileReport:
 92        """Compares ProfileReports
 93
 94        Args:
 95            reports:
 96                List of `ProfileReport`
 97
 98        Returns:
 99            `ProfileReport`
100        """
101        from ydata_profiling import compare
102
103        return compare(reports=reports)

Compares ProfileReports

Arguments:
  • reports: List of ProfileReport
Returns:

ProfileReport