Source code for deirokay.statements.builtin.base_statement

"""
The base statement that all other statements inherit from.
"""
from abc import ABC, abstractmethod
from typing import List

from deirokay._typing import (DeirokayDataSource, DeirokayOption,
                              DeirokayStatement)
from deirokay.backend import MultiBackendMixin


class BaseStatement(MultiBackendMixin, ABC):
    """Base abstract statement class for all Deirokay statements.

    Attributes
    ----------
    options : dict
        Statement parameters provided by user.
    """

    name: str = 'base_statement'
    """str: Statement name when referred in Validation Documents (only
    valid for Deirokay built-in statements)."""

    expected_parameters: List[str] = ['type', 'severity', 'location']
    """List[str]: Parameters expected for this statement."""

    def __init_subclass__(cls) -> None:
        """Validate subclassed statement.

        Raises
        ------
        AssertionError
            If the subclass still carries the base class `name`.
        """
        # NOTE(review): this hook does not chain to
        # `super().__init_subclass__()`; confirm no base class relies on it.
        #
        # Raised explicitly (instead of using an `assert` statement) so the
        # check is not stripped when Python runs with `-O`; the exception
        # type is kept as AssertionError for backward compatibility.
        if cls.name == BaseStatement.name:
            raise AssertionError(
                'You should specify a `name` attribute for your statement class.'
            )

    def __init__(self, options: DeirokayOption) -> None:
        """Validate and store the statement parameters.

        Parameters
        ----------
        options : dict
            Statement parameters provided by user.

        Raises
        ------
        ValueError
            If `options` contains a parameter the statement does not expect.
        """
        self._validate_options(options)
        self.options = options

    @classmethod
    def _validate_options(cls, options: DeirokayOption) -> None:
        """Make sure all provided statement parameters are expected by
        statement classes.

        A parameter is valid when it is listed either in the statement's own
        `expected_parameters` or in `BaseStatement.expected_parameters`.

        Parameters
        ----------
        options : dict
            Statement parameters provided by user.

        Raises
        ------
        ValueError
            If at least one parameter is not expected. The message lists
            the offending parameters and every valid one.
        """
        # Built once (not per option) and de-duplicated while preserving
        # order, so the base parameters are not repeated when `cls` does
        # not override `expected_parameters`.
        valid_parameters = list(dict.fromkeys(
            cls.expected_parameters + BaseStatement.expected_parameters
        ))
        unexpected_parameters = [
            option for option in options
            if option not in valid_parameters
        ]
        if unexpected_parameters:
            # Report the full accepted set: the base parameters are valid
            # too, so listing only the class-specific ones would mislead.
            raise ValueError(
                f'Invalid parameters passed to {cls.__name__} statement: '
                f'{unexpected_parameters}\n'
                f'The valid parameters are: {valid_parameters}'
            )

    def __call__(self, df: DeirokayDataSource) -> dict:
        """Run statement instance.

        Parameters
        ----------
        df : DataFrame
            Data passed on to `report`.

        Returns
        -------
        dict
            A dictionary with the output of `report` under the `'detail'`
            key and the output of `result` under the `'result'` key.
        """
        internal_report = self.report(df)
        result = self.result(internal_report)
        final_report = {
            'detail': internal_report,
            'result': result
        }
        return final_report

    def report(self, df: DeirokayDataSource) -> dict:
        """Receive a DataFrame containing only columns on the scope of
        validation and return a report of related metrics that can be
        used later to declare this Statement as fulfilled or failed.

        Parameters
        ----------
        df : DataFrame
            The scoped DataFrame columns to be analysed in this report
            by this statement.

        Returns
        -------
        dict
            A dictionary of useful statistics about the target columns.

        Raises
        ------
        NotImplementedError
            Always, in this base implementation.
        """
        raise NotImplementedError

    @abstractmethod
    def result(self, report: dict) -> bool:
        """Receive the report previously generated and declare this
        statement as either fulfilled (True) or failed (False).

        Parameters
        ----------
        report : dict
            Report generated by `report` method. Should ideally contain
            all statistics necessary to evaluate the statement validity.

        Returns
        -------
        bool
            Whether or not this statement passed.
        """

    @staticmethod
    def profile(df: DeirokayDataSource) -> DeirokayStatement:
        """Given a template data table, generate a statement dict from it.

        Parameters
        ----------
        df : DataFrame
            The DataFrame to be used as template.

        Returns
        -------
        dict
            Statement dict.

        Raises
        ------
        NotImplementedError
            If this method is not implemented by the subclass or the
            profile generation for this statement was intentionally
            skipped.
        """
        raise NotImplementedError