Source code for deirokay.parser.treaters.builtin.base_treater

from abc import ABC
from typing import Any, Iterable, List, Optional

import dask.dataframe  # lazy module
import pandas  # lazy module

from deirokay._typing import DeirokayDataSeries, DeirokaySerializedSeries
from deirokay.backend import MultiBackendMixin
from deirokay.enums import Backend, DTypes

from ..multibackend import treat as treat_


[docs]class BaseTreater(MultiBackendMixin, ABC): """Base class for all data treaters.""" supported_backends: List[Backend] = [Backend.PANDAS, Backend.DASK] supported_dtype: Optional[DTypes] = None """Optional[DTypes]: DType treated by this treater.""" supported_primitives: List[Any] = [] """List[Any]: List of primitives supported by this treater."""
[docs] def __call__(self, series: DeirokayDataSeries) -> DeirokayDataSeries: """Proxy for `treat`.""" return self.treat(series)
[docs] def treat(self, series: Iterable) -> DeirokayDataSeries: """Treat a raw Series to match data expectations for parsing and formatting. Calls the appropriate method for the detected backend Parameters ---------- series : Iterable Raw series to be treated. """ raise NotImplementedError
@treat_(Backend.PANDAS, force=True) def _treat_pandas(self, series: Iterable) -> 'pandas.Series': if isinstance(series, pandas.Series): return series return pandas.Series(series) @treat_(Backend.DASK, force=True) def _treat_dask(self, series: Iterable) -> 'dask.dataframe.Series': if isinstance(series, dask.dataframe.Series): return series # Assumption: When treating a general Iterable, we assume # it fits entirely in memory # TODO: let user propose a max partition size, # instead of assuming a single partition return dask.dataframe.from_pandas(pandas.Series(series), npartitions=1)
[docs] @staticmethod def serialize(series: DeirokayDataSeries) -> DeirokaySerializedSeries: """Create a Deirokay-compatible serializable object that can be serialized (in JSON or YAML formats) and parsed back by Deirokay treaters. This method is useful when generating validation documents that embed Deirokay-compatible user data. The output format is a Python dict containing two keys: - `values`: list of values as Python object. - `parser`: Deirokay options to parse the data. (See more: `deirokay.parser.get_treater_instance`). Parameters ---------- series : DeirokayDataSeries Series data to be serialized. Returns ------- dict A Python dict containing the keys `values` and `parser`. """ raise NotImplementedError