Source code for deirokay.parser.treaters.builtin.numeric_treater

"""
Classes and functions to treat column data types according to
Deirokay data types.
"""

from typing import Iterable, Optional

import dask.dataframe  # lazy module
import pandas  # lazy module

from deirokay._typing import DeirokayDataSeries
from deirokay.enums import Backend

from ..multibackend import treat
from .validator import Validator


class NumericTreater(Validator):
    """Base class for numeric treaters.

    Adds optional removal of a thousand-separator character on top of
    the treatment performed by the parent `Validator`.

    Parameters
    ----------
    thousand_sep : Optional[str], optional
        Character to use as thousand separator, by default None
    """

    supported_backends = [Backend.PANDAS, Backend.DASK]

    def __init__(self, thousand_sep: Optional[str] = None, **kwargs):
        # Remaining keyword arguments are forwarded untouched to the parent.
        super().__init__(**kwargs)
        self.thousand_sep = thousand_sep

    def _treat_thousand_sep(self,
                            series: DeirokayDataSeries) -> DeirokayDataSeries:
        """Strip the configured thousand separator from `series`.

        Parameters
        ----------
        series : DeirokayDataSeries
            Series whose `.str` accessor is used for the replacement.

        Returns
        -------
        DeirokayDataSeries
            `series` itself when no separator is configured, otherwise
            the result of the literal (non-regex) replacement.

        Raises
        ------
        AttributeError
            When the replacement raises `AttributeError`; the original
            error is chained to a message hinting at a non-text column.
        """
        if self.thousand_sep is None:
            # Nothing configured: hand the series back unchanged.
            return series

        try:
            # `regex=False` makes the separator a literal string, so
            # characters such as '.' are not treated as patterns.
            return series.str.replace(self.thousand_sep, '', regex=False)
        except AttributeError as err:
            raise AttributeError(
                'Make sure you are not declaring a `thousand_sep` to'
                ' read a non-text-like column. This may happen when'
                ' reading numeric columns from a .parquet file,'
                ' for instance.'
            ) from err

    @treat(Backend.PANDAS)
    def _treat_pandas(self, series: Iterable) -> 'pandas.Series':
        """Run the parent pandas treatment, then drop thousand separators."""
        parent_treated = super()._treat_pandas(series)
        return self._treat_thousand_sep(parent_treated)

    @treat(Backend.DASK)
    def _treat_dask(
        self, series: Iterable
    ) -> 'dask.dataframe.Series':
        """Run the parent Dask treatment, then drop thousand separators."""
        parent_treated = super()._treat_dask(series)
        return self._treat_thousand_sep(parent_treated)