Source code for deirokay.parser.loader

"""
Functions to parse files into pandas DataFrames.
"""
import warnings
from copy import deepcopy
from typing import Optional, Union

from deirokay.__version__ import __comp_version__
from deirokay._typing import (DeirokayColumnOptions, DeirokayDataSource,
                              DeirokayOptionsDocument)
from deirokay._utils import check_columns_in_df_columns
from deirokay.backend import detect_backend
from deirokay.enums import Backend
from deirokay.exceptions import InvalidBackend, ParsingError
from deirokay.fs import fs_factory

from .reader import reader_factory
from .treaters import get_treater_instance


def data_reader(data: Union[str, DeirokayDataSource],
                options: Union[str, DeirokayOptionsDocument],
                backend: Optional[Backend] = None,
                **kwargs) -> DeirokayDataSource:
    """Create a new tabular data from a file or an object and apply
    Deirokay treatments to correctly parse it and pre-validate its
    content.

    Parameters
    ----------
    data : Union[str, DeirokayDataSource]
        Path or object used as data source.
    options : Union[str, DeirokayOptionsDocument]
        Path to or `dict` representing a Deirokay Options document.
        When an object is passed, it is deep-copied before use, so the
        caller's document is not modified by this function.
    backend : Optional[Backend], optional
        Defines backend to use for tables. By default None.
        Inferred from `data` when it is a valid DeirokayDataSource.
        Should be set when `data` is a path.
    **kwargs
        Extra top-level options. They are merged over the options
        document (overriding keys with the same name); everything left
        after removing `columns` is forwarded to the reader.

    Returns
    -------
    DeirokayDataSource
        A tabular data treated by Deirokay.

    Warns
    -----
    FutureWarning
        When `data` is a `str`, `backend` is None and the compatibility
        version is below 2. `Backend.PANDAS` is assumed in that case.

    Raises
    ------
    KeyError
        If the resulting options have no `columns` entry (assuming the
        options document behaves like a `dict`).
    InvalidBackend
        When `data` is a `str`, `backend` is None and the compatibility
        version is 2 or above.
    ParsingError
        Raised by `data_treater` when a column cannot be treated.
    """
    if isinstance(options, str):
        options_dict = fs_factory(options).read_dict()
    else:
        options_dict = deepcopy(options)
    options_dict.update(kwargs)

    # `columns` is handled separately: its keys select what to read and
    # its values drive the per-column treatment afterwards.
    columns = options_dict.pop('columns')

    if isinstance(data, str) and backend is None:
        if __comp_version__ < (2,):
            warnings.warn(
                'To preserve backward compatibility, the `backend` attribute'
                ' is assumed to be `Backend.PANDAS` when reading data from a'
                ' file or SQL query.\n'
                'In future, this behavior will change and an exception will'
                ' be raised whenever the backend cannot be inferred from the'
                ' `data` attribute. To prevent this error in future and'
                ' suppress this warning in the current version,'
                ' set the `backend` attribute explicitely in `data_reader()`.',
                FutureWarning,
                # Attribute the warning to the caller of `data_reader`,
                # which is the code that has to be changed.
                stacklevel=2,
            )
            backend = Backend.PANDAS
        else:
            raise InvalidBackend(
                'You should provide a `backend` attribute when it cannot be'
                ' inferred from `data`.'
            )

    backend = backend or detect_backend(data)

    reader = reader_factory(backend)
    df = reader.read(data, columns=list(columns), **options_dict)
    # Treatment happens in place on `df`.
    data_treater(df, columns, backend)
    return df
def data_treater(df: DeirokayDataSource,
                 options: DeirokayColumnOptions,
                 backend: Backend) -> None:
    """Treat every configured column of `df` in place, using the
    treater selected for that column's options and the given backend.

    Parameters
    ----------
    df : DeirokayDataSource
        Raw tabular data to be treated. Its columns are overwritten
        with their treated versions.
    options : DeirokayColumnOptions
        Mapping from column name to that column's Deirokay options.
    backend : Backend
        Backend passed on to `get_treater_instance` when selecting the
        treater for each column.

    Raises
    ------
    ParsingError
        When obtaining or applying the treater for a column fails. The
        original exception is chained as the cause.
    """
    check_columns_in_df_columns(options.keys(), df.columns)

    for column_name, column_options in options.items():
        # Hand a shallow copy to the treater factory instead of the
        # caller's own dict.
        treater_options: dict = column_options.copy()
        try:
            treater = get_treater_instance(treater_options, backend)
            df[column_name] = treater(df[column_name])
        except Exception as error:
            raise ParsingError(
                f'Error when parsing "{column_name}".'
            ) from error