Module omnipy.modules.pandas.models

Overview

View Source
from collections.abc import Iterable

from typing import Any

from omnipy.data.dataset import Dataset

from omnipy.data.model import Model, ROOT_KEY

from . import pd

class PandasModel(Model[pd.DataFrame]):

    @classmethod

    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:

        cls._data_column_names_are_strings(data)

        cls._data_not_empty_object(data)

        return data

    @staticmethod

    def _data_column_names_are_strings(data: pd.DataFrame) -> None:

        for column in data.columns:

            assert isinstance(column, str)

    @staticmethod

    def _data_not_empty_object(data: pd.DataFrame) -> None:

        assert not any(data.isna().all(axis=1))

    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()

class PandasDataset(Dataset[PandasModel]):

    ...

class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):

    @classmethod

    def _parse_data(cls, dataset_list: list[PandasDataset]) -> Any:

        assert len(dataset_list) >= 2

        assert all(len(dataset) for dataset in dataset_list)
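
A minimal usage sketch (hedged; it assumes pandas is installed and that list-of-record input is accepted by from_data(), as in the Dataset docstring examples further down), e.g.::

from omnipy.modules.pandas.models import PandasDataset, PandasModel

# Parse a list of records into a pandas DataFrame held by the model
model = PandasModel()
model.from_data([{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'y'}])
print(type(model.contents))          # pandas.core.frame.DataFrame

# Collect several data files of the same model in a dataset
dataset = PandasDataset()
dataset.from_data({'file_1': [{'a': 1}], 'file_2': [{'a': 2}]})
print(list(dataset.keys()))          # ['file_1', 'file_2']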

Variables

ROOT_KEY

Classes

ListOfPandasDatasetsWithSameNumberOfFiles

class ListOfPandasDatasetsWithSameNumberOfFiles(
    value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    __root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    **data: Any
)

A data model containing a value parsed according to the model.

If no value is provided, the value is set to the default value of the data model, found by calling the model class without parameters, e.g. int().

Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

Example data model specialized as a class alias::

MyNumberList = Model[list[int]]

... alternatively as a Model subclass::

class MyNumberList(Model[list[int]]):
    pass

Once instantiated, a Model object functions as a parser, e.g.::

my_number_list = MyNumberList([2,3,4])

my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]

While the following should raise a ValidationError::

my_number_list.contents = ['abc', 'def']

The Model class is a wrapper class around the powerful GenericModel class from pydantic.

See also docs of the Dataset class for more usage examples.

View Source
class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):

    @classmethod

    def _parse_data(cls, dataset_list: list[PandasDataset]) -> Any:

        assert len(dataset_list) >= 2

        assert all(len(dataset) for dataset in dataset_list)

Class variables

Config

Static methods

to_json_schema
def to_json_schema(
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str:

        schema = cls.schema()

        if pretty:

            return cls._pretty_print_json(schema)

        else:

            return json.dumps(schema)
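
A short usage sketch (illustrative; it uses a plain Model specialization, since the exact schema produced for the pandas-backed models is not shown here), e.g.::

import json
from omnipy.data.model import Model

MyNumberList = Model[list[int]]

schema = json.loads(MyNumberList.to_json_schema())  # the method returns a JSON string
print(schema.get('title'))                          # e.g. 'Model[list[int]]'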
validate
def validate(
    value: Any
) -> Model

Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part of the pydantic API and not the Omnipy API.

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
Model
View Source
    @classmethod

    def validate(cls: Type['Model'], value: Any) -> 'Model':

        """

        Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part

        of the pydantic API and not the Omnipy API.

        """

        if isinstance(value, Model):

            with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):

                return super().validate(value)

        else:

            return super().validate(value)

Instance variables

contents

Methods

__delitem__
def __delitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        return isinstance(other, Model) \

            and self.__class__ == other.__class__ \

            and self.contents == other.contents \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__getattr__
def __getattr__(
    self,
    attr: str
) -> Any

Parameters:

Name Type Description Default
attr str

Returns:

Type Description
Any
View Source
    def __getattr__(self, attr: str) -> Any:

        ret = self._getattr_from_contents(attr)

        if callable(ret):

            ret = add_callback_after_call(ret, self.validate_contents)

        return ret
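
The source above forwards unknown attribute lookups to the contents and wraps any callable result so that validate_contents() runs after the call. A hedged sketch of the effect, using a simple Model specialization for illustration::

from omnipy.data.model import Model

numbers = Model[list[int]]([1, 2, 3])

# append() is resolved on the underlying list via __getattr__ ...
numbers.append(4)

# ... and the contents are re-validated after the call
assert numbers.contents == [1, 2, 3, 4]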
__iter__
def __iter__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ and attr not in [ROOT_KEY]:

            super().__setattr__(attr, value)

        else:

            if attr in ['contents']:

                contents_prop = getattr(self.__class__, attr)

                contents_prop.__set__(self, value)

            else:

                raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
from_data
def from_data(
    self,
    value: Any
) -> None

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
NoneType
View Source
    def from_data(self, value: Any) -> None:

        self.contents = value
from_json
def from_json(
    self,
    json_contents: str
) -> None

Parameters:

Name Type Description Default
json_contents str

Returns:

Type Description
NoneType
View Source
    def from_json(self, json_contents: str) -> None:

        new_model = self.parse_raw(json_contents, proto=pydantic_protocol.json)

        self._set_contents_without_validation(new_model)
inner_type
def inner_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def inner_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
def is_nested_type(
    self
) -> bool

Returns:

Type Description
bool
View Source
    def is_nested_type(self) -> bool:

        return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
def outer_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def outer_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
def to_data(
    self
) -> Any

Returns:

Type Description
Any
View Source
    def to_data(self) -> Any:

        return self.dict()[ROOT_KEY]
to_json
def to_json(
    self,
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    def to_json(self, pretty=False) -> str:

        json_content = self.json()

        if pretty:

            return self._pretty_print_json(json.loads(json_content))

        else:

            return json_content
validate_contents
def validate_contents(
    self
)
View Source
    def validate_contents(self):

        self.contents = self.contents

PandasDataset

class PandasDataset(
    value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
    **input_data: object
)

Dict-based container of data files that follow a specific Model

Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

The specialization must be done through the use of Model, either directly, e.g.::

MyDataset = Dataset[Model[dict[str, list[int]]]]

... or indirectly, using a Model subclass, e.g.::

class MyModel(Model[dict[str, list[int]]]):
    pass

MyDataset = Dataset[MyModel]

... alternatively through the specification of a Dataset subclass::

class MyDataset(Dataset[MyModel]):
    pass

The specialization can also be done in a more deeply nested structure, e.g.::

class MyNumberList(Model[list[int]]):
    pass

class MyToplevelDict(Model[dict[str, MyNumberList]]):
    pass

class MyDataset(Dataset[MyToplevelDict]):
    pass

Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::

MyNumberListDataset = Dataset[Model[list[int]]]

my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]

print(my_dataset.keys())

The Dataset class is a wrapper class around the powerful GenericModel class from pydantic.

View Source
class PandasDataset(Dataset[PandasModel]):

    ...
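
A hedged sketch of typical PandasDataset usage (it relies on Dataset.from_data(), documented further down, which builds one PandasModel per data file)::

dataset = PandasDataset()
dataset.from_data({'samples': [{'id': 1, 'value': 0.5}],
                   'controls': [{'id': 2, 'value': 0.7}]})

print(list(dataset.keys()))               # ['samples', 'controls']
print(type(dataset['samples'].contents))  # expected: pandas.core.frame.DataFrame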

Class variables

Config

Static methods

get_model_class
def get_model_class(

) -> Type[omnipy.data.model.Model]

Returns the concrete Model class used for all data files in the dataset, e.g.:

Model[list[int]]

Returns:

Type Description
Type[Model] The concrete Model class used for all data files in the dataset
View Source
    @classmethod

    def get_model_class(cls) -> Type[Model]:

        """

        Returns the concrete Model class used for all data files in the dataset, e.g.:

        `Model[list[int]]`

        :return: The concrete Model class used for all data files in the dataset

        """

        model_type = cls.__fields__.get(DATA_KEY).type_

        return cls._origmodel_if_annotated_optional(model_type)
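
For PandasDataset this is expected to resolve to the PandasModel class, e.g.::

model_cls = PandasDataset.get_model_class()
print(model_cls)   # expected: <class 'omnipy.modules.pandas.models.PandasModel'>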
to_json_schema
def to_json_schema(
    pretty=False
) -> str | dict[str, str]

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str | dict[str, str]
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str | dict[str, str]:

        result = {}

        schema = cls.schema()

        for key, val in schema['properties']['data'].items():

            result[key] = val

        result['title'] = schema['title']

        result['definitions'] = schema['definitions']

        if pretty:

            return cls._pretty_print_json(result)

        else:

            return json.dumps(result)

Methods

__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        # return self.__class__ == other.__class__ and super().__eq__(other)

        return isinstance(other, Dataset) \

            and self.__class__ == other.__class__ \

            and self.data == other.data \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__iter__
def __iter__(
    self
) -> Iterator

so dict(model) works

Returns:

Type Description
Iterator
View Source
    def __iter__(self) -> Iterator:

        return UserDict.__iter__(self)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):

            super().__setattr__(attr, value)

        else:

            raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    obj_type: str,
    data_obj: Any
) -> None

Parameters:

Name Type Description Default
obj_type str
data_obj Any

Returns:

Type Description
NoneType
View Source
    def __setitem__(self, obj_type: str, data_obj: Any) -> None:

        has_prev_value = obj_type in self.data

        prev_value = self.data.get(obj_type)

        try:

            self.data[obj_type] = data_obj

            self._validate(obj_type)

        except:  # noqa

            if has_prev_value:

                self.data[obj_type] = prev_value

            else:

                del self.data[obj_type]

            raise
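
The try/except above rolls back a failed assignment. A hedged sketch of the effect (the exception type raised on a failed parse is caught generically here)::

import pandas as pd

from omnipy.modules.pandas.models import PandasDataset, PandasModel

dataset = PandasDataset()
ok_model = PandasModel()
ok_model.from_data([{'name': 'a'}])
dataset['ok'] = ok_model                 # valid: string column names

bad_df = pd.DataFrame([[1, 2]])          # integer column names -> expected to fail parsing
try:
    dataset['ok'] = bad_df
except Exception:
    pass

# The previous, valid value is still in place after the failed assignment
assert list(dataset['ok'].contents.columns) == ['name']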
as_multi_model_dataset
def as_multi_model_dataset(
    self
) -> MultiModelDataset[ModelT]

Returns:

Type Description
'MultiModelDataset[ModelT]'
View Source
    def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':

        multi_model_dataset = MultiModelDataset[self.get_model_class()]()

        for obj_type in self:

            multi_model_dataset.data[obj_type] = self.data[obj_type]

        return multi_model_dataset
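
A hedged sketch: the returned MultiModelDataset starts out with the same files, but allows the model to be overridden per file afterwards (not shown here)::

dataset = PandasDataset()
dataset.from_data({'a': [{'x': 1}]})

multi = dataset.as_multi_model_dataset()
print(list(multi.keys()))   # ['a']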
from_data
def from_data(
    self,
    data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, Any], Iterator[tuple[str, Any]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_data(self,

                  data: dict[str, Any] | Iterator[tuple[str, Any]],

                  update: bool = True) -> None:

        if not isinstance(data, dict):

            data = dict(data)

        if not update:

            self.clear()

        for obj_type, obj_val in data.items():

            new_model = self.get_model_class()()  # noqa

            new_model.from_data(obj_val)

            self[obj_type] = new_model
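
A hedged sketch of the update flag: with update=True (the default) new files are merged into the existing ones, while update=False clears the dataset first::

dataset = PandasDataset()
dataset.from_data({'a': [{'x': 1}]})
dataset.from_data({'b': [{'x': 2}]})                 # update=True (default): 'a' is kept
print(list(dataset.keys()))                          # ['a', 'b']

dataset.from_data({'c': [{'x': 3}]}, update=False)   # clears existing files first
print(list(dataset.keys()))                          # ['c']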
from_json
def from_json(
    self,
    data: Union[dict[str, str], Iterator[tuple[str, str]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, str], Iterator[tuple[str, str]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_json(self,

                  data: dict[str, str] | Iterator[tuple[str, str]],

                  update: bool = True) -> None:

        if not isinstance(data, dict):

            data = dict(data)

        if not update:

            self.clear()

        for obj_type, obj_val in data.items():

            new_model = self.get_model_class()()  # noqa

            new_model.from_json(obj_val)

            self[obj_type] = new_model
to_data
def to_data(
    self
) -> dict[str, typing.Any]

Returns:

Type Description
dict[str, typing.Any]
View Source
    def to_data(self) -> dict[str, Any]:

        return GenericModel.dict(self).get(DATA_KEY)
to_json
def to_json(
    self,
    pretty=False
) -> dict[str, str]

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
dict[str, str]
View Source
    def to_json(self, pretty=False) -> dict[str, str]:

        result = {}

        for key, val in self.to_data().items():

            result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)

        return result

PandasModel

class PandasModel(
    value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    __root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    **data: Any
)

A data model containing a value parsed according to the model.

If no value is provided, the value is set to the default value of the data model, found by calling the model class without parameters, e.g. int().

Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

Example data model specialized as a class alias::

MyNumberList = Model[list[int]]

... alternatively as a Model subclass::

class MyNumberList(Model[list[int]]):
    pass

Once instantiated, a Model object functions as a parser, e.g.::

my_number_list = MyNumberList([2,3,4])

my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]

While the following should raise a ValidationError::

my_number_list.contents = ['abc', 'def']

The Model class is a wrapper class around the powerful GenericModel class from pydantic.

See also docs of the Dataset class for more usage examples.

View Source
class PandasModel(Model[pd.DataFrame]):

    @classmethod

    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:

        cls._data_column_names_are_strings(data)

        cls._data_not_empty_object(data)

        return data

    @staticmethod

    def _data_column_names_are_strings(data: pd.DataFrame) -> None:

        for column in data.columns:

            assert isinstance(column, str)

    @staticmethod

    def _data_not_empty_object(data: pd.DataFrame) -> None:

        assert not any(data.isna().all(axis=1))

    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()

Class variables

Config

Static methods

to_json_schema
def to_json_schema(
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str:

        schema = cls.schema()

        if pretty:

            return cls._pretty_print_json(schema)

        else:

            return json.dumps(schema)
validate
def validate(
    value: Any
) -> Model

Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part of the pydantic API and not the Omnipy API.

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
Model
View Source
    @classmethod

    def validate(cls: Type['Model'], value: Any) -> 'Model':

        """

        Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part

        of the pydantic API and not the Omnipy API.

        """

        if isinstance(value, Model):

            with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):

                return super().validate(value)

        else:

            return super().validate(value)

Instance variables

contents

Methods

__delitem__
def __delitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        return isinstance(other, Model) \

            and self.__class__ == other.__class__ \

            and self.contents == other.contents \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__getattr__
def __getattr__(
    self,
    attr: str
) -> Any

Parameters:

Name Type Description Default
attr str

Returns:

Type Description
Any
View Source
    def __getattr__(self, attr: str) -> Any:

        ret = self._getattr_from_contents(attr)

        if callable(ret):

            ret = add_callback_after_call(ret, self.validate_contents)

        return ret
__iter__
def __iter__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ and attr not in [ROOT_KEY]:

            super().__setattr__(attr, value)

        else:

            if attr in ['contents']:

                contents_prop = getattr(self.__class__, attr)

                contents_prop.__set__(self, value)

            else:

                raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
dict
def dict(
    self,
    *args,
    **kwargs
) -> dict[str, dict[typing.Any, typing.Any]]

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

Parameters:

Name Type Description Default
args
kwargs

Returns:

Type Description
dict[str, dict[typing.Any, typing.Any]]
View Source
    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}
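
A hedged sketch of the record-oriented output (missing values, stored as pd.NA after convert_dtypes(), are expected to be mapped to None)::

model = PandasModel()
model.from_data([{'a': 1, 'b': 'x'}, {'a': 2}])   # second record is missing 'b'

print(model.to_data())
# expected: [{'a': 1, 'b': 'x'}, {'a': 2, 'b': None}]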
from_data
def from_data(
    self,
    value: collections.abc.Iterable[typing.Any]
) -> None

Parameters:

Name Type Description Default
value collections.abc.Iterable[typing.Any]

Returns:

Type Description
NoneType
View Source
    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()
from_json
def from_json(
    self,
    value: str
) -> None

Parameters:

Name Type Description Default
value str

Returns:

Type Description
NoneType
View Source
    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()
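
A hedged sketch: a JSON array of records is read into a DataFrame and converted to pandas' nullable dtypes::

model = PandasModel()
model.from_json('[{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]')

print(list(model.contents.columns))   # ['a', 'b']
print(model.contents.dtypes['a'])     # expected: Int64 (nullable integer dtype)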
inner_type
def inner_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def inner_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
def is_nested_type(
    self
) -> bool

Returns:

Type Description
bool
View Source
    def is_nested_type(self) -> bool:

        return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
def outer_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def outer_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
def to_data(
    self
) -> Any

Returns:

Type Description
Any
View Source
    def to_data(self) -> Any:

        return self.dict()[ROOT_KEY]
to_json
def to_json(
    self,
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    def to_json(self, pretty=False) -> str:

        json_content = self.json()

        if pretty:

            return self._pretty_print_json(json.loads(json_content))

        else:

            return json_content
validate_contents
def validate_contents(
    self
)
View Source
    def validate_contents(self):

        self.contents = self.contents