Module omnipy.modules.pandas.models

Overview

View Source
from collections.abc import Iterable

from typing import Any

from omnipy.data.dataset import Dataset

from omnipy.data.model import Model, ROOT_KEY

from . import pd

class PandasModel(Model[pd.DataFrame]):

    @classmethod

    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:

        cls._data_column_names_are_strings(data)

        cls._data_not_empty_object(data)

        return data

    @staticmethod

    def _data_column_names_are_strings(data: pd.DataFrame) -> None:

        for column in data.columns:

            assert isinstance(column, str)

    @staticmethod

    def _data_not_empty_object(data: pd.DataFrame) -> None:

        assert not any(data.isna().all(axis=1))

    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()

class PandasDataset(Dataset[PandasModel]):

    ...

class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):

    @classmethod

    def _parse_data(cls, dataset_list: list[PandasDataset]) -> Any:

        assert len(dataset_list) >= 2

        assert all(len(dataset) for dataset in dataset_list)
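
A minimal usage sketch (hedged; it assumes pandas is installed and that list-of-record input is accepted by from_data(), as in the Dataset docstring examples further down), e.g.::

from omnipy.modules.pandas.models import PandasDataset, PandasModel

# Parse a list of records into a pandas DataFrame held by the model
model = PandasModel()
model.from_data([{'a': 1, 'b': 'x'}, {'a': 2, 'b': 'y'}])
print(type(model.contents))          # pandas.core.frame.DataFrame

# Collect several data files of the same model in a dataset
dataset = PandasDataset()
dataset.from_data({'file_1': [{'a': 1}], 'file_2': [{'a': 2}]})
print(list(dataset.keys()))          # ['file_1', 'file_2']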

Variables

ROOT_KEY

Classes

ListOfPandasDatasetsWithSameNumberOfFiles

class ListOfPandasDatasetsWithSameNumberOfFiles(
    value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    __root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    **data: Any
)

A data model containing a value parsed according to the model.

If no value is provided, the value is set to the default value of the data model, found by calling the model class without parameters, e.g. int().

Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

Example data model specialized as a class alias::

MyNumberList = Model[list[int]]

... alternatively as a Model subclass::

class MyNumberList(Model[list[int]]):
    pass

Once instantiated, a Model object functions as a parser, e.g.::

my_number_list = MyNumberList([2,3,4])

my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]

While the following should raise a ValidationError::

my_number_list.contents = ['abc', 'def']

The Model class is a wrapper class around the powerful GenericModel class from pydantic.

See also docs of the Dataset class for more usage examples.

View Source
class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):

    @classmethod

    def _parse_data(cls, dataset_list: list[PandasDataset]) -> Any:

        assert len(dataset_list) >= 2

        assert all(len(dataset) for dataset in dataset_list)

Class variables

Config

Static methods

to_json_schema
def to_json_schema(
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str:

        schema = cls.schema()

        if pretty:

            return cls._pretty_print_json(schema)

        else:

            return json.dumps(schema)
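
A short usage sketch (illustrative; it uses a plain Model specialization, since the exact schema produced for the pandas-backed models is not shown here), e.g.::

import json
from omnipy.data.model import Model

MyNumberList = Model[list[int]]

schema = json.loads(MyNumberList.to_json_schema())  # the method returns a JSON string
print(schema.get('title'))                          # e.g. 'Model[list[int]]'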
validate
def validate(
    value: Any
) -> Model

Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part of the pydantic API and not the Omnipy API.

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
Model
View Source
    @classmethod

    def validate(cls: Type['Model'], value: Any) -> 'Model':

        """

        Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part

        of the pydantic API and not the Omnipy API.

        """

        if isinstance(value, Model):

            with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):

                return super().validate(value)

        else:

            return super().validate(value)

Instance variables

contents

Methods

__delitem__
def __delitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        return isinstance(other, Model) \

            and self.__class__ == other.__class__ \

            and self.contents == other.contents \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__getattr__
def __getattr__(
    self,
    attr: str
) -> Any

Parameters:

Name Type Description Default
attr str

Returns:

Type Description
Any
View Source
    def __getattr__(self, attr: str) -> Any:

        ret = self._getattr_from_contents(attr)

        if callable(ret):

            ret = add_callback_after_call(ret, self.validate_contents)

        return ret
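
The source above forwards unknown attribute lookups to the contents and wraps any callable result so that validate_contents() runs after the call. A hedged sketch of the effect, using a simple Model specialization for illustration::

from omnipy.data.model import Model

numbers = Model[list[int]]([1, 2, 3])

# append() is resolved on the underlying list via __getattr__ ...
numbers.append(4)

# ... and the contents are re-validated after the call
assert numbers.contents == [1, 2, 3, 4]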
__iter__
def __iter__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ and attr not in [ROOT_KEY]:

            super().__setattr__(attr, value)

        else:

            if attr in ['contents']:

                contents_prop = getattr(self.__class__, attr)

                contents_prop.__set__(self, value)

            else:

                raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
from_data
def from_data(
    self,
    value: Any
) -> None

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
NoneType
View Source
    def from_data(self, value: Any) -> None:

        self.contents = value
from_json
def from_json(
    self,
    json_contents: str
) -> None

Parameters:

Name Type Description Default
json_contents str

Returns:

Type Description
NoneType
View Source
    def from_json(self, json_contents: str) -> None:

        new_model = self.parse_raw(json_contents, proto=pydantic_protocol.json)

        self._set_contents_without_validation(new_model)
inner_type
def inner_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def inner_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
def is_nested_type(
    self
) -> bool

Returns:

Type Description
bool
View Source
    def is_nested_type(self) -> bool:

        return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
def outer_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def outer_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
def to_data(
    self
) -> Any

Returns:

Type Description
Any
View Source
    def to_data(self) -> Any:

        return self.dict()[ROOT_KEY]
to_json
def to_json(
    self,
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    def to_json(self, pretty=False) -> str:

        json_content = self.json()

        if pretty:

            return self._pretty_print_json(json.loads(json_content))

        else:

            return json_content
validate_contents
def validate_contents(
    self
)
View Source
    def validate_contents(self):

        self.contents = self.contents

PandasDataset

class PandasDataset(
    value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
    **input_data: object
)

Dict-based container of data files that follow a specific Model

Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

The specialization must be done through the use of Model, either directly, e.g.::

MyDataset = Dataset[Model[dict[str, list[int]]]]

... or indirectly, using a Model subclass, e.g.::

class MyModel(Model[dict[str, list[int]]]):
    pass

MyDataset = Dataset[MyModel]

... alternatively through the specification of a Dataset subclass::

class MyDataset(Dataset[MyModel]):
    pass

The specialization can also be done in a more deeply nested structure, e.g.::

class MyNumberList(Model[list[int]]):
    pass

class MyToplevelDict(Model[dict[str, MyNumberList]]):
    pass

class MyDataset(Dataset[MyToplevelDict]):
    pass

Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::

MyNumberListDataset = Dataset[Model[list[int]]]

my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]

print(my_dataset.keys())

The Dataset class is a wrapper class around the powerful GenericModel class from pydantic.

View Source
class PandasDataset(Dataset[PandasModel]):

    ...
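
A hedged sketch of typical PandasDataset usage (it relies on Dataset.from_data(), documented further down, which builds one PandasModel per data file)::

dataset = PandasDataset()
dataset.from_data({'samples': [{'id': 1, 'value': 0.5}],
                   'controls': [{'id': 2, 'value': 0.7}]})

print(list(dataset.keys()))               # ['samples', 'controls']
print(type(dataset['samples'].contents))  # expected: pandas.core.frame.DataFrame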

Class variables

Config

Static methods

get_model_class
def get_model_class(

) -> Type[omnipy.data.model.Model]

Returns the concrete Model class used for all data files in the dataset, e.g.:

Model[list[int]]

Returns:

Type Description
Type[Model] The concrete Model class used for all data files in the dataset
View Source
    @classmethod

    def get_model_class(cls) -> Type[Model]:

        """

        Returns the concrete Model class used for all data files in the dataset, e.g.:

        `Model[list[int]]`

        :return: The concrete Model class used for all data files in the dataset

        """

        model_type = cls.__fields__.get(DATA_KEY).type_

        return cls._origmodel_if_annotated_optional(model_type)
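
For PandasDataset this is expected to resolve to the PandasModel class, e.g.::

model_cls = PandasDataset.get_model_class()
print(model_cls)   # expected: <class 'omnipy.modules.pandas.models.PandasModel'>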
to_json_schema
def to_json_schema(
    pretty=False
) -> str | dict[str, str]

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str | dict[str, str]
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str | dict[str, str]:

        result = {}

        schema = cls.schema()

        for key, val in schema['properties']['data'].items():

            result[key] = val

        result['title'] = schema['title']

        result['definitions'] = schema['definitions']

        if pretty:

            return cls._pretty_print_json(result)

        else:

            return json.dumps(result)

Methods

__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        # return self.__class__ == other.__class__ and super().__eq__(other)

        return isinstance(other, Dataset) \

            and self.__class__ == other.__class__ \

            and self.data == other.data \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__iter__
def __iter__(
    self
) -> Iterator

so dict(model) works

Returns:

Type Description
Iterator
View Source
    def __iter__(self) -> Iterator:

        return UserDict.__iter__(self)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):

            super().__setattr__(attr, value)

        else:

            raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    obj_type: str,
    data_obj: Any
) -> None

Parameters:

Name Type Description Default
obj_type str
data_obj Any

Returns:

Type Description
NoneType
View Source
    def __setitem__(self, obj_type: str, data_obj: Any) -> None:

        has_prev_value = obj_type in self.data

        prev_value = self.data.get(obj_type)

        try:

            self.data[obj_type] = data_obj

            self._validate(obj_type)

        except:  # noqa

            if has_prev_value:

                self.data[obj_type] = prev_value

            else:

                del self.data[obj_type]

            raise
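
The try/except above rolls back a failed assignment. A hedged sketch of the effect (the exception type raised on a failed parse is caught generically here)::

import pandas as pd

from omnipy.modules.pandas.models import PandasDataset, PandasModel

dataset = PandasDataset()
ok_model = PandasModel()
ok_model.from_data([{'name': 'a'}])
dataset['ok'] = ok_model                 # valid: string column names

bad_df = pd.DataFrame([[1, 2]])          # integer column names -> expected to fail parsing
try:
    dataset['ok'] = bad_df
except Exception:
    pass

# The previous, valid value is still in place after the failed assignment
assert list(dataset['ok'].contents.columns) == ['name']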
as_multi_model_dataset
def as_multi_model_dataset(
    self
) -> MultiModelDataset[ModelT]

Returns:

Type Description
'MultiModelDataset[ModelT]'
View Source
    def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':

        multi_model_dataset = MultiModelDataset[self.get_model_class()]()

        for obj_type in self:

            multi_model_dataset.data[obj_type] = self.data[obj_type]

        return multi_model_dataset
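
A hedged sketch: the returned MultiModelDataset starts out with the same files, but allows the model to be overridden per file afterwards (not shown here)::

dataset = PandasDataset()
dataset.from_data({'a': [{'x': 1}]})

multi = dataset.as_multi_model_dataset()
print(list(multi.keys()))   # ['a']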
from_data
def from_data(
    self,
    data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, Any], Iterator[tuple[str, Any]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_data(self,

                  data: dict[str, Any] | Iterator[tuple[str, Any]],

                  update: bool = True) -> None:

        if not isinstance(data, dict):

            data = dict(data)

        if not update:

            self.clear()

        for obj_type, obj_val in data.items():

            new_model = self.get_model_class()()  # noqa

            new_model.from_data(obj_val)

            self[obj_type] = new_model
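
A hedged sketch of the update flag: with update=True (the default) new files are merged into the existing ones, while update=False clears the dataset first::

dataset = PandasDataset()
dataset.from_data({'a': [{'x': 1}]})
dataset.from_data({'b': [{'x': 2}]})                 # update=True (default): 'a' is kept
print(list(dataset.keys()))                          # ['a', 'b']

dataset.from_data({'c': [{'x': 3}]}, update=False)   # clears existing files first
print(list(dataset.keys()))                          # ['c']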
from_json
def from_json(
    self,
    data: Union[dict[str, str], Iterator[tuple[str, str]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, str], Iterator[tuple[str, str]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_json(self,

                  data: dict[str, str] | Iterator[tuple[str, str]],

                  update: bool = True) -> None:

        if not isinstance(data, dict):

            data = dict(data)

        if not update:

            self.clear()

        for obj_type, obj_val in data.items():

            new_model = self.get_model_class()()  # noqa

            new_model.from_json(obj_val)

            self[obj_type] = new_model
to_data
def to_data(
    self
) -> dict[str, typing.Any]

Returns:

Type Description
dict[str, typing.Any]
View Source
    def to_data(self) -> dict[str, Any]:

        return GenericModel.dict(self).get(DATA_KEY)
to_json
def to_json(
    self,
    pretty=False
) -> dict[str, str]

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
dict[str, str]
View Source
    def to_json(self, pretty=False) -> dict[str, str]:

        result = {}

        for key, val in self.to_data().items():

            result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)

        return result

PandasModel

class PandasModel(
    value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    __root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
    **data: Any
)

A data model containing a value parsed according to the model.

If no value is provided, the value is set to the default value of the data model, found by calling the model class without parameters, e.g. int().

Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

Example data model specialized as a class alias::

MyNumberList = Model[list[int]]

... alternatively as a Model subclass::

class MyNumberList(Model[list[int]]):
    pass

Once instantiated, a Model object functions as a parser, e.g.::

my_number_list = MyNumberList([2,3,4])

my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]

While the following should raise a ValidationError::

my_number_list.contents = ['abc', 'def']

The Model class is a wrapper class around the powerful GenericModel class from pydantic.

See also docs of the Dataset class for more usage examples.

View Source
class PandasModel(Model[pd.DataFrame]):

    @classmethod

    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:

        cls._data_column_names_are_strings(data)

        cls._data_not_empty_object(data)

        return data

    @staticmethod

    def _data_column_names_are_strings(data: pd.DataFrame) -> None:

        for column in data.columns:

            assert isinstance(column, str)

    @staticmethod

    def _data_not_empty_object(data: pd.DataFrame) -> None:

        assert not any(data.isna().all(axis=1))

    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()

Class variables

Config

Static methods

to_json_schema
def to_json_schema(
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    @classmethod

    def to_json_schema(cls, pretty=False) -> str:

        schema = cls.schema()

        if pretty:

            return cls._pretty_print_json(schema)

        else:

            return json.dumps(schema)
validate
def validate(
    value: Any
) -> Model

Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part of the pydantic API and not the Omnipy API.

Parameters:

Name Type Description Default
value Any

Returns:

Type Description
Model
View Source
    @classmethod

    def validate(cls: Type['Model'], value: Any) -> 'Model':

        """

        Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part

        of the pydantic API and not the Omnipy API.

        """

        if isinstance(value, Model):

            with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):

                return super().validate(value)

        else:

            return super().validate(value)

Instance variables

contents

Methods

__delitem__
def __delitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:

        return isinstance(other, Model) \

            and self.__class__ == other.__class__ \

            and self.contents == other.contents \

            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case
__getattr__
def __getattr__(
    self,
    attr: str
) -> Any

Parameters:

Name Type Description Default
attr str

Returns:

Type Description
Any
View Source
    def __getattr__(self, attr: str) -> Any:

        ret = self._getattr_from_contents(attr)

        if callable(ret):

            ret = add_callback_after_call(ret, self.validate_contents)

        return ret
__iter__
def __iter__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:

        if attr in self.__dict__ and attr not in [ROOT_KEY]:

            super().__setattr__(attr, value)

        else:

            if attr in ['contents']:

                contents_prop = getattr(self.__class__, attr)

                contents_prop.__set__(self, value)

            else:

                raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
def __setitem__(
    self,
    *args: object,
    **kwargs: object
) -> object

Parameters:

Name Type Description Default
args object
kwargs object

Returns:

Type Description
object
View Source
        def _method(cls_or_self, /, *args, **keywords):

            keywords = {**self.keywords, **keywords}

            return self.func(cls_or_self, *self.args, *args, **keywords)
dict
def dict(
    self,
    *args,
    **kwargs
) -> dict[str, dict[typing.Any, typing.Any]]

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

Parameters:

Name Type Description Default
args
kwargs

Returns:

Type Description
dict[str, dict[typing.Any, typing.Any]]
View Source
    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:

        df = super().dict(*args, **kwargs)[ROOT_KEY]

        df = df.replace({pd.NA: None})

        return {ROOT_KEY: df.to_dict(orient='records')}
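
A hedged sketch of the record-oriented output (missing values, stored as pd.NA after convert_dtypes(), are expected to be mapped to None)::

model = PandasModel()
model.from_data([{'a': 1, 'b': 'x'}, {'a': 2}])   # second record is missing 'b'

print(model.to_data())
# expected: [{'a': 1, 'b': 'x'}, {'a': 2, 'b': None}]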
from_data
def from_data(
    self,
    value: collections.abc.Iterable[typing.Any]
) -> None

Parameters:

Name Type Description Default
value collections.abc.Iterable[typing.Any]

Returns:

Type Description
NoneType
View Source
    def from_data(self, value: Iterable[Any]) -> None:

        self.contents = pd.DataFrame(value).convert_dtypes()
from_json
def from_json(
    self,
    value: str
) -> None

Parameters:

Name Type Description Default
value str

Returns:

Type Description
NoneType
View Source
    def from_json(self, value: str) -> None:

        self.contents = pd.read_json(value).convert_dtypes()
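
A hedged sketch: a JSON array of records is read into a DataFrame and converted to pandas' nullable dtypes::

model = PandasModel()
model.from_json('[{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]')

print(list(model.contents.columns))   # ['a', 'b']
print(model.contents.dtypes['a'])     # expected: Int64 (nullable integer dtype)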
inner_type
def inner_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def inner_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
def is_nested_type(
    self
) -> bool

Returns:

Type Description
bool
View Source
    def is_nested_type(self) -> bool:

        return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
def outer_type(
    self,
    with_args: bool = False
) -> type | None

Parameters:

Name Type Description Default
with_args bool

Returns:

Type Description
type | None
View Source
    def outer_type(self, with_args: bool = False) -> type | None:

        return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
def to_data(
    self
) -> Any

Returns:

Type Description
Any
View Source
    def to_data(self) -> Any:

        return self.dict()[ROOT_KEY]
to_json
def to_json(
    self,
    pretty=False
) -> str

Parameters:

Name Type Description Default
pretty

Returns:

Type Description
str
View Source
    def to_json(self, pretty=False) -> str:

        json_content = self.json()

        if pretty:

            return self._pretty_print_json(json.loads(json_content))

        else:

            return json_content
validate_contents
def validate_contents(
    self
)
View Source
    def validate_contents(self):

        self.contents = self.contents