Module omnipy.modules.general.datasets

Overview

View Source
from typing import Generic, TypeVar

from omnipy.data.dataset import Dataset
from omnipy.modules.general.models import (NestedFrozenDictsModel,
                                           NestedFrozenDictsOrTuplesModel,
                                           NestedFrozenTuplesModel)

_KeyT = TypeVar('_KeyT')
_ScT = TypeVar('_ScT')


class NestedFrozenTuplesDataset(Dataset[NestedFrozenTuplesModel[_ScT]], Generic[_ScT]):
    ...


class NestedFrozenDictsDataset(Dataset[NestedFrozenDictsModel[_KeyT, _ScT]], Generic[_KeyT, _ScT]):
    ...


class NestedFrozenDictsOrTuplesDataset(Dataset[NestedFrozenDictsOrTuplesModel[_KeyT, _ScT]],
                                       Generic[_KeyT, _ScT]):
    ...

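For orientation, a minimal usage sketch. The type arguments, file names and nested values are illustrative, and the exact coercion behaviour of the nested frozen models (plain dicts/lists parsed into frozen counterparts) is assumed rather than documented on this page:

from omnipy.modules.general.datasets import (NestedFrozenDictsDataset,
                                             NestedFrozenDictsOrTuplesDataset,
                                             NestedFrozenTuplesDataset)

# Keys are str, leaf scalars are int (illustrative type arguments)
dicts_dataset = NestedFrozenDictsDataset[str, int]()
dicts_dataset['file_1'] = {'a': {'b': 1, 'c': 2}}

tuples_dataset = NestedFrozenTuplesDataset[int]()
tuples_dataset['file_1'] = [[1, 2], [3, 4]]

mixed_dataset = NestedFrozenDictsOrTuplesDataset[str, int]()
mixed_dataset['file_1'] = {'a': [1, 2], 'b': {'c': 3}}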

Classes

NestedFrozenDictsDataset

class NestedFrozenDictsDataset(
    value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
    **input_data: object
)

Dict-based container of data files that follow a specific Model

Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

The specialization must be done through the use of Model, either directly, e.g.::

MyDataset = Dataset[Model[dict[str, list[int]]]]

... or indirectly, using a Model subclass, e.g.::

class MyModel(Model[dict[str, list[int]]]):
    pass

MyDataset = Dataset[MyModel]

... alternatively through the specification of a Dataset subclass::

class MyDataset(Dataset[MyModel]):
    pass

The specialization can also be done in a more deeply nested structure, e.g.::

class MyNumberList(Model[list[int]]):
    pass

class MyToplevelDict(Model[dict[str, MyNumberList]]):
    pass

class MyDataset(Dataset[MyToplevelDict]):
    pass

Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::

MyNumberListDataset = Dataset[Model[list[int]]]

my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]

print(my_dataset.keys())

The Dataset class is a wrapper class around the powerful GenericModel class from pydantic.
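
Since NestedFrozenDictsDataset is already specialized with NestedFrozenDictsModel, it only needs key and scalar type arguments before use. A hedged sketch (the parsing of plain nested dicts into their frozen counterparts is assumed):

from omnipy.modules.general.datasets import NestedFrozenDictsDataset

dataset = NestedFrozenDictsDataset[str, int]({'file_1': {'a': {'b': 1}}})
dataset['file_2'] = {'c': 2}
print(dataset.keys())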

View Source
class NestedFrozenDictsDataset(Dataset[NestedFrozenDictsModel[_KeyT, _ScT]], Generic[_KeyT, _ScT]):
    ...

Class variables

Config

Static methods

get_model_class
def get_model_class() -> Type[omnipy.data.model.Model]

Returns the concrete Model class used for all data files in the dataset, e.g.:

Model[list[int]]

Returns:

Type Description
Type[Model] The concrete Model class used for all data files in the dataset
View Source
    @classmethod
    def get_model_class(cls) -> Type[Model]:
        """
        Returns the concrete Model class used for all data files in the dataset, e.g.:
        `Model[list[int]]`

        :return: The concrete Model class used for all data files in the dataset
        """
        model_type = cls.__fields__.get(DATA_KEY).type_
        return cls._origmodel_if_annotated_optional(model_type)

to_json_schema
def to_json_schema(
    pretty=False
) -> str | dict[str, str]

Parameters:

Name     Type    Description                                      Default
pretty   bool    If True, pretty-print the returned JSON schema   False

Returns:

Type Description
str | dict[str, str]
View Source
    @classmethod
    def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
        result = {}
        schema = cls.schema()

        for key, val in schema['properties']['data'].items():
            result[key] = val

        result['title'] = schema['title']
        result['definitions'] = schema['definitions']

        if pretty:
            return cls._pretty_print_json(result)
        else:
            return json.dumps(result)

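A possible call pattern (a sketch; the exact shape of the emitted schema depends on the pydantic schema of the specialized model):

from omnipy.modules.general.datasets import NestedFrozenDictsDataset

schema_json = NestedFrozenDictsDataset[str, int].to_json_schema(pretty=True)
print(schema_json)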

Methods

__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:
        # return self.__class__ == other.__class__ and super().__eq__(other)
        return isinstance(other, Dataset) \
            and self.__class__ == other.__class__ \
            and self.data == other.data \
            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case

__iter__
def __iter__(
    self
) -> Iterator

so dict(model) works

Returns:

Type Description
Iterator
View Source
    def __iter__(self) -> Iterator:
        return UserDict.__iter__(self)

__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:
        if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
            super().__setattr__(attr, value)
        else:
            raise RuntimeError('Model does not allow setting of extra attributes')

__setitem__
def __setitem__(
    self,
    obj_type: str,
    data_obj: Any
) -> None

Parameters:

Name Type Description Default
obj_type str
data_obj Any

Returns:

Type Description
NoneType
View Source
    def __setitem__(self, obj_type: str, data_obj: Any) -> None:
        has_prev_value = obj_type in self.data
        prev_value = self.data.get(obj_type)

        try:
            self.data[obj_type] = data_obj
            self._validate(obj_type)
        except:  # noqa
            if has_prev_value:
                self.data[obj_type] = prev_value
            else:
                del self.data[obj_type]
            raise

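The try/except above means that a failed assignment rolls the dataset back to its previous state. A sketch, assuming the model rejects the offending value:

from omnipy.data.dataset import Dataset
from omnipy.data.model import Model

dataset = Dataset[Model[list[int]]]({'file_1': [1, 2, 3]})
try:
    dataset['file_1'] = ['a', 'b']  # not parsable as list[int]
except Exception:
    pass
print(dataset.to_data())  # 'file_1' is expected to still hold [1, 2, 3]
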
as_multi_model_dataset
def as_multi_model_dataset(
    self
) -> MultiModelDataset[ModelT]

Returns:

Type Description
'MultiModelDataset[ModelT]'
View Source
    def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
        multi_model_dataset = MultiModelDataset[self.get_model_class()]()

        for obj_type in self:
            multi_model_dataset.data[obj_type] = self.data[obj_type]

        return multi_model_dataset

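A sketch of the conversion (MultiModelDataset presumably allows individual data files to be assigned their own models afterwards; that API is not covered on this page):

multi_dataset = dataset.as_multi_model_dataset()
print(multi_dataset.to_data())  # same contents as dataset.to_data()
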
from_data
def from_data(
    self,
    data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, Any], Iterator[tuple[str, Any]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_data(self,
                  data: dict[str, Any] | Iterator[tuple[str, Any]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_data(obj_val)
            self[obj_type] = new_model

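A sketch with plain Python objects per data file (the file names and values are illustrative; update=False clears the dataset before loading):

from omnipy.modules.general.datasets import NestedFrozenDictsDataset

dataset = NestedFrozenDictsDataset[str, int]()
dataset.from_data({'file_1': {'a': 1}, 'file_2': {'b': {'c': 2}}})
dataset.from_data({'file_3': {'d': 3}}, update=True)       # merge into existing files
dataset.from_data({'file_only': {'e': 4}}, update=False)   # replace all files
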
from_json
def from_json(
    self,
    data: Union[dict[str, str], Iterator[tuple[str, str]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, str], Iterator[tuple[str, str]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_json(self,
                  data: dict[str, str] | Iterator[tuple[str, str]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_json(obj_val)
            self[obj_type] = new_model

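A sketch with one JSON string per data file (the payload is illustrative):

from omnipy.modules.general.datasets import NestedFrozenDictsDataset

dataset = NestedFrozenDictsDataset[str, int]()
dataset.from_json({'file_1': '{"a": {"b": 1}}'})
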
to_data
def to_data(
    self
) -> dict[str, typing.Any]

Returns:

Type Description
dict[str, typing.Any]
View Source
    def to_data(self) -> dict[str, Any]:
        return GenericModel.dict(self).get(DATA_KEY)

to_json
def to_json(
    self,
    pretty=False
) -> dict[str, str]

Parameters:

Name     Type    Description                                           Default
pretty   bool    If True, pretty-print the per-file JSON strings       False

Returns:

Type Description
dict[str, str]
View Source
    def to_json(self, pretty=False) -> dict[str, str]:
        result = {}

        for key, val in self.to_data().items():
            result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)

        return result

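A round-trip sketch covering to_data and to_json (whether to_data returns frozen or plain containers is not specified here, so the printed output is only indicative):

from omnipy.modules.general.datasets import NestedFrozenDictsDataset

dataset = NestedFrozenDictsDataset[str, int]({'file_1': {'a': 1}})
print(dataset.to_data())              # {'file_1': ...}
print(dataset.to_json(pretty=True))   # {'file_1': '...'} with indented JSON strings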

NestedFrozenDictsOrTuplesDataset

class NestedFrozenDictsOrTuplesDataset(
    value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
    **input_data: object
)

Dict-based container of data files that follow a specific Model

Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

The specialization must be done through the use of Model, either directly, e.g.::

MyDataset = Dataset[Model[dict[str, list[int]]]]

... or indirectly, using a Model subclass, e.g.::

class MyModel(Model[dict[str, list[int]]]):
    pass

MyDataset = Dataset[MyModel]

... alternatively through the specification of a Dataset subclass::

class MyDataset(Dataset[MyModel]):
    pass

The specialization can also be done in a more deeply nested structure, e.g.::

class MyNumberList(Model[list[int]]):
    pass

class MyToplevelDict(Model[dict[str, MyNumberList]]):
    pass

class MyDataset(Dataset[MyToplevelDict]):
    pass

Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::

MyNumberListDataset = Dataset[Model[list[int]]]

my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]

print(my_dataset.keys())

The Dataset class is a wrapper class around the powerful GenericModel class from pydantic.
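
As with its sibling classes, NestedFrozenDictsOrTuplesDataset is already specialized with NestedFrozenDictsOrTuplesModel; a hedged sketch with illustrative type arguments and values:

from omnipy.modules.general.datasets import NestedFrozenDictsOrTuplesDataset

dataset = NestedFrozenDictsOrTuplesDataset[str, int]({'file_1': {'a': [1, 2], 'b': {'c': 3}}})
print(dataset.keys())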

View Source
class NestedFrozenDictsOrTuplesDataset(Dataset[NestedFrozenDictsOrTuplesModel[_KeyT, _ScT]],
                                       Generic[_KeyT, _ScT]):
    ...

Class variables

Config

Static methods

get_model_class
def get_model_class() -> Type[omnipy.data.model.Model]

Returns the concrete Model class used for all data files in the dataset, e.g.:

Model[list[int]]

Returns:

Type Description
Type[Model] The concrete Model class used for all data files in the dataset
View Source
    @classmethod
    def get_model_class(cls) -> Type[Model]:
        """
        Returns the concrete Model class used for all data files in the dataset, e.g.:
        `Model[list[int]]`

        :return: The concrete Model class used for all data files in the dataset
        """
        model_type = cls.__fields__.get(DATA_KEY).type_
        return cls._origmodel_if_annotated_optional(model_type)

to_json_schema
def to_json_schema(
    pretty=False
) -> str | dict[str, str]

Parameters:

Name     Type    Description                                      Default
pretty   bool    If True, pretty-print the returned JSON schema   False

Returns:

Type Description
str | dict[str, str]
View Source
    @classmethod
    def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
        result = {}
        schema = cls.schema()

        for key, val in schema['properties']['data'].items():
            result[key] = val

        result['title'] = schema['title']
        result['definitions'] = schema['definitions']

        if pretty:
            return cls._pretty_print_json(result)
        else:
            return json.dumps(result)

Methods

__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:
        # return self.__class__ == other.__class__ and super().__eq__(other)
        return isinstance(other, Dataset) \
            and self.__class__ == other.__class__ \
            and self.data == other.data \
            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case

__iter__
def __iter__(
    self
) -> Iterator

so dict(model) works

Returns:

Type Description
Iterator
View Source
    def __iter__(self) -> Iterator:
        return UserDict.__iter__(self)

__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:
        if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
            super().__setattr__(attr, value)
        else:
            raise RuntimeError('Model does not allow setting of extra attributes')

__setitem__
def __setitem__(
    self,
    obj_type: str,
    data_obj: Any
) -> None

Parameters:

Name Type Description Default
obj_type str
data_obj Any

Returns:

Type Description
NoneType
View Source
    def __setitem__(self, obj_type: str, data_obj: Any) -> None:
        has_prev_value = obj_type in self.data
        prev_value = self.data.get(obj_type)

        try:
            self.data[obj_type] = data_obj
            self._validate(obj_type)
        except:  # noqa
            if has_prev_value:
                self.data[obj_type] = prev_value
            else:
                del self.data[obj_type]
            raise

as_multi_model_dataset
def as_multi_model_dataset(
    self
) -> MultiModelDataset[ModelT]

Returns:

Type Description
'MultiModelDataset[ModelT]'
View Source
    def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
        multi_model_dataset = MultiModelDataset[self.get_model_class()]()

        for obj_type in self:
            multi_model_dataset.data[obj_type] = self.data[obj_type]

        return multi_model_dataset

from_data
def from_data(
    self,
    data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, Any], Iterator[tuple[str, Any]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_data(self,
                  data: dict[str, Any] | Iterator[tuple[str, Any]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_data(obj_val)
            self[obj_type] = new_model

from_json
def from_json(
    self,
    data: Union[dict[str, str], Iterator[tuple[str, str]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, str], Iterator[tuple[str, str]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_json(self,
                  data: dict[str, str] | Iterator[tuple[str, str]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_json(obj_val)
            self[obj_type] = new_model

to_data
def to_data(
    self
) -> dict[str, typing.Any]

Returns:

Type Description
dict[str, typing.Any]
View Source
    def to_data(self) -> dict[str, Any]:
        return GenericModel.dict(self).get(DATA_KEY)

to_json
def to_json(
    self,
    pretty=False
) -> dict[str, str]

Parameters:

Name     Type    Description                                           Default
pretty   bool    If True, pretty-print the per-file JSON strings       False

Returns:

Type Description
dict[str, str]
View Source
    def to_json(self, pretty=False) -> dict[str, str]:
        result = {}

        for key, val in self.to_data().items():
            result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)

        return result

NestedFrozenTuplesDataset

class NestedFrozenTuplesDataset(
    value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
    *,
    data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
    **input_data: object
)

Dict-based container of data files that follow a specific Model

Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.

The specialization must be done through the use of Model, either directly, e.g.::

MyDataset = Dataset[Model[dict[str, list[int]]]]

... or indirectly, using a Model subclass, e.g.::

class MyModel(Model[dict[str, list[int]]]):
    pass

MyDataset = Dataset[MyModel]

... alternatively through the specification of a Dataset subclass::

class MyDataset(Dataset[MyModel]):
    pass

The specialization can also be done in a more deeply nested structure, e.g.::

class MyNumberList(Model[list[int]]):
    pass

class MyToplevelDict(Model[dict[str, MyNumberList]]):
    pass

class MyDataset(Dataset[MyToplevelDict]):
    pass

Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::

MyNumberListDataset = Dataset[Model[list[int]]]

my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]

print(my_dataset.keys())

The Dataset class is a wrapper class around the powerful GenericModel class from pydantic.
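
NestedFrozenTuplesDataset is specialized with NestedFrozenTuplesModel and takes a single scalar type argument; a hedged sketch (parsing of plain nested lists into frozen tuples is assumed):

from omnipy.modules.general.datasets import NestedFrozenTuplesDataset

dataset = NestedFrozenTuplesDataset[int]({'file_1': [[1, 2], [3, 4]]})
dataset['file_2'] = [5, [6, 7]]
print(dataset.keys())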

View Source
class NestedFrozenTuplesDataset(Dataset[NestedFrozenTuplesModel[_ScT]], Generic[_ScT]):
    ...

Class variables

Config

Static methods

get_model_class
def get_model_class() -> Type[omnipy.data.model.Model]

Returns the concrete Model class used for all data files in the dataset, e.g.:

Model[list[int]]

Returns:

Type Description
Type[Model] The concrete Model class used for all data files in the dataset
View Source
    @classmethod
    def get_model_class(cls) -> Type[Model]:
        """
        Returns the concrete Model class used for all data files in the dataset, e.g.:
        `Model[list[int]]`

        :return: The concrete Model class used for all data files in the dataset
        """
        model_type = cls.__fields__.get(DATA_KEY).type_
        return cls._origmodel_if_annotated_optional(model_type)

to_json_schema
def to_json_schema(
    pretty=False
) -> str | dict[str, str]

Parameters:

Name     Type    Description                                      Default
pretty   bool    If True, pretty-print the returned JSON schema   False

Returns:

Type Description
str | dict[str, str]
View Source
    @classmethod
    def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
        result = {}
        schema = cls.schema()

        for key, val in schema['properties']['data'].items():
            result[key] = val

        result['title'] = schema['title']
        result['definitions'] = schema['definitions']

        if pretty:
            return cls._pretty_print_json(result)
        else:
            return json.dumps(result)

Methods

__eq__
def __eq__(
    self,
    other: object
) -> bool

Return self==value.

Parameters:

Name Type Description Default
other object

Returns:

Type Description
bool
View Source
    def __eq__(self, other: object) -> bool:
        # return self.__class__ == other.__class__ and super().__eq__(other)
        return isinstance(other, Dataset) \
            and self.__class__ == other.__class__ \
            and self.data == other.data \
            and self.to_data() == other.to_data()  # last is probably unnecessary, but just in case

__iter__
def __iter__(
    self
) -> Iterator

so dict(model) works

Returns:

Type Description
Iterator
View Source
    def __iter__(self) -> Iterator:
        return UserDict.__iter__(self)

__setattr__
def __setattr__(
    self,
    attr: str,
    value: Any
) -> None

Implement setattr(self, name, value).

Parameters:

Name Type Description Default
attr str
value Any

Returns:

Type Description
NoneType
View Source
    def __setattr__(self, attr: str, value: Any) -> None:
        if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
            super().__setattr__(attr, value)
        else:
            raise RuntimeError('Model does not allow setting of extra attributes')

__setitem__
def __setitem__(
    self,
    obj_type: str,
    data_obj: Any
) -> None

Parameters:

Name Type Description Default
obj_type str
data_obj Any

Returns:

Type Description
NoneType
View Source
    def __setitem__(self, obj_type: str, data_obj: Any) -> None:
        has_prev_value = obj_type in self.data
        prev_value = self.data.get(obj_type)

        try:
            self.data[obj_type] = data_obj
            self._validate(obj_type)
        except:  # noqa
            if has_prev_value:
                self.data[obj_type] = prev_value
            else:
                del self.data[obj_type]
            raise

as_multi_model_dataset
def as_multi_model_dataset(
    self
) -> MultiModelDataset[ModelT]

Returns:

Type Description
'MultiModelDataset[ModelT]'
View Source
    def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
        multi_model_dataset = MultiModelDataset[self.get_model_class()]()

        for obj_type in self:
            multi_model_dataset.data[obj_type] = self.data[obj_type]

        return multi_model_dataset

from_data
def from_data(
    self,
    data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, Any], Iterator[tuple[str, Any]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_data(self,
                  data: dict[str, Any] | Iterator[tuple[str, Any]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_data(obj_val)
            self[obj_type] = new_model

from_json
def from_json(
    self,
    data: Union[dict[str, str], Iterator[tuple[str, str]]],
    update: bool = True
) -> None

Parameters:

Name Type Description Default
data Union[dict[str, str], Iterator[tuple[str, str]]]
update bool True

Returns:

Type Description
NoneType
View Source
    def from_json(self,
                  data: dict[str, str] | Iterator[tuple[str, str]],
                  update: bool = True) -> None:
        if not isinstance(data, dict):
            data = dict(data)

        if not update:
            self.clear()

        for obj_type, obj_val in data.items():
            new_model = self.get_model_class()()  # noqa
            new_model.from_json(obj_val)
            self[obj_type] = new_model

to_data
def to_data(
    self
) -> dict[str, typing.Any]

Returns:

Type Description
dict[str, typing.Any]
View Source
    def to_data(self) -> dict[str, Any]:
        return GenericModel.dict(self).get(DATA_KEY)

to_json
def to_json(
    self,
    pretty=False
) -> dict[str, str]

Parameters:

Name     Type    Description                                           Default
pretty   bool    If True, pretty-print the per-file JSON strings       False

Returns:

Type Description
dict[str, str]
View Source
    def to_json(self, pretty=False) -> dict[str, str]:
        result = {}

        for key, val in self.to_data().items():
            result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)

        return result