Module omnipy.modules.general.datasets
Overview
View Source
from typing import Generic, TypeVar
from omnipy.data.dataset import Dataset
from omnipy.modules.general.models import (NestedFrozenDictsModel,
NestedFrozenDictsOrTuplesModel,
NestedFrozenTuplesModel)
_KeyT = TypeVar('_KeyT')
_ScT = TypeVar('_ScT')
class NestedFrozenTuplesDataset(Dataset[NestedFrozenTuplesModel[_ScT]], Generic[_ScT]):
...
class NestedFrozenDictsDataset(Dataset[NestedFrozenDictsModel[_KeyT, _ScT]], Generic[_KeyT, _ScT]):
...
class NestedFrozenDictsOrTuplesDataset(Dataset[NestedFrozenDictsOrTuplesModel[_KeyT, _ScT]],
Generic[_KeyT, _ScT]):
...
Classes
NestedFrozenDictsDataset
class NestedFrozenDictsDataset(
value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
*,
data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
**input_data: object
)
Dict-based container of data files that follow a specific Model
Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
The specialization must be done through the use of Model, either directly, e.g.::
MyDataset = Dataset[Model[dict[str, list[int]]]]
... or indirectly, using a Model subclass, e.g.::
class MyModel(Model[dict[str, list[int]]]):
pass
MyDataset = Dataset[MyModel]
... alternatively through the specification of a Dataset subclass::
class MyDataset(Dataset[MyModel]):
pass
The specialization can also be done in a more deeply nested structure, e.g.::
class MyNumberList(Model[list[int]]):
pass
class MyToplevelDict(Model[dict[str, MyNumberList]]):
pass
class MyDataset(Dataset[MyToplevelDict]):
pass
Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::
MyNumberListDataset = Dataset[Model[list[int]]]
my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]
print(my_dataset.keys())
The Dataset class is a wrapper class around the powerful GenericModel
class from pydantic.
View Source
class NestedFrozenDictsDataset(Dataset[NestedFrozenDictsModel[_KeyT, _ScT]], Generic[_KeyT, _ScT]):
...
Class variables
Static methods
get_model_class
Returns the concrete Model class used for all data files in the dataset, e.g.:
Model[list[int]]
Returns:
Type | Description |
---|---|
Type[Model] |
The concrete Model class used for all data files in the dataset |
View Source
@classmethod
def get_model_class(cls) -> Type[Model]:
"""
Returns the concrete Model class used for all data files in the dataset, e.g.:
`Model[list[int]]`
:return: The concrete Model class used for all data files in the dataset
"""
model_type = cls.__fields__.get(DATA_KEY).type_
return cls._origmodel_if_annotated_optional(model_type)
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
dict[str, str] |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
result = {}
schema = cls.schema()
for key, val in schema['properties']['data'].items():
result[key] = val
result['title'] = schema['title']
result['definitions'] = schema['definitions']
if pretty:
return cls._pretty_print_json(result)
else:
return json.dumps(result)
Methods
__eq__
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
# return self.__class__ == other.__class__ and super().__eq__(other)
return isinstance(other, Dataset) \
and self.__class__ == other.__class__ \
and self.data == other.data \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
__iter__
so that `dict(model)` works
Returns:
Type | Description |
---|---|
Iterator |
View Source
def __iter__(self) -> Iterator:
return UserDict.__iter__(self)
__setattr__
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
super().__setattr__(attr, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
Parameters:
Name | Type | Description | Default |
---|---|---|---|
obj_type |
str |
||
data_obj |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setitem__(self, obj_type: str, data_obj: Any) -> None:
has_prev_value = obj_type in self.data
prev_value = self.data.get(obj_type)
try:
self.data[obj_type] = data_obj
self._validate(obj_type)
except: # noqa
if has_prev_value:
self.data[obj_type] = prev_value
else:
del self.data[obj_type]
raise
as_multi_model_dataset
Returns:
Type | Description |
---|---|
'MultiModelDataset[ModelT]' |
View Source
def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
multi_model_dataset = MultiModelDataset[self.get_model_class()]()
for obj_type in self:
multi_model_dataset.data[obj_type] = self.data[obj_type]
return multi_model_dataset
from_data
def from_data(
self,
data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, Any], Iterator[tuple[str, Any]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self,
data: dict[str, Any] | Iterator[tuple[str, Any]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_data(obj_val)
self[obj_type] = new_model
from_json
def from_json(
self,
data: Union[dict[str, str], Iterator[tuple[str, str]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, str], Iterator[tuple[str, str]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self,
data: dict[str, str] | Iterator[tuple[str, str]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_json(obj_val)
self[obj_type] = new_model
to_data
Returns:
Type | Description |
---|---|
dict[str, typing.Any] |
View Source
def to_data(self) -> dict[str, Any]:
return GenericModel.dict(self).get(DATA_KEY)
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
dict[str, str] |
View Source
def to_json(self, pretty=False) -> dict[str, str]:
result = {}
for key, val in self.to_data().items():
result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)
return result
NestedFrozenDictsOrTuplesDataset
class NestedFrozenDictsOrTuplesDataset(
value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
*,
data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
**input_data: object
)
Dict-based container of data files that follow a specific Model
Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
The specialization must be done through the use of Model, either directly, e.g.::
MyDataset = Dataset[Model[dict[str, list[int]]]]
... or indirectly, using a Model subclass, e.g.::
class MyModel(Model[dict[str, list[int]]]):
pass
MyDataset = Dataset[MyModel]
... alternatively through the specification of a Dataset subclass::
class MyDataset(Dataset[MyModel]):
pass
The specialization can also be done in a more deeply nested structure, e.g.::
class MyNumberList(Model[list[int]]):
pass
class MyToplevelDict(Model[dict[str, MyNumberList]]):
pass
class MyDataset(Dataset[MyToplevelDict]):
pass
Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::
MyNumberListDataset = Dataset[Model[list[int]]]
my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]
print(my_dataset.keys())
The Dataset class is a wrapper class around the powerful GenericModel
class from pydantic.
View Source
class NestedFrozenDictsOrTuplesDataset(Dataset[NestedFrozenDictsOrTuplesModel[_KeyT, _ScT]],
Generic[_KeyT, _ScT]):
...
Class variables
Static methods
get_model_class
Returns the concrete Model class used for all data files in the dataset, e.g.:
Model[list[int]]
Returns:
Type | Description |
---|---|
Type[Model] |
The concrete Model class used for all data files in the dataset |
View Source
@classmethod
def get_model_class(cls) -> Type[Model]:
"""
Returns the concrete Model class used for all data files in the dataset, e.g.:
`Model[list[int]]`
:return: The concrete Model class used for all data files in the dataset
"""
model_type = cls.__fields__.get(DATA_KEY).type_
return cls._origmodel_if_annotated_optional(model_type)
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
dict[str, str] |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
result = {}
schema = cls.schema()
for key, val in schema['properties']['data'].items():
result[key] = val
result['title'] = schema['title']
result['definitions'] = schema['definitions']
if pretty:
return cls._pretty_print_json(result)
else:
return json.dumps(result)
Methods
__eq__
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
# return self.__class__ == other.__class__ and super().__eq__(other)
return isinstance(other, Dataset) \
and self.__class__ == other.__class__ \
and self.data == other.data \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
__iter__
so that `dict(model)` works
Returns:
Type | Description |
---|---|
Iterator |
View Source
def __iter__(self) -> Iterator:
return UserDict.__iter__(self)
__setattr__
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
super().__setattr__(attr, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
Parameters:
Name | Type | Description | Default |
---|---|---|---|
obj_type |
str |
||
data_obj |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setitem__(self, obj_type: str, data_obj: Any) -> None:
has_prev_value = obj_type in self.data
prev_value = self.data.get(obj_type)
try:
self.data[obj_type] = data_obj
self._validate(obj_type)
except: # noqa
if has_prev_value:
self.data[obj_type] = prev_value
else:
del self.data[obj_type]
raise
as_multi_model_dataset
Returns:
Type | Description |
---|---|
'MultiModelDataset[ModelT]' |
View Source
def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
multi_model_dataset = MultiModelDataset[self.get_model_class()]()
for obj_type in self:
multi_model_dataset.data[obj_type] = self.data[obj_type]
return multi_model_dataset
from_data
def from_data(
self,
data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, Any], Iterator[tuple[str, Any]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self,
data: dict[str, Any] | Iterator[tuple[str, Any]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_data(obj_val)
self[obj_type] = new_model
from_json
def from_json(
self,
data: Union[dict[str, str], Iterator[tuple[str, str]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, str], Iterator[tuple[str, str]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self,
data: dict[str, str] | Iterator[tuple[str, str]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_json(obj_val)
self[obj_type] = new_model
to_data
Returns:
Type | Description |
---|---|
dict[str, typing.Any] |
View Source
def to_data(self) -> dict[str, Any]:
return GenericModel.dict(self).get(DATA_KEY)
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
dict[str, str] |
View Source
def to_json(self, pretty=False) -> dict[str, str]:
result = {}
for key, val in self.to_data().items():
result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)
return result
NestedFrozenTuplesDataset
class NestedFrozenTuplesDataset(
value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
*,
data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
**input_data: object
)
Dict-based container of data files that follow a specific Model
Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
The specialization must be done through the use of Model, either directly, e.g.::
MyDataset = Dataset[Model[dict[str, list[int]]]]
... or indirectly, using a Model subclass, e.g.::
class MyModel(Model[dict[str, list[int]]]):
pass
MyDataset = Dataset[MyModel]
... alternatively through the specification of a Dataset subclass::
class MyDataset(Dataset[MyModel]):
pass
The specialization can also be done in a more deeply nested structure, e.g.::
class MyNumberList(Model[list[int]]):
pass
class MyToplevelDict(Model[dict[str, MyNumberList]]):
pass
class MyDataset(Dataset[MyToplevelDict]):
pass
Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::
MyNumberListDataset = Dataset[Model[list[int]]]
my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]
print(my_dataset.keys())
The Dataset class is a wrapper class around the powerful GenericModel
class from pydantic.
View Source
class NestedFrozenTuplesDataset(Dataset[NestedFrozenTuplesModel[_ScT]], Generic[_ScT]):
...
Class variables
Static methods
get_model_class
Returns the concrete Model class used for all data files in the dataset, e.g.:
Model[list[int]]
Returns:
Type | Description |
---|---|
Type[Model] |
The concrete Model class used for all data files in the dataset |
View Source
@classmethod
def get_model_class(cls) -> Type[Model]:
"""
Returns the concrete Model class used for all data files in the dataset, e.g.:
`Model[list[int]]`
:return: The concrete Model class used for all data files in the dataset
"""
model_type = cls.__fields__.get(DATA_KEY).type_
return cls._origmodel_if_annotated_optional(model_type)
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
dict[str, str] |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
result = {}
schema = cls.schema()
for key, val in schema['properties']['data'].items():
result[key] = val
result['title'] = schema['title']
result['definitions'] = schema['definitions']
if pretty:
return cls._pretty_print_json(result)
else:
return json.dumps(result)
Methods
__eq__
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
# return self.__class__ == other.__class__ and super().__eq__(other)
return isinstance(other, Dataset) \
and self.__class__ == other.__class__ \
and self.data == other.data \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
__iter__
so that `dict(model)` works
Returns:
Type | Description |
---|---|
Iterator |
View Source
def __iter__(self) -> Iterator:
return UserDict.__iter__(self)
__setattr__
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
super().__setattr__(attr, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
__setitem__
Parameters:
Name | Type | Description | Default |
---|---|---|---|
obj_type |
str |
||
data_obj |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setitem__(self, obj_type: str, data_obj: Any) -> None:
has_prev_value = obj_type in self.data
prev_value = self.data.get(obj_type)
try:
self.data[obj_type] = data_obj
self._validate(obj_type)
except: # noqa
if has_prev_value:
self.data[obj_type] = prev_value
else:
del self.data[obj_type]
raise
as_multi_model_dataset
Returns:
Type | Description |
---|---|
'MultiModelDataset[ModelT]' |
View Source
def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
multi_model_dataset = MultiModelDataset[self.get_model_class()]()
for obj_type in self:
multi_model_dataset.data[obj_type] = self.data[obj_type]
return multi_model_dataset
from_data
def from_data(
self,
data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, Any], Iterator[tuple[str, Any]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self,
data: dict[str, Any] | Iterator[tuple[str, Any]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_data(obj_val)
self[obj_type] = new_model
from_json
def from_json(
self,
data: Union[dict[str, str], Iterator[tuple[str, str]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, str], Iterator[tuple[str, str]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self,
data: dict[str, str] | Iterator[tuple[str, str]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_json(obj_val)
self[obj_type] = new_model
to_data
Returns:
Type | Description |
---|---|
dict[str, typing.Any] |
View Source
def to_data(self) -> dict[str, Any]:
return GenericModel.dict(self).get(DATA_KEY)
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
dict[str, str] |
View Source
def to_json(self, pretty=False) -> dict[str, str]:
result = {}
for key, val in self.to_data().items():
result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)
return result