Module omnipy.modules.pandas.models
Overview
View Source
from collections.abc import Iterable
from typing import Any
from omnipy.data.dataset import Dataset
from omnipy.data.model import Model, ROOT_KEY
from . import pd
class PandasModel(Model[pd.DataFrame]):
    """Omnipy data model wrapping a pandas DataFrame.

    Parsing accepts a DataFrame only if every column name is a string and no
    row consists entirely of missing (NA) values. Serialization (``dict`` /
    ``to_data``) converts the DataFrame to a list of per-row dicts, with
    ``pd.NA`` replaced by ``None`` so the result is plain-JSON-compatible.
    """
    @classmethod
    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:
        """Validate ``data`` and return it unchanged (omnipy parse hook).

        The AssertionErrors raised by the helpers surface as validation
        failures via the underlying pydantic machinery.
        """
        cls._data_column_names_are_strings(data)
        cls._data_not_empty_object(data)
        return data

    @staticmethod
    def _data_column_names_are_strings(data: pd.DataFrame) -> None:
        """Assert that every column label is a string."""
        for column in data.columns:
            # Message added so a validation failure names the offending column
            # (the original bare assert gave no diagnostic).
            assert isinstance(column, str), \
                f'Column name {column!r} is not a string'

    @staticmethod
    def _data_not_empty_object(data: pd.DataFrame) -> None:
        """Assert that no row consists solely of NA values."""
        # Series.any() keeps the reduction inside pandas instead of iterating
        # the boolean mask with the builtin any(); the result is identical.
        assert not data.isna().all(axis=1).any(), \
            'DataFrame contains at least one row with only NA values'

    def dict(self, *args, **kwargs) -> dict[str, list[dict[str, Any]]]:
        """Return ``{ROOT_KEY: <list of per-row dicts>}`` with pd.NA -> None.

        Annotation fix: DataFrame.to_dict(orient='records') returns a *list*
        of dicts, so the original return annotation (dict[str, dict[Any, Any]])
        was inaccurate.
        """
        df = super().dict(*args, **kwargs)[ROOT_KEY]
        df = df.replace({pd.NA: None})
        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:
        """Set contents from any iterable the DataFrame constructor accepts,
        converting columns to the best-fitting nullable dtypes."""
        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:
        """Set contents from a JSON string via pd.read_json, converting
        columns to the best-fitting nullable dtypes."""
        self.contents = pd.read_json(value).convert_dtypes()
class PandasDataset(Dataset[PandasModel]):
    """Dataset in which every data file is parsed and held as a PandasModel,
    i.e. as a validated pandas DataFrame."""
    ...
class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):
    """Model of a list of at least two PandasDatasets that all contain the
    same number of data files, as the class name promises."""
    @classmethod
    def _parse_data(cls, dataset_list: list[PandasDataset]) -> list[PandasDataset]:
        """Validate the dataset list and return it (omnipy parse hook).

        Fixes vs. the original:
        - The original only checked that each dataset was non-empty; it never
          verified that the datasets have the *same* number of files, despite
          the class name.
        - The original returned None, while the parse hook is expected to
          return the validated value (cf. PandasModel._parse_data, which
          returns its data).
        """
        assert len(dataset_list) >= 2, 'At least two datasets are required'
        num_files = len(dataset_list[0])
        assert num_files > 0, 'Datasets must not be empty'
        assert all(len(dataset) == num_files for dataset in dataset_list), \
            'All datasets must contain the same number of files'
        return dataset_list
Variables
Classes
ListOfPandasDatasetsWithSameNumberOfFiles
class ListOfPandasDatasetsWithSameNumberOfFiles(
value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
*,
__root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
**data: Any
)
A data model containing a value parsed according to the model.
If no value is provided, the value is set to the default value of the data model, found by
calling the model class without parameters, e.g. int()
.
Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
Example data model specialized as a class alias::
MyNumberList = Model[list[int]]
... alternatively as a Model subclass::
class MyNumberList(Model[list[int]]):
pass
Once instantiated, a Model object functions as a parser, e.g.::
my_number_list = MyNumberList([2,3,4])
my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]
While the following should raise a ValidationError
::
my_number_list.contents = ['abc', 'def']
The Model class is a wrapper class around the powerful GenericModel
class from pydantic.
See also docs of the Dataset class for more usage examples.
View Source
class ListOfPandasDatasetsWithSameNumberOfFiles(Model[list[PandasDataset]]):
    # Requires at least two datasets, each with at least one file.
    # NOTE(review): despite the class name, equal file counts across the
    # datasets are not actually verified here, and nothing is returned —
    # compare PandasModel._parse_data, which returns its data. Confirm intent.
    @classmethod
    def _parse_data(cls, dataset_list: list[PandasDataset]) -> Any:
        assert len(dataset_list) >= 2
        assert all(len(dataset) for dataset in dataset_list)
Class variables
Static methods
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str:
schema = cls.schema()
if pretty:
return cls._pretty_print_json(schema)
else:
return json.dumps(schema)
validate
Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part
of the pydantic API and not the Omnipy API.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
Any |
Returns:
Type | Description |
---|---|
Model |
View Source
@classmethod
def validate(cls: Type['Model'], value: Any) -> 'Model':
"""
Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part
of the pydantic API and not the Omnipy API.
"""
if isinstance(value, Model):
with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):
return super().validate(value)
else:
return super().validate(value)
Instance variables
Methods
delitem
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
eq
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
return isinstance(other, Model) \
and self.__class__ == other.__class__ \
and self.contents == other.contents \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
getattr
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
Returns:
Type | Description |
---|---|
Any |
View Source
def __getattr__(self, attr: str) -> Any:
ret = self._getattr_from_contents(attr)
if callable(ret):
ret = add_callback_after_call(ret, self.validate_contents)
return ret
iter
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
setattr
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ and attr not in [ROOT_KEY]:
super().__setattr__(attr, value)
else:
if attr in ['contents']:
contents_prop = getattr(self.__class__, attr)
contents_prop.__set__(self, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
setitem
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
from_data
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self, value: Any) -> None:
self.contents = value
from_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
json_contents |
str |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self, json_contents: str) -> None:
new_model = self.parse_raw(json_contents, proto=pydantic_protocol.json)
self._set_contents_without_validation(new_model)
inner_type
Parameters:
Name | Type | Description | Default |
---|---|---|---|
with_args |
bool |
Returns:
Type | Description |
---|---|
type |
None |
View Source
def inner_type(self, with_args: bool = False) -> type | None:
return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
Returns:
Type | Description |
---|---|
bool |
View Source
def is_nested_type(self) -> bool:
return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
Parameters:
Name | Type | Description | Default |
---|---|---|---|
with_args |
bool |
Returns:
Type | Description |
---|---|
type |
None |
View Source
def outer_type(self, with_args: bool = False) -> type | None:
return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
Returns:
Type | Description |
---|---|
Any |
View Source
def to_data(self) -> Any:
return self.dict()[ROOT_KEY]
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
View Source
def to_json(self, pretty=False) -> str:
json_content = self.json()
if pretty:
return self._pretty_print_json(json.loads(json_content))
else:
return json_content
validate_contents
View Source
def validate_contents(self):
self.contents = self.contents
PandasDataset
class PandasDataset(
value: Union[dict[str, object], Iterator[tuple[str, object]], pydantic.fields.UndefinedType] = PydanticUndefined,
*,
data: dict[str, object] | pydantic.fields.UndefinedType = PydanticUndefined,
**input_data: object
)
Dict-based container of data files that follow a specific Model
Dataset is a generic class that cannot be instantiated directly. Instead, a Dataset class needs to be specialized with a data model before Dataset objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
The specialization must be done through the use of Model, either directly, e.g.::
MyDataset = Dataset[Model[dict[str, list[int]]]]
... or indirectly, using a Model subclass, e.g.::
class MyModel(Model[dict[str, list[int]]]):
pass
MyDataset = Dataset[MyModel]
... alternatively through the specification of a Dataset subclass::
class MyDataset(Dataset[MyModel]):
pass
The specialization can also be done in a more deeply nested structure, e.g.::
class MyNumberList(Model[list[int]]):
pass
class MyToplevelDict(Model[dict[str, MyNumberList]]):
pass
class MyDataset(Dataset[MyToplevelDict]):
pass
Once instantiated, a dataset object functions as a dict of data files, with the keys referring to the data file names and the contents to the data file contents, e.g.::
MyNumberListDataset = Dataset[Model[list[int]]]
my_dataset = MyNumberListDataset({'file_1': [1,2,3]})
my_dataset['file_2'] = [2,3,4]
print(my_dataset.keys())
The Dataset class is a wrapper class around the powerful GenericModel
class from pydantic.
View Source
class PandasDataset(Dataset[PandasModel]):
    """Dataset in which every data file is parsed and held as a PandasModel,
    i.e. as a validated pandas DataFrame."""
    ...
Class variables
Static methods
get_model_class
Returns the concrete Model class used for all data files in the dataset, e.g.:
Model[list[int]]
Returns:
Type | Description |
---|---|
Type[Model] |
The concrete Model class used for all data files in the dataset |
View Source
@classmethod
def get_model_class(cls) -> Type[Model]:
"""
Returns the concrete Model class used for all data files in the dataset, e.g.:
`Model[list[int]]`
:return: The concrete Model class used for all data files in the dataset
"""
model_type = cls.__fields__.get(DATA_KEY).type_
return cls._origmodel_if_annotated_optional(model_type)
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
dict[str, str] |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str | dict[str, str]:
result = {}
schema = cls.schema()
for key, val in schema['properties']['data'].items():
result[key] = val
result['title'] = schema['title']
result['definitions'] = schema['definitions']
if pretty:
return cls._pretty_print_json(result)
else:
return json.dumps(result)
Methods
eq
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
# return self.__class__ == other.__class__ and super().__eq__(other)
return isinstance(other, Dataset) \
and self.__class__ == other.__class__ \
and self.data == other.data \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
iter
so dict(model)
works
Returns:
Type | Description |
---|---|
Iterator |
View Source
def __iter__(self) -> Iterator:
return UserDict.__iter__(self)
setattr
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ or attr == DATA_KEY or attr.startswith('__'):
super().__setattr__(attr, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
setitem
Parameters:
Name | Type | Description | Default |
---|---|---|---|
obj_type |
str |
||
data_obj |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setitem__(self, obj_type: str, data_obj: Any) -> None:
has_prev_value = obj_type in self.data
prev_value = self.data.get(obj_type)
try:
self.data[obj_type] = data_obj
self._validate(obj_type)
except: # noqa
if has_prev_value:
self.data[obj_type] = prev_value
else:
del self.data[obj_type]
raise
as_multi_model_dataset
Returns:
Type | Description |
---|---|
'MultiModelDataset[ModelT]' |
View Source
def as_multi_model_dataset(self) -> 'MultiModelDataset[ModelT]':
multi_model_dataset = MultiModelDataset[self.get_model_class()]()
for obj_type in self:
multi_model_dataset.data[obj_type] = self.data[obj_type]
return multi_model_dataset
from_data
def from_data(
self,
data: Union[dict[str, Any], Iterator[tuple[str, Any]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, Any], Iterator[tuple[str, Any]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self,
data: dict[str, Any] | Iterator[tuple[str, Any]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_data(obj_val)
self[obj_type] = new_model
from_json
def from_json(
self,
data: Union[dict[str, str], Iterator[tuple[str, str]]],
update: bool = True
) -> None
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
Union[dict[str, str], Iterator[tuple[str, str]]] |
||
update |
bool |
True |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self,
data: dict[str, str] | Iterator[tuple[str, str]],
update: bool = True) -> None:
if not isinstance(data, dict):
data = dict(data)
if not update:
self.clear()
for obj_type, obj_val in data.items():
new_model = self.get_model_class()() # noqa
new_model.from_json(obj_val)
self[obj_type] = new_model
to_data
Returns:
Type | Description |
---|---|
dict[str, typing.Any] |
View Source
def to_data(self) -> dict[str, Any]:
return GenericModel.dict(self).get(DATA_KEY)
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
dict[str, str] |
View Source
def to_json(self, pretty=False) -> dict[str, str]:
result = {}
for key, val in self.to_data().items():
result[key] = self._pretty_print_json(val) if pretty else json.dumps(val)
return result
PandasModel
class PandasModel(
value: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
*,
__root__: Union[Any, pydantic.fields.UndefinedType] = PydanticUndefined,
**data: Any
)
A data model containing a value parsed according to the model.
If no value is provided, the value is set to the default value of the data model, found by
calling the model class without parameters, e.g. int()
.
Model is a generic class that cannot be instantiated directly. Instead, a Model class needs to be specialized with a data type before Model objects can be instantiated. A data model functions as a data parser and guarantees that the parsed data follows the specified model.
Example data model specialized as a class alias::
MyNumberList = Model[list[int]]
... alternatively as a Model subclass::
class MyNumberList(Model[list[int]]):
pass
Once instantiated, a Model object functions as a parser, e.g.::
my_number_list = MyNumberList([2,3,4])
my_number_list.contents = ['3', 4, True]
assert my_number_list.contents == [3,4,1]
While the following should raise a ValidationError
::
my_number_list.contents = ['abc', 'def']
The Model class is a wrapper class around the powerful GenericModel
class from pydantic.
See also docs of the Dataset class for more usage examples.
View Source
class PandasModel(Model[pd.DataFrame]):
    """Omnipy data model wrapping a pandas DataFrame.

    Parsing accepts a DataFrame only if every column name is a string and no
    row consists entirely of missing (NA) values.
    """
    @classmethod
    def _parse_data(cls, data: pd.DataFrame) -> pd.DataFrame:
        # Omnipy parse hook: validate and return the DataFrame unchanged.
        # AssertionErrors raised by the helpers surface as validation errors
        # via the underlying pydantic machinery.
        cls._data_column_names_are_strings(data)
        cls._data_not_empty_object(data)
        return data

    @staticmethod
    def _data_column_names_are_strings(data: pd.DataFrame) -> None:
        # Every column label must be a string.
        for column in data.columns:
            assert isinstance(column, str)

    @staticmethod
    def _data_not_empty_object(data: pd.DataFrame) -> None:
        # Reject DataFrames containing any row made up solely of NA values.
        assert not any(data.isna().all(axis=1))

    def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:
        # Serialize as {ROOT_KEY: list of per-row dicts}, mapping pd.NA to
        # None so the result is plain-JSON-compatible.
        # NOTE(review): to_dict(orient='records') returns a list of dicts, so
        # the declared return annotation looks inaccurate — confirm.
        df = super().dict(*args, **kwargs)[ROOT_KEY]
        df = df.replace({pd.NA: None})
        return {ROOT_KEY: df.to_dict(orient='records')}

    def from_data(self, value: Iterable[Any]) -> None:
        # Build contents from any iterable the DataFrame constructor accepts;
        # convert columns to the best-fitting nullable dtypes.
        self.contents = pd.DataFrame(value).convert_dtypes()

    def from_json(self, value: str) -> None:
        # Parse a JSON string into a DataFrame with nullable dtypes.
        self.contents = pd.read_json(value).convert_dtypes()
Class variables
Static methods
to_json_schema
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
View Source
@classmethod
def to_json_schema(cls, pretty=False) -> str:
schema = cls.schema()
if pretty:
return cls._pretty_print_json(schema)
else:
return json.dumps(schema)
validate
Hack to allow overwriting of the __iter__ method without compromising pydantic validation. Part
of the pydantic API and not the Omnipy API.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
Any |
Returns:
Type | Description |
---|---|
Model |
View Source
@classmethod
def validate(cls: Type['Model'], value: Any) -> 'Model':
"""
Hack to allow overwriting of __iter__ method without compromising pydantic validation. Part
of the pydantic API and not the Omnipy API.
"""
if isinstance(value, Model):
with AttribHolder(value, '__iter__', GenericModel.__iter__, on_class=True):
return super().validate(value)
else:
return super().validate(value)
Instance variables
Methods
delitem
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
eq
Return self==value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
object |
Returns:
Type | Description |
---|---|
bool |
View Source
def __eq__(self, other: object) -> bool:
return isinstance(other, Model) \
and self.__class__ == other.__class__ \
and self.contents == other.contents \
and self.to_data() == other.to_data() # last is probably unnecessary, but just in case
getattr
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
Returns:
Type | Description |
---|---|
Any |
View Source
def __getattr__(self, attr: str) -> Any:
ret = self._getattr_from_contents(attr)
if callable(ret):
ret = add_callback_after_call(ret, self.validate_contents)
return ret
iter
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
setattr
Implement setattr(self, name, value).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
attr |
str |
||
value |
Any |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def __setattr__(self, attr: str, value: Any) -> None:
if attr in self.__dict__ and attr not in [ROOT_KEY]:
super().__setattr__(attr, value)
else:
if attr in ['contents']:
contents_prop = getattr(self.__class__, attr)
contents_prop.__set__(self, value)
else:
raise RuntimeError('Model does not allow setting of extra attributes')
setitem
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
object |
||
kwargs |
object |
Returns:
Type | Description |
---|---|
object |
View Source
def _method(cls_or_self, /, *args, **keywords):
keywords = {**self.keywords, **keywords}
return self.func(cls_or_self, *self.args, *args, **keywords)
dict
Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
args |
|||
kwargs |
Returns:
Type | Description |
---|---|
dict[str, dict[typing.Any, typing.Any]] |
View Source
def dict(self, *args, **kwargs) -> dict[str, dict[Any, Any]]:
df = super().dict(*args, **kwargs)[ROOT_KEY]
df = df.replace({pd.NA: None})
return {ROOT_KEY: df.to_dict(orient='records')}
from_data
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
collections.abc.Iterable[typing.Any] |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_data(self, value: Iterable[Any]) -> None:
self.contents = pd.DataFrame(value).convert_dtypes()
from_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value |
str |
Returns:
Type | Description |
---|---|
NoneType |
View Source
def from_json(self, value: str) -> None:
self.contents = pd.read_json(value).convert_dtypes()
inner_type
Parameters:
Name | Type | Description | Default |
---|---|---|---|
with_args |
bool |
Returns:
Type | Description |
---|---|
type |
None |
View Source
def inner_type(self, with_args: bool = False) -> type | None:
return self.__class__._get_root_type(outer=False, with_args=with_args)
is_nested_type
Returns:
Type | Description |
---|---|
bool |
View Source
def is_nested_type(self) -> bool:
return not self.inner_type(with_args=True) == self.outer_type(with_args=True)
outer_type
Parameters:
Name | Type | Description | Default |
---|---|---|---|
with_args |
bool |
Returns:
Type | Description |
---|---|
type |
None |
View Source
def outer_type(self, with_args: bool = False) -> type | None:
return self.__class__._get_root_type(outer=True, with_args=with_args)
to_data
Returns:
Type | Description |
---|---|
Any |
View Source
def to_data(self) -> Any:
return self.dict()[ROOT_KEY]
to_json
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pretty |
Returns:
Type | Description |
---|---|
str |
View Source
def to_json(self, pretty=False) -> str:
json_content = self.json()
if pretty:
return self._pretty_print_json(json.loads(json_content))
else:
return json_content
validate_contents
View Source
def validate_contents(self):
self.contents = self.contents