Skip to content

Module omnipy.modules.json.tasks

Overview

View Source
from collections import defaultdict

from typing import cast

from omnipy.compute.flow import FuncFlowTemplate

from omnipy.compute.task import TaskTemplate

from omnipy.compute.typing import mypy_fix_func_flow_template, mypy_fix_task_template

from omnipy.data.dataset import Dataset

from omnipy.data.model import Model

from .datasets import (JsonDataset,

                       JsonDictDataset,

                       JsonDictOfDictsDataset,

                       JsonDictOfListsOfDictsDataset,

                       JsonListDataset,

                       JsonListOfDictsDataset,

                       JsonListOfDictsOfScalarsDataset)

from .functions import flatten_outer_level_of_nested_record

from .models import JsonListModel

from .typedefs import (JsonDictOfListsOfDicts,

                       JsonDictOfScalars,

                       JsonListOfDicts,

                       JsonListOfDictsOfScalars)

# Default name of the key under which `transpose_dicts_2_lists` (and the flows
# built on it) store the generated record id.
ID_KEY = '_omnipy_id'

# NOTE(review): not referenced in the visible part of this module; presumably the
# intended default for the `ref_key` argument of
# `flatten_outer_level_of_all_data_files` -- confirm against callers.
REF_KEY = '_omnipy_ref'

# NOTE(review): not referenced in the visible part of this module; presumably the
# intended default for the `default_key` argument of
# `flatten_outer_level_of_all_data_files` -- confirm against callers.
DEFAULT_KEY = '__root__'

@mypy_fix_task_template
@TaskTemplate()
def convert_dataset_string_to_json(dataset: Dataset[Model[str]]) -> JsonDataset:
    """Load the string contents of a dataset into a new ``JsonDataset``.

    Args:
        dataset: Dataset whose data files are string models.

    Returns:
        A fresh ``JsonDataset`` populated by passing ``dataset.to_data()`` to
        its ``from_json`` method.
    """
    parsed_dataset = JsonDataset()
    raw_contents = dataset.to_data()
    parsed_dataset.from_json(raw_contents)
    return parsed_dataset

@mypy_fix_task_template
@TaskTemplate()
def transpose_dicts_2_lists(dataset: JsonDictDataset, id_key: str = ID_KEY) -> JsonListDataset:
    """Regroup the values of all data files by their dict key.

    Every key found in any data file of ``dataset`` becomes a data file in the
    output, holding a list of the values stored under that key across all
    input data files. A value that is not a list is treated as a one-item
    list. Each dict item is copied into a new record that starts with an id
    entry of the form ``'<input data file name>_<index in list>'``.

    Args:
        dataset: Dataset whose data files are dicts.
        id_key: Key under which the generated id is stored in dict records.

    Returns:
        A ``JsonListDataset`` with one list per key found in the input.

    Raises:
        AssertionError: If a dict item already contains ``id_key``.
    """
    transposed = JsonListDataset()

    for file_name, file_content in dataset.items():
        for field, field_value in file_content.items():
            if field not in transposed:
                transposed[field] = []

            # Wrap a lone value so the loop below can treat every field as a list.
            is_list = field_value.outer_type() == list
            if not is_list:
                field_value = JsonListModel([field_value])

            for position, element in enumerate(field_value):
                if element.outer_type() == dict:
                    # Start the record with the generated id, then merge the
                    # original content into that same record.
                    transposed[field].append({id_key: f'{file_name}_{position}'})
                    # NOTE(review): this check runs after the id-only record was
                    # appended and is skipped when Python runs with -O.
                    assert id_key not in element
                    transposed[field][-1] |= element
                else:
                    transposed[field].append(element)

    return transposed

@mypy_fix_func_flow_template
@FuncFlowTemplate()
def transpose_dict_of_dicts_2_list_of_dicts(
    dataset: JsonDictOfDictsDataset,
    id_key: str = ID_KEY,
) -> JsonListOfDictsDataset:
    """Transpose a dataset of dicts of dicts into a dataset of lists of dicts.

    Runs ``transpose_dicts_2_lists`` on ``dataset`` and merges the result into
    a new ``JsonListOfDictsDataset``.

    Args:
        dataset: Dataset to transpose.
        id_key: Key name forwarded to ``transpose_dicts_2_lists``.

    Returns:
        The ``JsonListOfDictsDataset`` holding the transposed data.
    """
    list_of_dicts_dataset = JsonListOfDictsDataset()
    transposed = transpose_dicts_2_lists(dataset, id_key=id_key)
    list_of_dicts_dataset |= transposed
    return list_of_dicts_dataset

@mypy_fix_func_flow_template
@FuncFlowTemplate()
def transpose_dicts_of_lists_of_dicts_2_lists_of_dicts(
    dataset: JsonDictOfListsOfDictsDataset,
    id_key: str = ID_KEY,
) -> JsonListOfDictsDataset:
    """Transpose a dataset of dicts of lists of dicts into lists of dicts.

    Runs ``transpose_dicts_2_lists`` on ``dataset`` and merges the result into
    a new ``JsonListOfDictsDataset``.

    Args:
        dataset: Dataset to transpose.
        id_key: Key name forwarded to ``transpose_dicts_2_lists``.

    Returns:
        The ``JsonListOfDictsDataset`` holding the transposed data.
    """
    list_of_dicts_dataset = JsonListOfDictsDataset()
    transposed = transpose_dicts_2_lists(dataset, id_key=id_key)
    list_of_dicts_dataset |= transposed
    return list_of_dicts_dataset

@mypy_fix_task_template
@TaskTemplate()
def flatten_outer_level_of_all_data_files(
    dataset: JsonListOfDictsDataset,
    id_key: str,
    ref_key: str,
    default_key: str,
) -> tuple[JsonListOfDictsOfScalarsDataset, JsonListOfDictsDataset]:
    """Apply ``flatten_outer_level_of_nested_record`` to every record.

    Each record of each data file is passed to
    ``flatten_outer_level_of_nested_record`` together with its index (as a
    string), the title of its data file and the three key arguments. The
    helper returns a record of scalars, which is appended under the original
    data file title, and a mapping of new data file titles to lists of dicts,
    which are accumulated per title across all records.

    Args:
        dataset: Dataset whose data files are lists of dicts.
        id_key: Forwarded unchanged to the helper.
        ref_key: Forwarded unchanged to the helper.
        default_key: Forwarded unchanged to the helper.

    Returns:
        A pair ``(scalar_records_dataset, remaining_dataset)``: the first holds
        the records of scalars per input data file (empty input data files are
        kept as empty lists), the second holds the accumulated new data files.
    """
    scalar_records_by_title: defaultdict[str, JsonListOfDictsOfScalars] = defaultdict(
        JsonListOfDictsOfScalars)
    split_off_by_title: defaultdict[str, JsonListOfDicts] = defaultdict(JsonListOfDicts)

    raw_data: JsonDictOfListsOfDicts = cast(JsonDictOfListsOfDicts, dataset.to_data())

    for title, records in raw_data.items():
        if not records:
            # The loop below never runs for an empty data file, so register
            # its title explicitly to keep it in the output.
            scalar_records_by_title[title] = JsonListOfDictsOfScalars()

        for index, record in enumerate(records):
            flat_record: JsonDictOfScalars
            split_off_files: JsonDictOfListsOfDicts
            flat_record, split_off_files = flatten_outer_level_of_nested_record(
                record,
                str(index),
                title,
                id_key,
                ref_key,
                default_key,
            )

            split_off_title: str
            split_off_records: JsonListOfDicts
            for split_off_title, split_off_records in split_off_files.items():
                split_off_by_title[split_off_title] += split_off_records

            scalar_records_by_title[title].append(flat_record)

    return (
        JsonListOfDictsOfScalarsDataset(scalar_records_by_title),
        JsonListOfDictsDataset(split_off_by_title),
    )

Variables

DEFAULT_KEY
ID_KEY
REF_KEY
convert_dataset_string_to_json
flatten_outer_level_of_all_data_files
transpose_dict_of_dicts_2_list_of_dicts
transpose_dicts_2_lists
transpose_dicts_of_lists_of_dicts_2_lists_of_dicts