@TaskTemplate()
def join_tables(table_1: PandasModel,
                table_2: PandasModel,
                join_type: str = 'outer',
                on_cols: Sequence[str] | Mapping[str, str] | None = None) -> PandasModel:
    """Join two tables on shared or explicitly mapped columns.

    Args:
        table_1: Left-hand table of the join.
        table_2: Right-hand table of the join.
        join_type: One of ``'inner'``, ``'outer'``, ``'left'`` or ``'right'``.
        on_cols: Columns to join on. ``None`` joins on the column names
            returned by ``extract_common_colnames(table_1, table_2)``. A
            sequence is passed to ``pd.merge`` as ``on``. A mapping is split
            into ``left_on`` (its keys, columns of ``table_1``) and
            ``right_on`` (its values, columns of ``table_2``).

    Returns:
        A ``PandasModel`` wrapping the merged frame after ``convert_dtypes()``.
        The merge is called with ``suffixes=('_1', '_2')``.

    Raises:
        ValueError: If ``join_type`` is ``'cross'`` or any other unsupported
            value, if ``on_cols`` is given but empty, or if ``on_cols`` is
            ``None`` and the tables share no column names.
    """
    from .lazy_import import pd

    if join_type == 'cross':
        raise ValueError('join_type="cross" not supported. Please use "cartesian_product" task.')

    # Explicit raise instead of `assert`: assertions are stripped under
    # `python -O`, which would let an invalid join type reach pd.merge.
    supported_join_types = ('inner', 'outer', 'left', 'right')
    if join_type not in supported_join_types:
        raise ValueError(f'Unsupported join_type: {join_type!r}. '
                         f'Expected one of {supported_join_types}.')

    common_colnames = extract_common_colnames(table_1, table_2)

    if (on_cols is None and len(common_colnames) == 0) \
            or (on_cols is not None and len(on_cols) == 0):
        raise ValueError(f'No common column names were found. '
                         f'table_1: {tuple(table_1.columns)}. '
                         f'table_2: {tuple(table_2.columns)}. '
                         f'on_cols: {on_cols}')

    # Exactly one of `on` or the (`left_on`, `right_on`) pair is set below.
    on = None
    left_on = None
    right_on = None
    if on_cols is None:
        on = common_colnames
    elif isinstance(on_cols, Mapping):
        left_on = tuple(on_cols.keys())
        right_on = tuple(on_cols.values())
    else:
        on = on_cols

    # `on` is None only in the Mapping branch, so `.items()` is safe here.
    column_info = f'common columns: {on}' if on is not None \
        else f'column mappings: {tuple(on_cols.items())}'
    print(f'Joining tables on {column_info}, using join type: {join_type}...')

    # NOTE(review): `.loc[:, :]` presumably yields the full contents of each
    # model as a plain DataFrame for pd.merge — confirm against PandasModel.
    merged_df = pd.merge(
        table_1.loc[:, :],
        table_2.loc[:, :],
        on=on,
        left_on=left_on,
        right_on=right_on,
        how=join_type,
        suffixes=('_1', '_2'),
    ).convert_dtypes()
    return PandasModel(merged_df)