# Copyright (C) 2021 The InstanceLib Authors. All Rights Reserved.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import (Any, Callable, Generic, Iterable, Iterator, List, Mapping,
MutableMapping, Optional, Sequence, Set, Tuple, TypeVar,
Union)
from .combination import UpdateHookInstance
from .extractors import ColumnExtractor, DataExtractor
from .memoryvectorstorage import MemoryVectorStorage
from ..typehints import DT, KT, MT, RT, VT
from .base import Instance, InstanceProvider, ROInstanceProvider
from .vectorstorage import VectorStorage
from .children import MemoryChildrenMixin
IT = TypeVar("IT", bound="UpdateHookInstance[Any, Any, Any, Any]")
[docs]class RowInstance(Mapping[str, Any], Instance[KT, Mapping[str,Any], VT, Mapping[str, Any]], Generic[IT, KT, DT, VT, RT, MT]):
def __init__(self,
provider: "TableProvider[IT, KT, DT, VT, RT, MT]",
data: Mapping[str, Any],
vector: Optional[VT] = None,
) -> None:
self._provider = provider
self._data = data
self._vector = vector
def __getitem__(self, __k: str) -> Any:
return self.data[__k]
def __contains__(self, __o: object) -> bool:
return __o in self.data
def __iter__(self) -> Iterator[str]:
return iter(self.data)
@property
def columns(self) -> Sequence[str]:
return list(self.data.keys())
@property
def data(self) -> Mapping[str, Any]:
return self._data # type: ignore
@property
def representation(self) -> Mapping[str, Any]:
return self._data
@property
def vector(self) -> Optional[VT]:
return self._vector # type: ignore
@vector.setter
def vector(self, value: Optional[VT]) -> None:
if value is not None:
self._vector = value
self._provider.vectors[self.identifier] = value
[docs]class TableInstance(MutableMapping[str, Any],
UpdateHookInstance[KT, DT, VT, RT],
Generic[IT, KT, DT, VT, RT, MT]):
_data_extractor: DataExtractor[DT]
_repr_extractor: DataExtractor[RT]
def __init__(self,
identifier: KT,
data: MutableMapping[str, Any],
vector: Optional[VT] = None,
data_extractor: DataExtractor[DT] = ColumnExtractor("data"),
repr_extractor: DataExtractor[RT] = ColumnExtractor("data")
) -> None:
self._identifier = identifier
self._data = data
self._vector = vector
self._data_extractor = data_extractor
self._repr_extractor = repr_extractor
self._delete_hook = None
self._update_hook = None
def __getitem__(self, __k: str) -> Any:
return self._data[__k]
def __contains__(self, __o: object) -> bool:
return __o in self._data
def __iter__(self) -> Iterator[str]:
return iter(self._data)
def __len__(self) -> int:
return len(self._data)
def __setitem__(self, __k: str, __v: Any) -> None:
self._data[__k] = __v
self.update_hook()
def __delitem__(self, __v: str) -> None:
del self._data[__v]
self.update_hook()
def _safe_get(self, key: str) -> Optional[Any]:
if key in self:
return self[key]
return None
@property
def identifier(self) -> KT:
return self._identifier
@property
def columns(self) -> Sequence[str]:
return list(self._data.keys())
@property
def data(self) -> DT:
return self._data_extractor(self._data)
@property
def representation(self) -> RT:
return self._repr_extractor(self._data)
@property
def vector(self) -> Optional[VT]:
return self._vector # type: ignore
@vector.setter
def vector(self, value: Optional[VT]) -> None:
if value is not None:
self._vector = value
self.update_hook()
[docs]class TableProviderRO(ROInstanceProvider[IT, KT, DT, VT, RT], Generic[IT,KT, DT, VT, RT, MT]):
columns: Sequence[str]
storage: Mapping[KT, Mapping[str, Any]]
builder: Callable[[KT, Mapping[str, Any], Optional[VT]], IT]
vectors: VectorStorage[KT, VT, MT]
def __init__(self,
storage: MutableMapping[KT, MutableMapping[str, Any]],
columns: Sequence[str],
vectors: VectorStorage[KT, VT, MT],
builder: Callable[[KT, Mapping[str, Any], Optional[VT]], IT],
):
self.storage = storage
self.columns = columns
self.vectors = vectors
self.builder = builder
def _decompose(self, ins: TableInstance[IT, KT, DT, VT, RT, MT]) -> Tuple[KT, Mapping[str, Any], Optional[VT]]:
return ins.identifier, ins._data, ins.vector
def __iter__(self) -> Iterator[KT]:
yield from self.storage.keys()
def _get_vector(self, key: KT) -> Optional[VT]:
if key in self.vectors:
return self.vectors[key]
return None
def __getitem__(self, key: KT) -> IT:
data = self.storage[key]
vector = self._get_vector(key)
ins = self.builder(key, data, vector)
return ins
def __len__(self) -> int:
return len(self.storage)
def __contains__(self, key: object) -> bool:
return key in self.storage
@property
def empty(self) -> bool:
return not self.storage
[docs] def get_all(self) -> Iterator[IT]:
yield from list(self.values())
[docs] def clear(self) -> None:
self.storage = dict()
[docs] def bulk_get_vectors(self, keys: Sequence[KT]) -> Tuple[Sequence[KT], Sequence[VT]]:
return self.vectors.get_vectors(keys)
[docs] def vector_chunker_selector(self, keys: Iterable[KT], batch_size: int = 200) -> Iterator[Sequence[Tuple[KT, VT]]]:
return self.vectors.get_vectors_zipped(list(keys), batch_size)
[docs] def bulk_get_all(self) -> List[IT]:
return list(self.get_all())
[docs]class TableProvider(MemoryChildrenMixin[IT, KT, DT, VT, RT],
TableProviderRO[IT, KT, DT, VT, RT, MT],
InstanceProvider[IT, KT, DT, VT, RT],
Generic[IT, KT, DT, VT, RT, MT]):
storage: MutableMapping[KT, MutableMapping[str, Any]]
builder: Callable[[InstanceProvider[IT, KT, DT, VT, RT], KT, Mapping[str, Any], Optional[VT]], IT]
def __init__(self,
storage: MutableMapping[KT, MutableMapping[str, Any]],
columns: Sequence[str],
vectors: VectorStorage[KT, VT, MT],
builder: Callable[[ROInstanceProvider[IT, KT, DT, VT, RT], KT, Mapping[str, Any], Optional[VT]], IT],
children: MutableMapping[KT, Set[KT]],
parents: MutableMapping[KT, KT]):
self.storage = storage
self.columns = columns
self.vectors = vectors
self.children = children
self.parents = parents
self.builder = builder
def __getitem__(self, __k: KT) -> IT:
ins = super().__getitem__(__k)
ins.register_hook(self._update)
return ins
def _decompose(self, ins: TableInstance[IT, KT, DT, VT, RT, MT]) -> Tuple[KT, Mapping[str, Any], Optional[VT]]:
return ins.identifier, ins._data, ins.vector
def _update(self, ins: IT) -> None:
raise NotImplementedError
def __setitem__(self, key: KT, value: IT) -> None:
assert isinstance(value, TableInstance)
idx, data, vector = self._decompose(value)
assert idx == key, f"Identifier -- Key mismatch: {idx} != {key}"
assert isinstance(data, MutableMapping)
self.storage[key] = data
if vector is not None:
self.vectors[key] = vector
def __delitem__(self, key: KT) -> None:
del self.storage[key]
[docs] def construct(*args: Any, **kwargs: Any) -> IT:
raise NotImplementedError
[docs] def create(self, *args: Any, **kwargs: Any) -> IT:
raise NotImplementedError