from abc import ABC, abstractmethod
from os import PathLike
from .hdf5vector import HDF5VectorStorage
from typing import (
Any,
Generic,
Iterable,
Iterator,
Optional,
Sequence,
Tuple,
TypeVar,
)
from .base import InstanceProvider, Instance
from .vectorstorage import VectorStorage
from ..typehints import KT, DT, VT, RT, MT
import numpy.typing as npt
IT = TypeVar("IT", bound="Instance[Any, Any, Any, Any]")
[docs]class ExternalVectorInstanceProvider(
InstanceProvider[IT, KT, DT, VT, RT], ABC, Generic[IT, KT, DT, VT, RT, MT]
):
vectorstorage: Optional[VectorStorage[KT, VT, MT]]
[docs] @abstractmethod
def load_vectors(self) -> VectorStorage[KT, VT, MT]:
raise NotImplementedError
[docs] @abstractmethod
def bulk_add_vectors(
self, keys: Sequence[KT], values: Sequence[VT]
) -> None:
raise NotImplementedError
[docs] def bulk_get_vectors(
self, keys: Sequence[KT]
) -> Tuple[Sequence[KT], Sequence[VT]]:
if self.vectorstorage is None:
self.vectorstorage = self.load_vectors()
ret_keys, vectors = self.vectorstorage.get_vectors(keys)
return ret_keys, vectors
[docs] def vector_chunker_selector(
self, keys: Iterable[KT], batch_size: int = 200
) -> Iterator[Sequence[Tuple[KT, VT]]]:
if self.vectorstorage is None:
self.vectorstorage = self.load_vectors()
results = self.vectorstorage.get_vectors_zipped(list(keys), batch_size)
return results
[docs] def vector_chunker(
self, batch_size: int = 200
) -> Iterator[Sequence[Tuple[KT, VT]]]:
results = self.vector_chunker_selector(self.key_list, batch_size)
return results
[docs]class HDF5VectorInstanceProvider(
ExternalVectorInstanceProvider[
IT, KT, DT, npt.NDArray[Any], RT, npt.NDArray[Any]
],
Generic[IT, KT, DT, RT],
):
vector_storage_location: "PathLike[str]"
[docs] def load_vectors(self) -> HDF5VectorStorage[KT, Any]:
return HDF5VectorStorage[KT, Any](self.vector_storage_location)
[docs] def bulk_add_vectors(
self, keys: Sequence[KT], values: Sequence[npt.NDArray[Any]]
) -> None:
with HDF5VectorStorage[KT, Any](
self.vector_storage_location, "a"
) as writeable_storage:
writeable_storage.add_bulk(keys, values)
self.vectorstorage = self.load_vectors()