Source code for instancelib.instances.memoryvectorstorage

from __future__ import annotations

from ..utils.chunks import divide_iterable_in_lists
from .vectorstorage import VectorStorage

from ..typehints import KT, VT, MT

import numpy as np
import numpy.typing as npt

from typing import (
    Any,
    Generic,
    Sequence,
    MutableMapping,
    Callable,
    Iterator,
    Union,
    Tuple,
)


class MemoryVectorStorage(VectorStorage[KT, VT, MT], Generic[KT, VT, MT]):
    """Vector storage backed by a mutable mapping that is held in memory.

    Individual vectors are kept in ``storage`` under their key. Conversion
    between a sequence of vectors and a single matrix object is delegated
    to the ``to_matrix`` and ``from_matrix`` callables supplied at
    construction time.

    Parameters
    ----------
    storage : MutableMapping[KT, VT]
        The mapping in which the vectors are stored
    from_matrix : Callable[[MT], Sequence[VT]]
        Callable that splits a matrix into a sequence of vectors
    to_matrix : Callable[[Sequence[VT]], MT]
        Callable that combines a sequence of vectors into a matrix
    """

    from_matrix: Callable[[MT], Sequence[VT]]
    to_matrix: Callable[[Sequence[VT]], MT]

    def __init__(
        self,
        storage: MutableMapping[KT, VT],
        from_matrix: Callable[[MT], Sequence[VT]],
        to_matrix: Callable[[Sequence[VT]], MT],
    ) -> None:
        self.storage = storage
        self.to_matrix = to_matrix
        self.from_matrix = from_matrix

    def writeable(self) -> bool:
        """Return whether vectors can be written; always ``True`` here."""
        return True

    def __getitem__(self, k: KT) -> VT:
        return self.storage[k]

    def __setitem__(self, k: KT, value: VT) -> None:
        self.storage[k] = value

    def __contains__(self, item: object) -> bool:
        return item in self.storage

    def __iter__(self) -> Iterator[KT]:
        return iter(self.storage)

    def __len__(self) -> int:
        return len(self.storage)

    def __delitem__(self, __v: KT) -> None:
        del self.storage[__v]

    def add_bulk(
        self, input_keys: Sequence[KT], input_values: Sequence[VT]
    ) -> None:
        """Store several vectors at once.

        Keys and values are paired positionally with ``zip``, so surplus
        items in the longer of the two sequences are ignored.

        Parameters
        ----------
        input_keys : Sequence[KT]
            The keys under which the vectors are stored
        input_values : Sequence[VT]
            The vectors, in the same order as `input_keys`
        """
        for key, vector in zip(input_keys, input_values):
            self.storage[key] = vector

    def get_matrix(self, keys: Sequence[KT]) -> Tuple[Sequence[KT], MT]:
        """Return the vectors for `keys` combined into a single matrix.

        Parameters
        ----------
        keys : Sequence[KT]
            A list of identifier keys

        Returns
        -------
        Tuple[Sequence[KT], MT]
            A tuple containing:

            - The `keys` argument, unchanged
            - The result of ``to_matrix`` applied to the vectors of
              those keys (in the order of `keys`)
        """
        vectors = [self.storage[key] for key in keys]
        return keys, self.to_matrix(vectors)

    def add_bulk_matrix(self, input_keys: Sequence[KT], matrix: MT) -> None:
        """Store the vectors contained in `matrix` under `input_keys`.

        The matrix is first split with ``from_matrix``; the resulting
        vectors are paired positionally with `input_keys` using ``zip``.

        Parameters
        ----------
        input_keys : Sequence[KT]
            The keys under which the vectors are stored
        matrix : MT
            The matrix that holds the vectors
        """
        for key, vector in zip(input_keys, self.from_matrix(matrix)):
            self.storage[key] = vector

    def get_vectors(
        self, keys: Sequence[KT]
    ) -> Tuple[Sequence[KT], Sequence[VT]]:
        """Return the vectors that correspond with the `keys`

        Parameters
        ----------
        keys : Sequence[KT]
            A list of identifier keys

        Returns
        -------
        Tuple[Sequence[KT], Sequence[VT]]
            A tuple containing two lists:

            - The `keys` argument, unchanged
            - A list with the vectors, in the order of `keys`
        """
        return keys, [self[key] for key in keys]

    def get_matrix_chunked(
        self, keys: Sequence[KT], chunk_size: int = 200
    ) -> Iterator[Tuple[Sequence[KT], MT]]:
        """Yield matrices for the requested `keys`, one chunk at a time.

        The keys are split by ``divide_iterable_in_lists(keys, chunk_size)``
        and each resulting chunk is converted with ``to_matrix``.

        Parameters
        ----------
        keys : Sequence[KT]
            A list of identifier keys
        chunk_size : int, optional
            The size of the chunks, by default 200 (the same default as
            the other chunked accessors of this class)

        Yields
        ------
        Tuple[Sequence[KT], MT]
            A tuple containing:

            - The keys of the chunk
            - The matrix built from the vectors of those keys
        """
        for chunk in divide_iterable_in_lists(keys, chunk_size):
            vectors = [self[key] for key in chunk]
            yield chunk, self.to_matrix(vectors)

    def matrices_chunker(
        self, chunk_size: int = 200
    ) -> Iterator[Tuple[Sequence[KT], MT]]:
        """Yield matrices that together cover every key in this storage.

        Parameters
        ----------
        chunk_size : int, optional
            The size of the chunks, by default 200

        Yields
        ------
        Tuple[Sequence[KT], MT]
            A tuple containing:

            - The keys of the chunk
            - The matrix built from the vectors of those keys
        """
        # Materialize the keys before chunking starts.
        all_keys = list(self.keys())
        for chunk in divide_iterable_in_lists(all_keys, chunk_size):
            vectors = [self[key] for key in chunk]
            yield chunk, self.to_matrix(vectors)

    def get_vectors_chunked(
        self, keys: Sequence[KT], chunk_size: int = 200
    ) -> Iterator[Tuple[Sequence[KT], Sequence[VT]]]:
        """Return vectors in chunks of `chunk_size` containing the vectors
        requested in `keys`

        Parameters
        ----------
        keys : Sequence[KT]
            A list of identifier keys
        chunk_size : int, optional
            The size of the chunks, by default 200

        Yields
        ------
        Tuple[Sequence[KT], Sequence[VT]]
            A tuple containing two lists:

            - A list with the identifiers of the chunk
            - A list with the vectors of those identifiers
        """
        for chunk in divide_iterable_in_lists(keys, chunk_size):
            yield chunk, [self[key] for key in chunk]

    def get_vectors_zipped(
        self, keys: Sequence[KT], chunk_size: int = 200
    ) -> Iterator[Sequence[Tuple[KT, VT]]]:
        """Yield chunks of ``(key, vector)`` pairs for the requested `keys`.

        Parameters
        ----------
        keys : Sequence[KT]
            A list of identifier keys
        chunk_size : int, optional
            The size of the chunks, by default 200

        Yields
        ------
        Sequence[Tuple[KT, VT]]
            A list of ``(key, vector)`` tuples for one chunk
        """
        for chunk in divide_iterable_in_lists(keys, chunk_size):
            yield [(key, self[key]) for key in chunk]

    def __enter__(self) -> VectorStorage[KT, VT, MT]:
        return self

    def __exit__(self, type: Any, value: Any, traceback: Any) -> None:
        """Leave the context; this class does nothing on exit."""

    @classmethod
    def create(
        cls,
        from_matrix: Callable[[MT], Sequence[VT]],
        to_matrix: Callable[[Sequence[VT]], MT],
    ) -> MemoryVectorStorage[KT, VT, MT]:
        """Build a storage object that starts out with an empty ``dict``.

        Parameters
        ----------
        from_matrix : Callable[[MT], Sequence[VT]]
            Callable that splits a matrix into a sequence of vectors
        to_matrix : Callable[[Sequence[VT]], MT]
            Callable that combines a sequence of vectors into a matrix

        Returns
        -------
        MemoryVectorStorage[KT, VT, MT]
            A new, empty storage object
        """
        storage: MutableMapping[KT, VT] = {}
        return cls(storage, from_matrix, to_matrix)

    def reload(self) -> None:
        """Do nothing; this implementation performs no reload."""
class NumpyFromMatrix:
    """Callable that splits a NumPy matrix into a list of its rows."""

    def __call__(self, matrix: npt.NDArray[Any]) -> Sequence[npt.NDArray[Any]]:
        """Return the items obtained by iterating over `matrix`.

        Parameters
        ----------
        matrix : npt.NDArray[Any]
            The array to split

        Returns
        -------
        Sequence[npt.NDArray[Any]]
            A list with one entry per step of iterating over `matrix`
        """
        rows = list(matrix)
        return rows
class NumpyToMatrix:
    """Callable that stacks a sequence of NumPy vectors into one matrix."""

    def __call__(
        self, vectors: Sequence[npt.NDArray[Any]]
    ) -> npt.NDArray[Any]:
        """Return the result of ``np.vstack`` applied to `vectors`.

        Parameters
        ----------
        vectors : Sequence[npt.NDArray[Any]]
            The arrays to stack

        Returns
        -------
        npt.NDArray[Any]
            The vertically stacked array
        """
        stacked = np.vstack(vectors)
        return stacked
class NumpyMemoryStorage(
    MemoryVectorStorage[KT, npt.NDArray[Any], npt.NDArray[Any]], Generic[KT]
):
    """In-memory vector storage in which vectors and matrices are NumPy arrays.

    The matrix conversions are fixed to ``NumpyFromMatrix`` and
    ``NumpyToMatrix``.

    Parameters
    ----------
    storage : MutableMapping[KT, npt.NDArray[Any]]
        The mapping in which the vectors are stored
    """

    def __init__(self, storage: MutableMapping[KT, npt.NDArray[Any]]) -> None:
        super().__init__(
            storage,
            from_matrix=NumpyFromMatrix(),
            to_matrix=NumpyToMatrix(),
        )

    @classmethod
    def create(cls) -> NumpyMemoryStorage[KT]:
        """Build a storage object that starts out with an empty ``dict``.

        Returns
        -------
        NumpyMemoryStorage[KT]
            A new, empty storage object
        """
        empty_storage = {}
        return cls(empty_storage)