instancelib.instances.hdf5pandas module

class instancelib.instances.hdf5pandas.HDF5TextInstance(identifier, data, vector, representation=None, tokenized=None, map_to_original=None, split_marker=None, external=None)[source]

Bases: DataPoint[Union[int, str], str, ndarray, str], TextInstance[Union[int, str], ndarray]

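Parameters:
  • identifier –

  • data –

  • vector –

  • representation – optional; defaults to None

  • tokenized – optional; defaults to None

  • map_to_original – optional; defaults to None

  • split_marker – optional; defaults to None

  • external – optional; defaults to None
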
property map_to_original: ndarray[Any, dtype[Any]] | None
property split_marker: Any | None
property tokenized: Sequence[str] | None
class instancelib.instances.hdf5pandas.HDF5TextProvider(data_storage, vector_storage_location, hdf5_dataset, id_col, data_cols)[source]

Bases: HDF5VectorInstanceProvider[HDF5TextInstance, Union[int, str], str, str], ExternalProvider[HDF5TextInstance, Union[int, str], str, ndarray, str]

Parameters:
  • data_storage (PathLike[str]) –

  • vector_storage_location (PathLike[str]) –

  • hdf5_dataset (str) –

  • id_col (str) –

  • data_cols (Sequence[str]) –

build_from_external(k)[source]
Parameters:

k (Union[int, str]) –

Return type:

HDF5TextInstance

clear()[source]
Return type:

None

property dataframe: DataFrame
property empty: bool

Indicates whether the provider contains no instances.

Returns:

True if the provider is empty, False otherwise

Return type:

bool

get_all()[source]

Get an iterator over all instances in the provider.

Yields:

InstanceType – The next instance in the provider, until every instance has been yielded

update_external(ins)[source]
Parameters:

ins (Instance[Union[int, str], str, ndarray[Any, dtype[Any]], str]) –

Return type:

None

vector_storage_location: PathLike[str]
vectorstorage: Optional[VectorStorage[KT, VT, MT]]