instancelib.ingest.qrel module

class instancelib.ingest.qrel.Qrel(topic, doc_id, relevancy)[source]

Bases: object

Parameters:
  • topic (str) –

  • doc_id (str) –

  • relevancy (int) –

doc_id: str
relevancy: int
topic: str

class instancelib.ingest.qrel.TrecDataset(docids, texts, qrels, topics, pos_label='Relevant', neg_label='Irrelevant')[source]

Bases: object

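Parameters:
  • docids –

  • texts –

  • qrels –

  • topics –

  • pos_label (str) – defaults to 'Relevant'

  • neg_label (str) – defaults to 'Irrelevant'
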
classmethod from_path(base_dir)[source]
Parameters:

base_dir (Path) –

get_document(topic_key, doc_id)[source]
Parameters:
  • topic_key (str) –

  • doc_id (str) –

Return type:

str

get_documents(topic_key)[source]
Parameters:

topic_key (str) –

Return type:

FrozenSet[str]

get_env(topic_key)[source]
Parameters:

topic_key (str) –

Return type:

TextEnvironment[str, ndarray[Any, dtype[Any]], str]

get_envs()[source]
Return type:

Mapping[str, TextEnvironment[str, ndarray[Any, dtype[Any]], str]]

get_labels(topic_key, document)[source]
Parameters:
  • topic_key (str) –

  • document (str) –

Return type:

FrozenSet[str]

get_topicqrels(topic_key)[source]
Parameters:

topic_key (str) –

Return type:

DataFrame

instancelib.ingest.qrel.build_doc_map(topic_docs)[source]
Parameters:

topic_docs (Mapping[str, Mapping[str, Mapping[str, str]]]) –

Return type:

Mapping[str, Set[str]]

instancelib.ingest.qrel.hidden(p)[source]
Parameters:

p (Path) –

Return type:

bool

instancelib.ingest.qrel.read_docids(docid_file)[source]
Parameters:

docid_file (Path) –

Return type:

FrozenSet[str]

instancelib.ingest.qrel.read_doctexts(doctext_file)[source]
Parameters:

doctext_file (Path) –

Return type:

Optional[Mapping[str, Mapping[str, str]]]

instancelib.ingest.qrel.read_qrel(qrel_file)[source]
Parameters:

qrel_file (Path) –

Return type:

DataFrame

instancelib.ingest.qrel.read_qrel_dataset(base_dir)[source]
Parameters:

base_dir (Path) –

instancelib.ingest.qrel.read_topics(topic_dir)[source]
Parameters:

topic_dir (Path) –

Return type:

DataFrame