|
from pathlib import Path |
|
|
|
import pytest |
|
from dotenv import dotenv_values |
|
|
|
from s3prl.dataio.corpus.quesst14 import Quesst14 |
|
|
|
|
|
@pytest.mark.corpus
def test_quesst14_for_qbe():
    """Verify the utterance counts that the Quesst14 corpus yields for QbE.

    The dataset root is taken from the ``Quesst14`` entry of the mapping
    returned by ``dotenv_values()``; a missing entry raises ``KeyError``.
    """

    def quesst14_for_qbe(dataset_root: str):
        """Build the stem-indexed data table and per-split key lists.

        Args:
            dataset_root: Location handed to the ``Quesst14`` constructor.

        Returns:
            A dict with, in insertion order, ``all_data`` (file stem ->
            ``{"wav_path": path}``), ``valid_keys``, ``test_keys`` and
            ``doc_keys`` (lists of file stems for each split).
        """
        corpus = Quesst14(dataset_root)

        def stems_of(paths):
            # File name without directory or extension identifies each item.
            return [Path(item).stem for item in paths]

        every_path = corpus.valid_queries + corpus.test_queries + corpus.docs
        table = {}
        for wav in every_path:
            # A repeated stem overwrites the earlier entry, as in any dict.
            table[Path(wav).stem] = {"wav_path": wav}

        return {
            "all_data": table,
            "valid_keys": stems_of(corpus.valid_queries),
            "test_keys": stems_of(corpus.test_queries),
            "doc_keys": stems_of(corpus.docs),
        }

    quesst_root = dotenv_values()["Quesst14"]
    prepared = quesst14_for_qbe(quesst_root)
    all_data = prepared["all_data"]
    valid_keys = prepared["valid_keys"]
    test_keys = prepared["test_keys"]
    doc_keys = prepared["doc_keys"]

    assert len(all_data) == 2714
    # all_data is keyed by stem, so equal totals also mean no stem was
    # collapsed when the three splits were merged.
    assert len(valid_keys) + len(test_keys) + len(doc_keys) == 2714
|
|