"""Run CosyVoice2 zero-shot inference on one sentence and save the audio.

Loads the CosyVoice2-0.5B model from ``pretrained_models/``, synthesizes
``input_text`` using ``./asset/zero_shot_prompt.wav`` as the prompt
recording, and writes every yielded result to ``output_<index>.wav`` in the
current working directory.
"""

import sys

# Must run before the cosyvoice imports below: the path is appended so that
# modules under third_party/Matcha-TTS are importable (presumably required by
# cosyvoice itself -- confirm against the project layout). The path is
# relative, so the script is expected to be started from the repository root.
sys.path.append('third_party/Matcha-TTS')

from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2

from cosyvoice.utils.file_utils import load_wav

import torchaudio


# Model handle; JIT, TensorRT and fp16 are all explicitly disabled.
cosyvoice = CosyVoice2(
    'pretrained_models/CosyVoice2-0.5B',
    load_jit=False,
    load_trt=False,
    fp16=False,
)

# Text to synthesize. The inline "<crying>" tag is passed through to the
# model unchanged.
input_text = '过去的欢声笑语似乎从未存在,<crying>现在只剩失落'

# Prompt recording, loaded with a requested rate of 16000.
prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000)

# Text passed alongside the prompt recording (presumably its transcript --
# verify against the audio asset).
prompt_text = '希望你以后能够做的比我还好呦。'

# inference_zero_shot is iterated, so one call may yield several results;
# each one is written to its own numbered file.
for index, result in enumerate(
    cosyvoice.inference_zero_shot(
        input_text,
        prompt_text,
        prompt_speech_16k,
        stream=False,
    )
):
    torchaudio.save(
        f'output_{index}.wav',
        result['tts_speech'],
        cosyvoice.sample_rate,
    )