"""Run CosyVoice2 zero-shot inference on a fixed sentence and save the audio.

Every result yielded by ``inference_zero_shot`` is written to
``output_<index>.wav`` in the current working directory.
"""
import sys

# Extend the module search path before the cosyvoice imports below
# (presumably they need the vendored Matcha-TTS package to be importable
# -- TODO confirm).
sys.path.append('third_party/Matcha-TTS')

from cosyvoice.cli.cosyvoice import CosyVoice, CosyVoice2
from cosyvoice.utils.file_utils import load_wav
import torchaudio

# Load the pretrained model with JIT, TensorRT and fp16 all switched off.
cosyvoice = CosyVoice2(
    'pretrained_models/CosyVoice2-0.5B',
    load_jit=False,
    load_trt=False,
    fp16=False,
)

# Text to synthesize.
input_text = '过去的欢声笑语似乎从未存在,现在只剩失落'

# Prompt audio, loaded with 16000 passed as the second argument
# (presumably the target sample rate, per the variable name).
prompt_speech_16k = load_wav('./asset/zero_shot_prompt.wav', 16000)

# Second positional argument of inference_zero_shot
# (presumably the transcript of the prompt audio -- verify against the API).
prompt_text = '希望你以后能够做的比我还好呦。'

# Run inference (non-streaming) and save each yielded result to its own file.
results = cosyvoice.inference_zero_shot(
    input_text,
    prompt_text,
    prompt_speech_16k,
    stream=False,
)
for index, result in enumerate(results):
    torchaudio.save(
        f'output_{index}.wav',
        result['tts_speech'],
        cosyvoice.sample_rate,
    )