dahara1 committed on
Commit
07411c4
·
verified ·
1 Parent(s): cd05b82

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +150 -0
README.md CHANGED
@@ -139,3 +139,153 @@ for i, code_list in enumerate(code_lists):
139
  wavfile.write(filename, 24000, sample_np)
140
  print(f"Saved audio to: {filename}")
141
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  wavfile.write(filename, 24000, sample_np)
140
  print(f"Saved audio to: {filename}")
141
  ```
142
+
143
+
144
+ ## Streaming sample
145
+
146
+ ### Server side command
147
+ ```
148
+ python3 -m vllm.entrypoints.openai.api_server --model VoiceCore_gptq --host 0.0.0.0 --port 8000 --max-model-len 9000
149
+ ```
150
+ ### Client side script
151
+ ```
152
+ import torch
153
+ from transformers import AutoTokenizer
154
+ from snac import SNAC
155
+ import requests
156
+ import json
157
+ import sounddevice as sd
158
+ import numpy as np
159
+ import queue
160
+ import threading
161
+
162
+ # --- サーバー設定とモデルの準備 (変更なし) ---
163
+ SERVER_URL = "http://192.168.1.16:8000/v1/completions"
164
+ TOKENIZER_PATH = "webbigdata/VoiceCore_gptq"
165
+ MODEL_NAME = "VoiceCore_gptq"
166
+
167
+ prompts = [
168
+ "テストです",
169
+ "ジーピーティーキュー、問題なく動いてますかね?圧縮しすぎると別人の声になっちゃう事があるんですよね、ふふふ"
170
+ ]
171
+ chosen_voice = "matsukaze_male[neutral]"
172
+
173
+ print("Loading tokenizer...")
174
+ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
175
+ start_token, end_tokens = [128259], [128009, 128260, 128261]
176
+
177
+ print("Loading SNAC decoder to CPU...")
178
+ snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
179
+ snac_model.to("cpu")
180
+ print("SNAC model loaded.")
181
+ audio_start_token = 128257
182
+
183
+ def redistribute_codes(code_list):
184
+ if len(code_list) % 7 != 0: return torch.tensor([])
185
+ layer_1, layer_2, layer_3 = [], [], []
186
+ for i in range(len(code_list) // 7):
187
+ layer_1.append(code_list[7*i])
188
+ layer_2.append(code_list[7*i+1] - 4096)
189
+ layer_3.append(code_list[7*i+2] - (2*4096)); layer_3.append(code_list[7*i+3] - (3*4096))
190
+ layer_2.append(code_list[7*i+4] - (4*4096)); layer_3.append(code_list[7*i+5] - (5*4096))
191
+ layer_3.append(code_list[7*i+6] - (6*4096))
192
+ codes = [torch.tensor(layer).unsqueeze(0) for layer in [layer_1, layer_2, layer_3]]
193
+ return snac_model.decode(codes)
194
+
195
+
196
+ def audio_playback_worker(q, stream):
197
+ while True:
198
+ data = q.get()
199
+ if data is None:
200
+ break
201
+ stream.write(data)
202
+
203
+ for i, prompt in enumerate(prompts):
204
+ print("\n" + "="*50)
205
+ print(f"Processing prompt ({i+1}/{len(prompts)}): '{prompt}'")
206
+ print("="*50)
207
+
208
+ prompt_ = (f"{chosen_voice}: " + prompt) if chosen_voice else prompt
209
+ input_ids = tokenizer.encode(prompt_)
210
+ final_token_ids = start_token + input_ids + end_tokens
211
+
212
+ payload = {
213
+ "model": MODEL_NAME, "prompt": final_token_ids,
214
+ "max_tokens": 8192, "temperature": 0.6, "top_p": 0.90,
215
+ "repetition_penalty": 1.1, "stop_token_ids": [128258],
216
+ "stream": True
217
+ }
218
+
219
+ token_buffer = []
220
+ found_audio_start = False
221
+ CHUNK_SIZE = 28
222
+
223
+ audio_queue = queue.Queue()
224
+ playback_stream = sd.OutputStream(samplerate=24000, channels=1, dtype='float32')
225
+ playback_stream.start()
226
+
227
+ playback_thread = threading.Thread(target=audio_playback_worker, args=(audio_queue, playback_stream))
228
+ playback_thread.start()
229
+
230
+ try:
231
+ response = requests.post(SERVER_URL, headers={"Content-Type": "application/json"}, json=payload, stream=True)
232
+ response.raise_for_status()
233
+
234
+ for line in response.iter_lines():
235
+ if line:
236
+ decoded_line = line.decode('utf-8')
237
+ if decoded_line.startswith('data: '):
238
+ content = decoded_line[6:]
239
+ if content == '[DONE]':
240
+ break
241
+
242
+ try:
243
+ chunk = json.loads(content)
244
+ text_chunk = chunk['choices'][0]['text']
245
+ if text_chunk:
246
+ token_buffer.extend(tokenizer.encode(text_chunk, add_special_tokens=False))
247
+
248
+ if not found_audio_start:
249
+ try:
250
+ start_index = token_buffer.index(audio_start_token)
251
+ token_buffer = token_buffer[start_index + 1:]
252
+ found_audio_start = True
253
+ print("Audio start token found. Starting playback...")
254
+ except ValueError:
255
+ continue
256
+
257
+ while len(token_buffer) >= CHUNK_SIZE:
258
+ tokens_to_process = token_buffer[:CHUNK_SIZE]
259
+ token_buffer = token_buffer[CHUNK_SIZE:]
260
+
261
+ code_list = [t - 128266 for t in tokens_to_process]
262
+ samples = redistribute_codes(code_list)
263
+
264
+ if samples.numel() > 0:
265
+ sample_np = samples.detach().squeeze().numpy()
266
+ audio_queue.put(sample_np)
267
+
268
+ except (json.JSONDecodeError, Exception) as e:
269
+ print(f"処理中にエラー: {e}")
270
+
271
+ if found_audio_start and token_buffer:
272
+ remaining_length = (len(token_buffer) // 7) * 7
273
+ if remaining_length > 0:
274
+ tokens_to_process = token_buffer[:remaining_length]
275
+ code_list = [t - 128266 for t in tokens_to_process]
276
+ samples = redistribute_codes(code_list)
277
+ if samples.numel() > 0:
278
+ sample_np = samples.detach().squeeze().numpy()
279
+ audio_queue.put(sample_np)
280
+
281
+ except requests.exceptions.RequestException as e:
282
+ print(f"サーバーへのリクエストでエラーが発生しました: {e}")
283
+ finally:
284
+ audio_queue.put(None)
285
+ playback_thread.join()
286
+ playback_stream.stop()
287
+ playback_stream.close()
288
+ print("Playback finished for this prompt.")
289
+
290
+ print("\nAll processing complete!")
291
+ ```