Update handler.py

154c369 verified 7 days ago

5.68 kB

	import torch
	import numpy as np
	import librosa
	import requests
	import io
	import os
	import base64 # <-- Nova importação
	import matplotlib.pyplot as plt # <-- Nova importação
	import soundfile as sf
	from scipy.signal import butter, lfilter
	from transformers import pipeline, AutoImageProcessor, AutoModelForImageClassification
	from PIL import Image
	from pydub import AudioSegment

	# --- PRE-PROCESSING FUNCTIONS ---
	# Sample rate (Hz) the handler resamples audio to before filtering,
	# spectrogram generation and tempo estimation.
	TARGET_SR = 2000
	# Number of mel bands requested from librosa (passed as ``n_mels``); the mel
	# axis becomes the row axis of the spectrogram image.
	IMAGE_HEIGHT = 128

	def butter_bandpass_filter(data, fs, lowcut=20.0, highcut=200.0, order=3):
	    """Band-pass a signal with a Butterworth filter.

	    Args:
	        data: Samples to filter.
	        fs: Sampling rate of ``data`` in Hz.
	        lowcut: Lower cut-off frequency in Hz.
	        highcut: Upper cut-off frequency in Hz.
	        order: Filter order handed to ``scipy.signal.butter``.

	    Returns:
	        The output of ``scipy.signal.lfilter``, or ``data`` itself (untouched)
	        when the normalized band is unusable: lower edge <= 0 or upper
	        edge >= 1 (i.e. at or above the Nyquist frequency).
	    """
	    nyquist = 0.5 * fs
	    # Cut-offs expressed as fractions of the Nyquist frequency.
	    band = [lowcut / nyquist, highcut / nyquist]

	    # An invalid band cannot be designed; pass the signal through unchanged.
	    if band[1] >= 1 or band[0] <= 0:
	        return data

	    numerator, denominator = butter(order, band, btype='band')
	    return lfilter(numerator, denominator, data)

	def create_spectrogram_image(y_cleaned, sr):
	    """Turn an audio signal into an RGB mel-spectrogram image.

	    Args:
	        y_cleaned: Audio samples passed to ``librosa.feature.melspectrogram``.
	        sr: Sampling rate of ``y_cleaned`` in Hz.

	    Returns:
	        A PIL image in "RGB" mode built from the dB-scaled mel spectrogram,
	        min-max normalized to the 0-255 range.
	    """
	    power = librosa.feature.melspectrogram(y=y_cleaned, sr=sr, n_mels=IMAGE_HEIGHT)
	    decibels = librosa.power_to_db(power, ref=np.max)

	    floor = decibels.min()
	    # The small epsilon keeps the division defined for a constant spectrogram.
	    span = decibels.max() - floor + 1e-6
	    pixels = ((decibels - floor) / span * 255.0).astype(np.uint8)

	    grayscale = Image.fromarray(pixels)
	    return grayscale.convert("RGB")

	def calculate_bpm(y_cleaned, sr):
	    """Estimate the tempo of a signal in beats per minute.

	    Args:
	        y_cleaned: Audio samples used to compute the onset-strength envelope.
	        sr: Sampling rate of ``y_cleaned`` in Hz.

	    Returns:
	        The first element of librosa's tempo estimate for the signal.
	    """
	    onset_env = librosa.onset.onset_strength(y=y_cleaned, sr=sr, aggregate=np.mean)

	    # ``librosa.beat.tempo`` is a deprecated alias since librosa 0.10 and is
	    # scheduled for removal; prefer the relocated function when it exists and
	    # fall back to the old name on earlier librosa releases.
	    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo

	    bpm = estimate_tempo(onset_envelope=onset_env, sr=sr)[0]
	    return bpm

	# --- HANDLER DO ENDPOINT ---
	class EndpointHandler:
	    """Inference handler that classifies audio fetched from a URL.

	    The audio is downloaded, converted to mono, resampled to ``TARGET_SR``,
	    band-pass filtered and rendered as a mel-spectrogram image, which is fed
	    to an image-classification pipeline. The response additionally carries a
	    tempo estimate, a PNG plot of the filtered waveform and the filtered
	    audio itself, the last two as base64 data URIs.
	    """

	    # (connect, read) timeout in seconds for the audio download. Without a
	    # timeout ``requests`` waits indefinitely on an unresponsive server.
	    DOWNLOAD_TIMEOUT = (10, 60)

	    def __init__(self, path=""):
	        """Load the image processor and model and build the pipeline.

	        Args:
	            path: Directory containing the ``modelo-vit-audio-final`` folder.
	        """
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        model_directory = os.path.join(path, "modelo-vit-audio-final")

	        processor = AutoImageProcessor.from_pretrained(model_directory)
	        model = AutoModelForImageClassification.from_pretrained(model_directory).to(device)

	        self.pipe = pipeline(
	            "image-classification",
	            model=model,
	            image_processor=processor,
	            device=device,
	        )
	        print("Pipeline ViT com pré-processamento de áudio (via URL) carregado com sucesso.")

	    @staticmethod
	    def _pcm_to_float(samples, sample_width):
	        """Scale signed integer PCM samples to float32 in [-1.0, 1.0)."""
	        # Full scale depends on the sample width (bytes per sample); a fixed
	        # 2**15 is only correct for 16-bit audio.
	        full_scale = float(1 << (8 * sample_width - 1))
	        return np.array(samples).astype(np.float32) / full_scale

	    @staticmethod
	    def _render_pcg_png(y_cleaned, sr):
	        """Plot the waveform and return the PNG bytes of the figure."""
	        time_axis = np.arange(0, len(y_cleaned)) / sr

	        # Show the 1 s - 5 s window; for shorter recordings show the last
	        # (up to) 4 seconds instead.
	        start_time, end_time = 1.0, 5.0
	        start_index, end_index = int(start_time * sr), int(end_time * sr)
	        if end_index > len(y_cleaned):
	            end_index = len(y_cleaned)
	            start_index = max(0, end_index - int(4 * sr))

	        fig, ax = plt.subplots(figsize=(15, 5))
	        try:
	            ax.plot(time_axis, y_cleaned, linewidth=0.7)
	            ax.set_title("Fonocardiograma (PCG)")
	            ax.set_xlabel("Tempo (segundos)")
	            ax.set_ylabel("Amplitude")
	            ax.grid(True, linestyle='--')
	            ax.set_xlim(time_axis[start_index], time_axis[end_index - 1])

	            png_buffer = io.BytesIO()
	            # Save this specific figure rather than pyplot's "current" one.
	            fig.savefig(png_buffer, format='png', bbox_inches='tight')
	        finally:
	            # Always release the figure, even when plotting fails.
	            plt.close(fig)
	        return png_buffer.getvalue()

	    def __call__(self, data: dict) -> list:
	        """Run the full audio-to-prediction flow for one request.

	        Args:
	            data: Request payload. ``data["inputs"]`` must be a non-empty
	                string with the audio URL; the key is removed from ``data``.

	        Returns:
	            A one-element list. On success the element has the keys
	            ``classification_results``, ``bpm_estimated``,
	            ``pcg_image_base64`` and ``audio_base64``. On failure it is
	            ``{"error": <message>}``; errors raised while processing are
	            logged and reported this way instead of being propagated.
	        """
	        audio_url = data.pop("inputs", None)
	        if not audio_url or not isinstance(audio_url, str):
	            return [{"error": "Nenhum 'inputs' (URL de áudio como string) foi fornecido."}]

	        try:
	            print(f"Baixando e processando áudio de: {audio_url}")

	            # NOTE(security): the URL is caller-supplied and fetched as-is (no
	            # scheme/host allow-list); review if untrusted clients can call this.
	            response = requests.get(audio_url, timeout=self.DOWNLOAD_TIMEOUT)
	            response.raise_for_status()
	            audio_data = io.BytesIO(response.content)

	            sound = AudioSegment.from_file(audio_data)
	            sound = sound.set_channels(1)

	            sr_original = sound.frame_rate
	            y_normalized = self._pcm_to_float(
	                sound.get_array_of_samples(), sound.sample_width
	            )

	            if sr_original != TARGET_SR:
	                y_resampled = librosa.resample(
	                    y=y_normalized, orig_sr=sr_original, target_sr=TARGET_SR
	                )
	            else:
	                y_resampled = y_normalized

	            y_cleaned = butter_bandpass_filter(y_resampled, fs=TARGET_SR)
	            spectrogram_image = create_spectrogram_image(y_cleaned, TARGET_SR)

	            # Encode the filtered audio as an in-memory WAV file, then base64.
	            buffer = io.BytesIO()
	            sf.write(buffer, y_cleaned, TARGET_SR, format='WAV')
	            audio_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

	            bpm = calculate_bpm(y_cleaned, TARGET_SR)
	            print(f"BPM Estimado: {bpm:.0f}")

	            print("Gerando e codificando gráfico PCG para a resposta...")
	            pcg_png = self._render_pcg_png(y_cleaned, TARGET_SR)
	            pcg_image_base64 = base64.b64encode(pcg_png).decode('utf-8')

	            print("Enviando espectrograma para o pipeline de predição...")
	            prediction = self.pipe(spectrogram_image)
	            print(f"Predição concluída: {prediction}")

	            final_response = {
	                "classification_results": prediction,
	                "bpm_estimated": int(round(bpm)),
	                "pcg_image_base64": f'data:image/png;base64,{pcg_image_base64}',
	                # The payload is WAV (see sf.write above), so label it as WAV.
	                "audio_base64": f'data:audio/wav;base64,{audio_base64}',
	            }
	            return [final_response]

	        except Exception as e:
	            # Top-level boundary: log the traceback and report the failure in
	            # the response body instead of raising.
	            error_message = f"Erro ao processar a URL do áudio: {str(e)}"
	            import traceback
	            traceback.print_exc()
	            return [{"error": error_message}]