import argparse
import base64
import logging
import os
import shutil
import time
import uuid
from typing import List

import cv2
import ollama
import psutil
import uvicorn
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse

try:
    import pynvml
    pynvml.nvmlInit()
    GPU_METRICS_AVAILABLE = True
except Exception:
    # Covers both a missing pynvml package and NVML initialization failures
    # (e.g. no NVIDIA driver on this machine).
    GPU_METRICS_AVAILABLE = False
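
# Note: GPU_METRICS_AVAILABLE is set here but not consumed below. If GPU
# utilization were to be sampled around the Ollama call, a typical pynvml
# read (a sketch only, not wired into the endpoint) would be:
#
#     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
#     gpu_util = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu  # percent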

from video_processor import extract_frames, FrameSamplingMethod, encode_frames_to_base64
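
# Assumed interface of the helpers imported above (defined in video_processor.py,
# not shown here): extract_frames(path, method, rate) returns a list of decoded
# frames, and encode_frames_to_base64(frames) returns the matching list of
# Base64-encoded image strings, which ollama.chat() accepts in the 'images' field.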

# Command-line configuration: which Ollama model this service will call.
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="openbmb/minicpm-v4:latest")
args = parser.parse_args()

os.makedirs(f'logs/{args.model_name}', exist_ok=True)

app = FastAPI(title="Video Inference Service")

# Per-request temporary frame directories are created under this folder.
TEMP_VIDEO_DIR = "temp_videos"
os.makedirs(TEMP_VIDEO_DIR, exist_ok=True)

# One log file per server start, grouped by model name.
log_filename = f"logs/{args.model_name}/{time.strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename=log_filename,
    filemode='a',
)

@app.post("/video-inference/")
async def video_inference(
    prompt: str = Form(...),
    video_path: str = Form(...),
    sampling_method: str = Form(...),
    sampling_rate: int = Form(5),
):
    """
    Run inference on a video and a text prompt, and return the model's response.

    - prompt: the user's question.
    - video_path: path to the video file on the server's filesystem.
    - sampling_method: frame sampling method, 'UNIFORM' or 'CONTENT_AWARE'
      (case-insensitive; unknown values fall back to content-aware sampling).
    - sampling_rate: sampling rate or threshold passed to the frame extractor.
    """

    try:
        request_start_time = time.time()
        request_id = str(uuid.uuid4())
        logging.info(f"[{request_id}] Received new video inference request. Prompt: '{prompt}', Video: '{video_path}'")

        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")

        if not video_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
            logging.warning(f"[{request_id}] File '{video_path}' may not be a video file.")

        # Map the user-supplied string onto the sampling enum (case-insensitive),
        # falling back to content-aware sampling for unknown values.
        sampling_method_map = {
            "CONTENT_AWARE": FrameSamplingMethod.CONTENT_AWARE,
            "UNIFORM": FrameSamplingMethod.UNIFORM,
        }
        sampling_method = sampling_method_map.get(sampling_method.upper(), FrameSamplingMethod.CONTENT_AWARE)

        # Per-request scratch directory, removed in the finally block below.
        temp_frame_dir = os.path.join(TEMP_VIDEO_DIR, request_id)
        os.makedirs(temp_frame_dir, exist_ok=True)
    except Exception as e:
        logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}")

    try:
        logging.info(f"[{request_id}] Extracting frames using method: {sampling_method.value}, rate/threshold: {sampling_rate}")

        frames = extract_frames(video_path, sampling_method, sampling_rate)
        if not frames:
            raise ValueError(f"Could not extract any frames from the video: {video_path}")

        logging.info(f"[{request_id}] Extracted {len(frames)} frames successfully. Encoding to Base64...")

        base64_frames = encode_frames_to_base64(frames)
        logging.info(f"[{request_id}] Encoded {len(base64_frames)} frames to Base64.")

        final_prompt = prompt

        try:
            logging.info(f"[{request_id}] Sending request to Ollama model '{args.model_name}'...")

            # Prime psutil's CPU counters so the readings taken after the call
            # reflect usage during the Ollama request.
            psutil.cpu_percent(interval=None)
            psutil.cpu_percent(interval=None, percpu=True)

            ollama_start_time = time.time()
            response = ollama.chat(
                model=args.model_name,
                messages=[
                    {
                        'role': 'user',
                        'content': final_prompt,
                        'images': base64_frames,
                    }
                ]
            )
            ollama_end_time = time.time()

            cpu_usage = psutil.cpu_percent(interval=None)
            cpu_core_utilization = psutil.cpu_percent(interval=None, percpu=True)
            logging.info(
                f"[{request_id}] Ollama inference took {ollama_end_time - ollama_start_time:.2f}s "
                f"(total request time {time.time() - request_start_time:.2f}s). "
                f"CPU usage: {cpu_usage}%, per-core: {cpu_core_utilization}"
            )

            logging.info(f"[{request_id}] Received response from Ollama successfully.")
            return response

        except Exception as ollama_error:
            logging.error(f"[{request_id}] Ollama inference failed: {str(ollama_error)}", exc_info=True)
            raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}")

    except HTTPException:
        # Let HTTP errors raised above (e.g. the 503 from the Ollama block)
        # propagate as-is instead of being re-wrapped as a 500.
        raise
    except Exception as e:
        logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}")
    finally:
        # Remove the per-request scratch directory whether or not inference succeeded.
        if os.path.exists(temp_frame_dir):
            shutil.rmtree(temp_frame_dir)
            logging.info(f"[{request_id}] Cleaned up temporary directory: {temp_frame_dir}")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8008)
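
# Example usage (assumptions: this file is saved as server.py, the model has
# already been pulled into the local Ollama instance, and the video path is
# visible to the server process):
#
#   python server.py --model_name openbmb/minicpm-v4:latest
#
#   curl -X POST http://localhost:8008/video-inference/ \
#     -F "prompt=What happens in this video?" \
#     -F "video_path=/path/to/clip.mp4" \
#     -F "sampling_method=UNIFORM" \
#     -F "sampling_rate=5"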